In [1]:
%store -r params

In [2]:
import csv
import time

In [3]:
def split_csv(input_file, output_prefix, records_per_file=10000, sleep_time=5):
  """
  Splits a CSV file into multiple files with a specified number of records each.

  Args:
    input_file: Path to the input CSV file.
    output_prefix: Prefix for the output filenames (e.g., "data_").
    records_per_file: Number of records to write to each output file.
  """
  if records_per_file <= 0:
    raise ValueError("records_per_file must be a positive integer")

  with open(input_file, 'r') as infile:
    reader = csv.reader(infile)
    headers = next(reader)  # Read the header row
    file_num = 0
    current_file = None
    current_records = 0

    for row in reader:
      if current_records == 0:
        # Create a new output file
        output_filename = f"{output_prefix}_{file_num}.csv"
        current_file = open(output_filename, 'w', newline='')
        writer = csv.writer(current_file)
        writer.writerow(headers)  # Write header to new file
      
      writer.writerow(row)
      current_records += 1

      if current_records == records_per_file:
        print(current_records)
        current_file.close()  # Close the current file
        current_records = 0
        file_num += 1
        print(f"File {output_filename} added..")
        time.sleep(sleep_time)

    if current_records > 0:
      current_file.close()  # Close the last file if it has remaining rows



In [None]:
input_file = params["credit_card_file"]
output_prefix = f"{params['mainframe_landing']}/credit_card"
split_csv(input_file, output_prefix, 10000, 10)

10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_0.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_1.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_2.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_3.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_4.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_5.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_6.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/exec/red/mainframe/landing/credit_card_7.csv added..
10000
File /Users/chiradip/Documents/learn/credit-card/datalake/