In [4]:
import pandas as pd
import os

def split_csv(file_path, output_dir, max_file_size=25*1024*1024):
    """Split a CSV file into sequentially numbered, smaller CSV files.

    The input is loaded in full with ``pd.read_csv`` and written back out as
    ``chunk_1.csv``, ``chunk_2.csv``, ... inside ``output_dir`` (created if it
    does not exist). Every chunk repeats the header row and omits the index.

    The number of rows per chunk is derived from the DataFrame's average
    *in-memory* row size (``memory_usage(index=True, deep=True)``). That is
    only a proxy for the size of the CSV text on disk, so ``max_file_size``
    is a target rather than a hard guarantee.

    Args:
        file_path: Path of the CSV file to split.
        output_dir: Directory that receives the chunk files.
        max_file_size: Target upper bound for each chunk, in bytes.

    Raises:
        ValueError: If ``max_file_size`` is not positive.
    """
    if max_file_size <= 0:
        raise ValueError("max_file_size must be a positive number of bytes")

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read the input CSV file
    df = pd.read_csv(file_path)
    total_rows = len(df)

    if total_rows:
        # Estimate how many rows fit in max_file_size from the average row
        # footprint. Clamp to at least one row so that a single oversized row
        # still gets written instead of producing a zero step.
        avg_row_size = df.memory_usage(index=True, deep=True).sum() / total_rows
        rows_per_chunk = max(1, int(max_file_size / avg_row_size))
        # Stepping a range over the rows yields exactly ceil(total / step)
        # starts, so no empty trailing chunk is produced when the row count
        # is an exact multiple of rows_per_chunk.
        start_rows = range(0, total_rows, rows_per_chunk)
    else:
        # Header-only input: emit a single header-only chunk so the column
        # layout is preserved rather than dividing by zero.
        rows_per_chunk = 1
        start_rows = range(1)

    for chunk_number, start_row in enumerate(start_rows, start=1):
        # iloc slicing clips at the end of the frame, so the last chunk is
        # simply shorter.
        chunk = df.iloc[start_row:start_row + rows_per_chunk]

        # Define the output file name
        output_file = os.path.join(output_dir, f"chunk_{chunk_number}.csv")

        # Save the chunk to a CSV file
        chunk.to_csv(output_file, index=False)
        print(f"Created {output_file} with {len(chunk)} rows.")

# Example run: split the daily dataset into chunk files under "chunks",
# using split_csv's default target size per chunk.
input_file_path = 'daily_dataset.csv'
output_directory = 'chunks'

split_csv(file_path=input_file_path, output_dir=output_directory)

Created chunks\chunk_1.csv with 138700 rows.
Created chunks\chunk_2.csv with 138700 rows.
Created chunks\chunk_3.csv with 138700 rows.
Created chunks\chunk_4.csv with 138700 rows.
Created chunks\chunk_5.csv with 138700 rows.
Created chunks\chunk_6.csv with 138700 rows.
Created chunks\chunk_7.csv with 138700 rows.
Created chunks\chunk_8.csv with 138700 rows.
Created chunks\chunk_9.csv with 138700 rows.
Created chunks\chunk_10.csv with 138700 rows.
Created chunks\chunk_11.csv with 138700 rows.
Created chunks\chunk_12.csv with 138700 rows.
Created chunks\chunk_13.csv with 138700 rows.
Created chunks\chunk_14.csv with 138700 rows.
Created chunks\chunk_15.csv with 138700 rows.
Created chunks\chunk_16.csv with 138700 rows.
Created chunks\chunk_17.csv with 138700 rows.
Created chunks\chunk_18.csv with 138700 rows.
Created chunks\chunk_19.csv with 138700 rows.
Created chunks\chunk_20.csv with 138700 rows.
Created chunks\chunk_21.csv with 138700 rows.
Created chunks\chunk_22.csv with 138700 row