<a href="https://colab.research.google.com/github/imaansh/File-Splitting/blob/main/File_Splitting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
import os

# Mount Google Drive at /content/drive; the input and output paths used
# below live under /content/drive/MyDrive.
drive.mount('/content/drive')

def split_file_by_size(input_file, output_dir, num_files=7):
    """
    Splits a large text file into at most `num_files` smaller files of roughly
    equal byte size, always cutting on line boundaries.

    Parts are written to `output_dir` as 'output_part_<n>.txt' (n starts at 1).
    The data is copied in binary mode, so concatenating the parts in order
    reproduces the input byte-for-byte (line endings and encoding untouched).
    Fewer than `num_files` parts may be produced when lines are large relative
    to the target part size; the last part absorbs any remainder.

    Parameters:
    - input_file (str): Path to the input text file (e.g., '/content/drive/MyDrive/large_input.txt').
    - output_dir (str): Path to the output directory (e.g., '/content/drive/MyDrive/SplitFiles').
      It is created if it does not exist.
    - num_files (int): Maximum number of parts to split into; must be >= 1.

    Raises:
    - ValueError: If `num_files` is less than 1.
    - OSError: If the input file cannot be read or an output file cannot be written.
    """
    if num_files < 1:
        raise ValueError(f"num_files must be at least 1, got {num_files!r}")

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Target size for each part in bytes
    chunk_size = os.path.getsize(input_file) // num_files

    part_num = 1  # Track part number
    current_size = 0  # Bytes already written to the current part

    # Binary mode: sizes are exact on-disk bytes and no newline/encoding
    # translation happens between input and output.
    with open(input_file, 'rb') as source:
        output_file = open(os.path.join(output_dir, f"output_part_{part_num}.txt"), 'wb')
        try:
            for line in source:
                line_size = len(line)
                # Start a new part when this line would push the current one
                # past the target. Never rotate away from an empty part
                # (an oversized line would otherwise leave an empty file),
                # and never exceed the requested number of parts.
                if (current_size > 0
                        and current_size + line_size > chunk_size
                        and part_num < num_files):
                    output_file.close()
                    part_num += 1
                    output_file = open(
                        os.path.join(output_dir, f"output_part_{part_num}.txt"), 'wb'
                    )
                    current_size = 0

                output_file.write(line)
                current_size += line_size
        finally:
            # Always release the currently open part, even on error.
            output_file.close()

    print(f"File split into {part_num} parts and saved to {output_dir}")

# Example usage
# Replace both paths below with your own input file and Drive output folder.
# The input is split into at most 7 parts, written to output_directory as
# output_part_<n>.txt.
input_file_path = '/content/drive/MyDrive/Customer/customer_profile_20241022.txt'
output_directory = '/content/drive/MyDrive/Split_Customer_Files'
split_file_by_size(input_file_path, output_directory, num_files=7)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
File split into 7 parts and saved to /content/drive/MyDrive/Split_Customer_Files
