In [None]:
# Import necessary libraries
import pandas as pd
import os

# Merge every survey file found in `path` into one DataFrame (`merged_data`),
# print a short summary of it, and save it as 'merged_survey_data.csv'.

# Directory containing the CSV files to merge.
path = r'new_data'

# os.listdir returns entries in arbitrary order and also lists sub-directories
# (e.g. Jupyter's ".ipynb_checkpoints") and hidden files (e.g. ".DS_Store").
# Keep only visible regular files and sort them so that the row order of the
# merged dataset is reproducible from run to run.
file_list = sorted(
    name
    for name in os.listdir(path)
    if not name.startswith('.') and os.path.isfile(os.path.join(path, name))
)

# Fail early with an actionable message; otherwise the final pd.concat would
# raise a ValueError that does not mention the directory.
if not file_list:
    raise ValueError(f"No input files found in {path!r}; nothing to merge.")

# Number of files to read and concatenate at a time.
chunk_size = 1000

# One concatenated DataFrame per chunk of files.
chunked_data_frames = []

# Walk the file list in steps of chunk_size so that no single concat call
# receives more than chunk_size per-file frames.
for start in range(0, len(file_list), chunk_size):
    chunk_files = file_list[start:start + chunk_size]

    # Load every file of the current chunk into its own DataFrame.
    data_frames = [
        pd.read_csv(os.path.join(path, name), encoding='latin1', delimiter=',')
        for name in chunk_files
    ]

    # Stack the chunk's frames vertically, renumbering the rows from 0.
    chunked_data_frames.append(pd.concat(data_frames, ignore_index=True))

# Stack all chunk-level frames vertically into the final merged dataset.
merged_data = pd.concat(chunked_data_frames, ignore_index=True)

# Display information about the merged DataFrame.
print(merged_data.shape[0])  # Number of rows
print(merged_data.shape[1])  # Number of columns
merged_data.info()  # Column names, dtypes, non-null counts, memory usage

# Save the merged DataFrame to a CSV file for future use; index=True writes
# the 0..n-1 row index as the first (unnamed) column.
merged_data.to_csv('merged_survey_data.csv', index=True)