In [3]:
import pandas as pd
import glob
import os

def combine_csv_files(input_path, output_filename):
    """
    Combines every CSV file matching a glob pattern into a single CSV file.

    Files are read in sorted path order so the row order of the result is
    reproducible from run to run. If the output file itself matches the
    pattern (for example, it is written into the same folder as the inputs),
    it is skipped so that re-running does not duplicate previously combined
    rows.

    The inputs are expected to share the same column structure. This is not
    enforced: pandas.concat is called with its default settings, which keep
    the union of all columns and fill the gaps with NaN.

    Parameters:
    -----------
    input_path : str
        Glob pattern selecting the CSV files (e.g., "data/*.csv")
    output_filename : str
        Path of the output CSV file

    Returns:
    --------
    pd.DataFrame or None
        The combined DataFrame, or None when no input file matches
    """
    # Normalise the output path so it can be compared against glob matches
    output_key = os.path.normcase(os.path.abspath(output_filename))

    # glob returns matches in arbitrary order; sort for a deterministic result
    # and drop the output file so a previous run's result is never re-read.
    all_files = sorted(
        path
        for path in glob.glob(input_path)
        if os.path.normcase(os.path.abspath(path)) != output_key
    )

    # Nothing to combine: report and signal with None
    if not all_files:
        print(f"No CSV files found in {input_path}")
        return None

    # Read each CSV file, reporting its size as it is loaded
    dataframes_list = []
    for filename in all_files:
        df = pd.read_csv(filename)
        dataframes_list.append(df)
        print(f"Loaded {filename} with {len(df)} rows and {len(df.columns)} columns")

    # Stack all DataFrames into one with a fresh 0..n-1 index
    combined_df = pd.concat(dataframes_list, ignore_index=True)

    # Save the combined DataFrame without writing the index column
    combined_df.to_csv(output_filename, index=False)

    print(f"\nSuccessfully combined {len(all_files)} files into {output_filename}")
    print(f"Combined DataFrame has {len(combined_df)} rows and {len(combined_df.columns)} columns")

    return combined_df



# Merge every CSV found in the project's "data" folder into one file.
# The trailing *.csv is a glob wildcard, so each CSV in that directory is picked up.
combined_data = combine_csv_files(
    r"C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_project\data\*.csv",
    "combined_lll_data.csv",
)

# Preview the top of the merged table; None means no input files were found
if combined_data is not None:
    print("\nFirst 5 rows of combined data:")
    print(combined_data.head())

Loaded C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_project\data\breastfeeding_info_links.csv with 10 rows and 3 columns
Loaded C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_project\data\breastfeeding_info_links10.csv with 10 rows and 3 columns
Loaded C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_project\data\breastfeeding_info_links2.csv with 10 rows and 3 columns
Loaded C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_project\data\breastfeeding_info_links3.csv with 10 rows and 3 columns
Loaded C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_project\data\breastfeeding_info_links4.csv with 10 rows and 3 columns
Loaded C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_project\data\breastfeeding_info_links5.csv with 10 rows and 3 columns
Loaded C:\Users\Jade Ana-Maria\peckham1\NLP_for_Creatives\NLP_for_Creatives\final_projec