In [8]:
def combine_cleaned_csvs(folder_path, output_file='combined_cleaned_scripts.csv'):
    """
    Combine all cleaned CSV files in a folder into one DataFrame and save it.

    A file is selected when its name starts with ``cleaned_`` and ends with
    ``.csv``. Files are read in sorted filename order so the row order of the
    result is reproducible. A file whose name equals ``output_file`` is
    skipped, so re-running with an output name that itself matches the
    ``cleaned_*.csv`` pattern does not fold the previous result back in.

    Parameters:
    - folder_path (str): Path to the folder containing cleaned CSV files.
    - output_file (str): Name of the output combined CSV file; it is written
      inside ``folder_path``.

    Returns:
    - pandas.DataFrame: Rows of every matching file stacked with a fresh
      0..n-1 index, or an empty DataFrame when no file matches.
    """
    frames = []

    # os.listdir gives entries in arbitrary order; sort for a stable result.
    for filename in sorted(os.listdir(folder_path)):
        if not (filename.startswith('cleaned_') and filename.endswith('.csv')):
            continue
        if filename == output_file:
            # Never read back our own output from an earlier run.
            continue
        file_path = os.path.join(folder_path, filename)
        print(f"Combining file: {filename}")
        frames.append(pd.read_csv(file_path))

    # Concatenate once: growing a DataFrame inside the loop re-copies all
    # accumulated rows per file, and seeding the concat with an empty frame
    # relies on pandas behaviour that is deprecated. pd.concat rejects an
    # empty list, so fall back to an empty frame explicitly.
    if frames:
        combined_df = pd.concat(frames, ignore_index=True)
    else:
        combined_df = pd.DataFrame()

    # Save the combined DataFrame
    combined_file_path = os.path.join(folder_path, output_file)
    combined_df.to_csv(combined_file_path, index=False)
    print(f"Combined CSV saved as: {combined_file_path}")

    return combined_df

# Example usage: merge every cleaned_*.csv in the movies folder. The combined
# CSV is written into that same folder and the merged DataFrame is kept in
# `combined_df` for later cells.
combined_df = combine_cleaned_csvs('/home/sagemaker-user/trekBERT/Movies_cleaned_csv')



Combining file: cleaned_Star_Trek_Final_Frontier_Script.csv
Combining file: cleaned_Star_Trek_First_Contact_Script.csv
Combining file: cleaned_Star_Trek_Generations_Script.csv
Combining file: cleaned_Star_Trek_Insurrection_Script.csv
Combining file: cleaned_Star_Trek_Nemesis_Script.csv
Combining file: cleaned_Star_Trek_Script.csv
Combining file: cleaned_Star_Trek_Search_for_Spock_Script.csv
Combining file: cleaned_Star_Trek_Undiscovered_Country_Script.csv
Combining file: cleaned_Star_Trek_Voyage_Home_Script.csv
Combining file: cleaned_Star_Trek_Wrath_of_Khan_Script.csv
Combined CSV saved as: /home/sagemaker-user/trekBERT/Movies_cleaned_csv/combined_cleaned_scripts.csv
