In [1]:
import pandas as pd
import openpyxl
import os
import re

# Export every sheet of an Excel workbook to its own CSV file.
#
# Steps:
#   1. List the sheet names of `file_path`.
#   2. Read each sheet into a DataFrame, keyed in `dataframes` by a
#      filesystem-friendly version of the sheet name.
#   3. Write each DataFrame to `<output_dir>/<clean_name>.csv`.
file_path = '1_finalized_datasets.xlsx'

# Used for the sheet-name listing. The `workbook` name is kept so any code
# that follows and refers to it keeps working.
workbook = openpyxl.load_workbook(file_path)

dataframes = {}
# Open the file with pandas ONCE and parse individual sheets from that handle.
# Calling pd.read_excel(file_path, sheet_name=...) inside the loop would
# re-open and re-parse the workbook for every sheet.
with pd.ExcelFile(file_path) as excel_file:
    for position, sheet in enumerate(workbook.sheetnames, start=1):
        print(f"Processing sheet: {sheet}")
        # Drop everything that is neither a word character nor whitespace
        # (emoji, punctuation), trim the ends, then turn spaces into underscores.
        clean_name = re.sub(r'[^\w\s]', '', sheet).strip()
        clean_name = clean_name.replace(" ", "_")
        if not clean_name:
            # A sheet named only with symbols/emoji would otherwise end up
            # with an empty key and be exported as ".csv".
            clean_name = f"sheet_{position}"
        if clean_name in dataframes:
            # Two sheets can collapse to the same cleaned name; the first wins.
            print(f"⚠️ Skipping duplicate sheet name: {clean_name}")
            continue
        df = excel_file.parse(sheet)
        dataframes[clean_name] = df
        print(f"✅ Created DataFrame: {clean_name} ({df.shape[0]} rows)")

# Create an output directory (no error if it already exists)
output_dir = "exported_csvs"
os.makedirs(output_dir, exist_ok=True)

# Export each DataFrame to CSV, without the pandas row index
for name, df in dataframes.items():
    csv_path = os.path.join(output_dir, f"{name}.csv")
    df.to_csv(csv_path, index=False)
    print(f"📁 Exported: {csv_path}")


Processing sheet: Sheet1
✅ Created DataFrame: Sheet1 (1322 rows)
Processing sheet: Sheet2
✅ Created DataFrame: Sheet2 (1336 rows)
Processing sheet: category
✅ Created DataFrame: category (11 rows)
Processing sheet: allergen
✅ Created DataFrame: allergen (8 rows)
Processing sheet: countrymap 
✅ Created DataFrame: countrymap (76 rows)
Processing sheet: 📝 recipe_ingredient
✅ Created DataFrame: recipe_ingredient (8209 rows)
Processing sheet: 🏷️ recipe_category_df
✅ Created DataFrame: recipe_category_df (8123 rows)
Processing sheet: 🍴 recipes
✅ Created DataFrame: recipes (1322 rows)
Processing sheet: 📊 recipe_nutrition
✅ Created DataFrame: recipe_nutrition (1322 rows)
Processing sheet: 🧂 final_ingredient_df
✅ Created DataFrame: final_ingredient_df (999 rows)
📁 Exported: exported_csvs\Sheet1.csv
📁 Exported: exported_csvs\Sheet2.csv
📁 Exported: exported_csvs\category.csv
📁 Exported: exported_csvs\allergen.csv
📁 Exported: exported_csvs\countrymap.csv
📁 Exported: exported_csvs\recipe_ingredient