In [15]:
import pandas as pd
import json
import os

# Define the file names
file_names = ["Cupertino.csv", "District Office.csv", "Fremont.csv", "Homestead.csv", "Lynbrook.csv", "Monta Vista.csv"]

# Directory paths
input_directory = '/mnt/data/'
output_directory = '/mnt/data/processed/'

# Create output directory if not exists
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# Function to process CSV files
def process_csv(file_name):
    try:
        # Read the CSV file
        df = pd.read_csv(input_directory + file_name)
        
        # Print columns for debugging
        print(f"Processing {file_name}")
        print(f"Columns: {df.columns.tolist()}")
        
        # Extract Date and CO2 columns
        if file_name == "District Office.csv":
            co2_column = 'Total'
        else:
            co2_column = [col for col in df.columns if 'CO2' in col or 'Carbon' in col]
            if not co2_column:
                print(f"No CO2/Carbon column found in {file_name}")
                return
            co2_column = co2_column[0]
        
        df_co2 = df[[' Date', co2_column]].copy()
        
        # Rename columns
        df_co2.columns = ['Date', 'CO2']
        
        # Save to new CSV
        output_csv_file = output_directory + file_name.replace('.csv', '_co2.csv')
        df_co2.to_csv(output_csv_file, index=False)
        
        # Convert to JSON
        json_data = df_co2.to_json(orient='records')
        output_json_file = output_csv_file.replace('.csv', '.json')
        with open(output_json_file, 'w') as json_file:
            json_file.write(json_data)
        
        print(f"Processed {file_name} successfully.")
    
    except Exception as e:
        print(f"Error processing {file_name}: {e}")

# Process each file
for file_name in file_names:
    process_csv(file_name)


Processing Cupertino.csv
Columns: ['School', ' Date', ' KWH', ' Therms', 'Date Set 2', 'KWH', 'Therms', 'Date Set 3', 'Kwh', 'Therms.1', 'Unnamed: 10', 'KWH Co2', 'Therm CO2', 'Total']
Error processing Cupertino.csv: "[' Date Set 1'] not in index"
Processing District Office.csv
Columns: ['School', ' Date', ' KWH', ' Therms', 'Date Set 2', 'KWH', 'Therm', 'Date Set 3', 'KWH.1', 'Therms', 'KWH Co2', 'Therms Co2', 'Total ']
No CO2/Carbon column found in District Office.csv
Processing Fremont.csv
Columns: ['School', ' Date', ' KWH', ' Therms', 'Date Set 2', 'Unnamed: 5', 'Unnamed: 6', 'Date Set 3', 'Unnamed: 8', 'Unnamed: 9', 'Date Set 4', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'KWH Co2', 'Therms CO2', 'Total']
Error processing Fremont.csv: "[' Date Set 1'] not in index"
Processing Homestead.csv
Columns: ['School', ' Date Set 1', ' KWH', ' Therms', 'Date Set 2', 'KWH #2', 'Therms #2', 'Date Set 3', 'Kwh #3', 'Therms #3', 'KWH TO CO2', 'Therms to CO2', 'Unnamed: 12', 'KWH CO2', 'Therm