In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Lenient float conversion used when reading optional XML attributes
def safe_float(value, default=0.0):
    """Convert ``value`` to ``float``, falling back to ``default``.

    Args:
        value: Anything accepted by ``float()``; ``None`` and non-numeric
            strings are tolerated.
        default: Value returned when the conversion raises ``TypeError``
            (e.g. ``None``) or ``ValueError`` (e.g. ``"abc"``).

    Returns:
        ``float(value)`` on success, otherwise ``default`` unchanged.
    """
    try:
        converted = float(value)
    except (TypeError, ValueError):
        # Missing attribute (None) or unparsable text: use the fallback.
        return default
    else:
        return converted

# Read the warm/cold emission events of one XML file into a list of records
def extract_emission_data(file_path, file_name):
    """Return one dict per warm/cold emission event found in an XML file.

    Only ``event`` elements that are direct children of the document root
    and whose ``type`` attribute is ``warmEmissionEvent`` or
    ``coldEmissionEvent`` are kept.

    Args:
        file_path: Source handed to ``ET.parse``.
        file_name: Label stored under the ``'File'`` key of every record so
            rows from different files can be told apart later.

    Returns:
        A list of dicts with the keys ``time``, ``linkId``, ``vehicleId``,
        one key per pollutant, and ``File``. ``time`` and the pollutant
        values go through ``safe_float`` (so a missing or non-numeric
        attribute becomes its default); ``linkId`` and ``vehicleId`` are the
        raw attribute values, or ``None`` when absent.

    Errors raised by ``ET.parse`` are not caught here.
    """
    emission_event_types = ('warmEmissionEvent', 'coldEmissionEvent')
    # Order matters: it fixes the key order of every record.
    pollutant_keys = (
        'NO2', 'CO2_TOTAL', 'NOX', 'SO2', 'HC', 'CO',
        'FC', 'PM', 'NMHC', 'NH3', 'PM2_5',
    )

    def to_record(event):
        # Build the record in the fixed order: identifiers, pollutants, label.
        record = {
            'time': safe_float(event.get('time')),
            'linkId': event.get('linkId'),
            'vehicleId': event.get('vehicleId'),
        }
        for key in pollutant_keys:
            record[key] = safe_float(event.get(key))
        record['File'] = file_name
        return record

    root = ET.parse(file_path).getroot()
    return [
        to_record(event)
        for event in root.findall('event')
        if event.get('type') in emission_event_types
    ]

# Input files to compare: XML path plus the label shown on the plots
files = [
    {'path': 'osm.xml', 'name': 'osm'},
    {'path': 'landuse.xml', 'name': 'landuse'},
    {'path': 'buildings.xml', 'name': 'building'},
]

# Gather the emission records of every file into one flat list
all_emission_data = []
for file in files:
    all_emission_data += extract_emission_data(file['path'], file['name'])

# Step 3: Analyze the data
df = pd.DataFrame(all_emission_data)
emissions_by_file = df.groupby('File')

# Total CO2_TOTAL per file
total_CO2 = (
    emissions_by_file['CO2_TOTAL']
    .sum()
    .reset_index()
    .rename(columns={'CO2_TOTAL': 'Total CO2 Emission'})
)

# Totals for NO2, NH3, PM2_5 and PM per file, reshaped to long format
# (one row per file/pollutant pair) so the pollutant can be used as hue
other_pollutants = ['NO2', 'NH3', 'PM2_5', 'PM']
total_other_emissions = pd.melt(
    emissions_by_file[other_pollutants].sum().reset_index(),
    id_vars='File',
    var_name='Emission Type',
    value_name='Total Emission',
)

# NOTE(review): both y-axis labels say "kg"; nothing in this file shows the
# unit of the XML values -- confirm against the producer of the event files.

# Bar chart: total CO2 emissions by file
co2_ax = plt.figure(figsize=(10, 6)).gca()
sns.barplot(data=total_CO2, x='File', y='Total CO2 Emission', ax=co2_ax)
co2_ax.set_title('Total CO2 Emission')
co2_ax.set_xlabel('File')
co2_ax.set_ylabel('Total CO2 Emission (kg)')
plt.show()

# Grouped bar chart: the remaining pollutants by file
other_ax = plt.figure(figsize=(10, 6)).gca()
sns.barplot(
    data=total_other_emissions,
    x='File',
    y='Total Emission',
    hue='Emission Type',
    ax=other_ax,
)
other_ax.set_title('Total NO2, NH3, PM2.5, and PM Emissions ')
other_ax.set_xlabel('File')
other_ax.set_ylabel('Total Emission (kg)')
other_ax.legend(title='Emission Type')
plt.show()
