### Analysis of the Fuel Economy Dataset

In [1]:
#importing modules

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Load the merged fuel-economy records from the CSV in the working directory.
# NOTE(review): the preview contains an 'Unnamed: 0' column, which looks like
# an index saved with the file -- consider index_col=0 once it is confirmed
# that no later cell relies on that column.
df = pd.read_csv(filepath_or_buffer='merged_fuel_economy_data.csv')

# Show the first five rows as this cell's output.
df.head(n=5)

Unnamed: 0,model,make,year,displ,cyl,trans,trans_type,drive,fuel,veh_class,city_mpg,hwy_mpg,cmb_mpg,air_pollution_score,greenhouse_gas_score,smartway
0,ACURA MDX,ACURA,2008,3.7,6,Auto-S5,Auto,4WD,Gasoline,SUV,15,20,17,7,4,No
1,ACURA MDX,ACURA,2008,3.7,6,Auto-S5,Auto,4WD,Gasoline,SUV,15,20,17,6,4,No
2,ACURA RDX,ACURA,2008,2.3,4,Auto-S5,Auto,4WD,Gasoline,SUV,17,22,19,7,5,No
3,ACURA RDX,ACURA,2008,2.3,4,Auto-S5,Auto,4WD,Gasoline,SUV,17,22,19,6,5,No
4,ACURA RL,ACURA,2008,3.5,6,Auto-S5,Auto,4WD,Gasoline,midsize car,16,24,19,7,5,No


In [None]:
# Three side-by-side histograms (with KDE overlays), one per fuel-economy
# column, each split by the `year` column.
mpg_panels = {
    'cmb_mpg': 'distribution of combined fuel economy',
    'city_mpg': 'distribution of city fuel economy',
    'hwy_mpg': 'distribution of highway fuel economy',
}

plt.figure(figsize=(18, 5))
for position, (mpg_col, panel_title) in enumerate(mpg_panels.items(), start=1):
    # Panels are laid out left to right in a 1 x 3 grid.
    plt.subplot(1, 3, position)
    sns.histplot(data=df, x=mpg_col, hue='year', kde=True, palette='Set1', bins=20)
    plt.title(panel_title)

In [None]:
# Step 1: Normalize the sample sizes
# 2008 and 2018 contribute different numbers of rows, so raw histogram counts
# are not directly comparable between the two years.
is_2008 = df['year'] == 2008
is_2018 = df['year'] == 2018
total_samples = len(df)

# Share of all rows that belongs to each year
prop_2008 = int(is_2008.sum()) / total_samples
prop_2018 = int(is_2018.sum()) / total_samples

# Step 2: Weighted analysis
# Combined MPG multiplied by the row's year share (every non-2008 row uses the
# 2018 share). This rescales the MPG values themselves rather than re-weighting
# counts, so it is NOT what gets plotted below; the column is kept only so that
# any later cell reading `weighted_cmb_mpg` still finds it.
df['weighted_cmb_mpg'] = df['cmb_mpg'] * np.where(is_2008, prop_2008, prop_2018)

# Step 3: Comparison
# Plot the real combined-MPG values and give every row a weight of
# 1 / (rows in its year), so each year's bars sum to 1 and the two
# distributions can be compared regardless of how many rows each year has.
mpg_by_year = {
    '2008 (Adjusted)': (df.loc[is_2008, 'cmb_mpg'].dropna(), 'blue'),
    '2018 (Adjusted)': (df.loc[is_2018, 'cmb_mpg'].dropna(), 'red'),
}

# Shared bin edges: both years must use identical bins for the bars to line up.
bin_edges = np.histogram_bin_edges(
    df.loc[is_2008 | is_2018, 'cmb_mpg'].dropna(), bins=20
)

plt.figure(figsize=(5, 3))
for label, (mpg_values, color) in mpg_by_year.items():
    if mpg_values.empty:
        # Nothing to draw for this year (also avoids dividing by zero below).
        continue
    share_per_row = np.full(len(mpg_values), 1 / len(mpg_values))
    plt.hist(mpg_values, bins=bin_edges, weights=share_per_row,
             alpha=0.5, label=label, color=color)
plt.title('Distribution of Adjusted Combined MPG (2008 vs 2018)')
plt.xlabel('Combined MPG')
plt.ylabel('Relative frequency')
plt.legend()
plt.show()