# Question 1: Travel Data Analysis

We merge monthly passenger movement data for the USA (January 1990 - April 2004), plot simple time-series graphs, and compare travel patterns across automobile, train, and air travel.

## 1. Load Required Libraries

In [5]:
# Third-party libraries used by this notebook: pandas loads and reshapes the
# data, matplotlib draws the figures, and seaborn supplies the figure styling.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Select seaborn's 'whitegrid' style once, up front, so every figure drawn
# afterwards shares the same look.
sns.set_style('whitegrid')

## 2. Load Data

In [4]:
# Read each raw source file into its own DataFrame.
# NOTE: the air-travel source is a legacy .xls workbook; pandas raises
# ImportError on it unless the optional `xlrd` package (>= 2.0.1) is installed.
df_auto = pd.read_csv('Data/Travel Data/Auto_vehicle_miles_traveled_USA.csv')
df_train = pd.read_csv('Data/Travel Data/TrainTravel_USA.csv')
df_air = pd.read_excel('Data/Travel Data/AirTravel_USA.xls')

# Preview the first rows of every frame, each under its own heading
# (heading and preview are separated by a newline, one pair per call).
print("Auto Travel Data:", df_auto.head(), sep="\n")
print("\nTrain Travel Data:", df_train.head(), sep="\n")
print("\nAir Travel Data:", df_air.head(), sep="\n")

ImportError: Missing optional dependency 'xlrd'. Install xlrd >= 2.0.1 for xls Excel support Use pip or conda to install xlrd.

## 3. Clean and Prepare Data

In [None]:
# NOTE: this cell rebinds df_auto / df_train / df_air to their trimmed versions,
# so re-running it without first re-running the load cell fails: the raw
# 'Month' / 'Year' columns it reads are no longer present.

def _month_start(frame):
    """Return a datetime Series built from `frame`'s 'Year' and 'Month' columns.

    The two columns are stringified and joined as '<Year>-<Month>-01', i.e. every
    row is dated to the first day of its month.
    """
    # assumes the joined text is something pd.to_datetime can infer without an
    # explicit format -- TODO confirm against the raw train/air files
    return pd.to_datetime(frame['Year'].astype(str) + '-' + frame['Month'].astype(str) + '-01')


# The auto file keeps the month as text in abbreviated-month / two-digit-year
# form (matched by '%b-%y'); the train and air files carry separate Year and
# Month columns instead.
auto_dates = pd.to_datetime(df_auto['Month'], format='%b-%y')
train_dates = _month_start(df_train)
air_dates = _month_start(df_air)

# Keep only the date key plus the single measure of interest from each source,
# renaming the measure after its transport mode.
df_auto = (
    df_auto.assign(Date=auto_dates)
    .loc[:, ['Date', 'VMT (billions)']]
    .rename(columns={'VMT (billions)': 'Automobile'})
)
df_train = (
    df_train.assign(Date=train_dates)
    .loc[:, ['Date', 'Rail PM']]
    .rename(columns={'Rail PM': 'Train'})
)
df_air = (
    df_air.assign(Date=air_dates)
    .loc[:, ['Date', 'Air RPM (000s)']]
    .rename(columns={'Air RPM (000s)': 'Air'})
)

print("Cleaned data:", df_auto.head(), sep="\n")

## 4. Merge Datasets

**Tidy Data Principle**: Merging on Date creates a single dataset where each observation (month) is in one row with all related variables (transport modes) as columns.

In [None]:
# Join the three cleaned frames on their shared 'Date' key, one pair at a time.
# No `how` is given, so pandas performs its default inner join: only dates
# present in all three sources survive into `df`.
auto_and_train = pd.merge(df_auto, df_train, on='Date')
df = pd.merge(auto_and_train, df_air, on='Date')

# Report the size of the result, a preview, and the per-column null counts.
print(f"Merged dataset shape: {df.shape}", df.head(), sep="\n")
print("\nMissing values:", df.isna().sum(), sep="\n")

## 5. Visualize Travel Patterns

In [None]:
# One stacked panel per transport mode over the full merged date range.
# Each entry is (column in df, panel title, y-axis label).
panels = [
    ('Automobile', 'Automobile Travel (Billions of Miles)', 'Billions of Miles'),
    ('Train', 'Train Travel (Passenger Miles)', 'Passenger Miles'),
    ('Air', 'Air Travel (Thousands of Revenue Passenger Miles)', 'Revenue Passenger Miles (000s)'),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(12, 10))

for ax, (column, title, ylabel) in zip(axes, panels):
    ax.plot(df['Date'], df[column])
    ax.set_title(title)
    ax.set_ylabel(ylabel)

# Only the bottom panel carries the x-axis label.
axes[-1].set_xlabel('Date')

plt.tight_layout()
plt.show()

## 6. September 11, 2001 Analysis

In [1]:
# Restrict the merged data to the three calendar years around the attacks.
# Series.between is inclusive at both ends, matching a >= / <= pair.
in_window = df['Date'].between('2000-01-01', '2002-12-31')
df_filtered = df[in_window]
sept_11 = pd.to_datetime('2001-09-11')

# One stacked panel per transport mode: (column in df, panel title, y-axis label).
# The y-axis labels reuse the units shown on the full-range plots in section 5,
# so this figure states its units too and can be read on its own.
panels = [
    ('Automobile', 'Automobile Travel (2000-2002)', 'Billions of Miles'),
    ('Train', 'Train Travel (2000-2002)', 'Passenger Miles'),
    ('Air', 'Air Travel (2000-2002)', 'Revenue Passenger Miles (000s)'),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(12, 10))

for ax, (column, title, ylabel) in zip(axes, panels):
    ax.plot(df_filtered['Date'], df_filtered[column])
    # Dashed red vertical line marking the date of the attacks on every panel.
    ax.axvline(sept_11, color='red', linestyle='--', label='9/11')
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.legend()

# Only the bottom panel carries the x-axis label.
axes[-1].set_xlabel('Date')

plt.tight_layout()
plt.show()

NameError: name 'df' is not defined