# Traffic Volume vs Weather — Python Project

## 1–5: Data Import & Setup

In [5]:
!pip install -q kaggle


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\ASUS\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [6]:
pip install google.colab

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement google.colab (from versions: none)

[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\ASUS\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for google.colab


In [7]:
from google.colab import files
files.upload()

ModuleNotFoundError: No module named 'google.colab'

In [8]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content"

In [9]:
!kaggle datasets download -d orvile/traffic-and-weather-datasets

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\ASUS\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\Scripts\kaggle.exe\__main__.py", line 7, in <module>
  File "C:\Users\ASUS\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\kaggle\cli.py", line 68, in main
    out = args.func(**command_args)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ASUS\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\kaggle\api\kaggle_api_extended.py", line 1741, in dataset_download_cli
    with self.build_kaggle_client() as kaggle:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ASUS\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Py

In [None]:
import zipfile
with zipfile.ZipFile("/content/traffic-and-weather-datasets.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/traffic_data")

## 6–10: Data Loading & Inspection

In [None]:
import pandas as pd
df = pd.read_csv("/content/traffic_data/Metro_Interstate_Traffic_Volume.csv")

In [None]:
print(df.shape)

In [None]:
print(df.dtypes)

In [None]:
df.head()

In [None]:
df.isnull().sum()

## 11–15: Data Cleaning

In [None]:
df.dropna(inplace=True)

In [None]:
df['date_time'] = pd.to_datetime(df['date_time'])

In [None]:
df.set_index('date_time', inplace=True)

In [None]:
df = df.drop_duplicates()

In [None]:
print(df.info())

## 16–20: Exploratory Data Analysis (EDA)

In [None]:
corr = df.corr()
corr

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.show()

In [None]:
df['traffic_volume'].plot(figsize=(15,5), title='Traffic Volume Over Time')
plt.ylabel('Volume')
plt.show()

In [None]:
sns.histplot(df['traffic_volume'], bins=50, kde=True)
plt.title("Traffic Volume Distribution")
plt.show()

In [None]:
sns.boxplot(data=df, x='weather_main', y='traffic_volume')
plt.xticks(rotation=90)
plt.title("Traffic Volume by Weather Type")
plt.show()

## 21–25: Feature Engineering

In [None]:
df['hour'] = df.index.hour

In [None]:
df['dayofweek'] = df.index.dayofweek

In [None]:
df['month'] = df.index.month

In [None]:
df['is_weekend'] = df['dayofweek'].apply(lambda x: 1 if x >= 5 else 0)

In [None]:
df[['hour', 'dayofweek', 'month', 'is_weekend']].head()

## 26–30: Modeling & Evaluation

In [None]:
features = ['temp', 'rain_1h', 'snow_1h', 'clouds_all', 'hour', 'dayofweek', 'is_weekend']
X = df[features]
y = df['traffic_volume']

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
y_pred = model.predict(X_test)
print("RMSE:", mean_squared_error(y_test, y_pred, squared=False))
print("R²:", r2_score(y_test, y_pred))

In [None]:
import numpy as np
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]
sns.barplot(x=[features[i] for i in indices], y=importances[indices])
plt.title("Feature Importances")
plt.show()

## 31+: Bonus Snippets

In [None]:
import joblib
joblib.dump(model, 'traffic_model.pkl')

In [None]:
loaded_model = joblib.load('traffic_model.pkl')

In [None]:
sample = X_test.iloc[0:1]
prediction = loaded_model.predict(sample)
print("Predicted traffic volume:", prediction[0])

In [None]:
df['rolling_volume'] = df['traffic_volume'].rolling(window=24).mean()
df[['traffic_volume', 'rolling_volume']].plot(figsize=(15,5))
plt.title("Traffic Volume vs 24-Hour Moving Average")
plt.show()

In [None]:
df.to_csv("cleaned_traffic_data.csv")