# 🌍 Climate Change Modeling (Part B)

This notebook builds a machine learning model to predict global land temperatures using historical data.

## 1. Load and Preprocess Data

In [11]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load a sample of the large dataset
df = pd.read_csv("GlobalLandTemperaturesByCity.csv", usecols=["dt", "AverageTemperature"], nrows=500_000)

# Preprocess
df['dt'] = pd.to_datetime(df['dt'], errors='coerce')
df = df.dropna()
df['year'] = df['dt'].dt.year

# Group by year
yearly_avg = df.groupby('year')['AverageTemperature'].mean().reset_index()
yearly_avg = yearly_avg[yearly_avg['year'] >= 1900]


## 2. Feature Engineering

In [12]:

# Rolling average and polynomial feature
yearly_avg['temp_rolling3'] = yearly_avg['AverageTemperature'].rolling(window=3, min_periods=1).mean()
yearly_avg['year_squared'] = yearly_avg['year'] ** 2


## 3. Model Training (Linear Regression & Random Forest)

In [13]:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

X = yearly_avg[['year', 'year_squared', 'temp_rolling3']]
y = yearly_avg['AverageTemperature']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, shuffle=False)

# Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# Random Forest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Metrics
print("Linear Regression R2:", r2_score(y_test, lr_pred))
print("Random Forest R2:", r2_score(y_test, rf_pred))


Linear Regression R2: 0.44746385778344655
Random Forest R2: -0.45463056711926364


## 4. Save Trained Models

In [14]:

import joblib

joblib.dump(lr_model, "linear_temp_model.pkl")
joblib.dump(rf_model, "rf_temp_model.pkl")


['rf_temp_model.pkl']

## 5. Predict Future Temperatures (2024–2030)

In [15]:

future_years = pd.DataFrame({'year': list(range(2024, 2031))})
future_years['year_squared'] = future_years['year'] ** 2
future_years['temp_rolling3'] = [yearly_avg['temp_rolling3'].iloc[-1]] * len(future_years)

future_preds = rf_model.predict(future_years)
future_years['PredictedTemperature'] = future_preds
print(future_years)


   year  year_squared  temp_rolling3  PredictedTemperature
0  2024       4096576       18.37761             18.051359
1  2025       4100625       18.37761             18.051359
2  2026       4104676       18.37761             18.051359
3  2027       4108729       18.37761             18.051359
4  2028       4112784       18.37761             18.051359
5  2029       4116841       18.37761             18.051359
6  2030       4120900       18.37761             18.051359
