# Climate Change Modeling Project

In [5]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import folium
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Step 2: Load Dataset
# Replace the path below with the location of your own copy of the dataset.
# The r"" (raw string) prefix is required: without it the "\U" in the Windows
# path is parsed as a unicode escape and the cell fails with a SyntaxError.
data = pd.read_csv(r"C:\Users\USER\Downloads\climate_nasa.csv")

# Preview dataset
print(data.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()
print(data.describe())

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (2119220294.py, line 3)

In [None]:
#  Step 3: Exploratory Data Analysis (EDA)
# Plot target variable distribution
sns.histplot(data['emission'], bins=30, kde=True)
plt.title('Distribution of CO2 Emissions')
plt.xlabel('Emission')
plt.ylabel('Frequency')
plt.show()

# Plot correlation heatmap
# Correlate numeric columns only: since pandas 2.0 DataFrame.corr() no longer
# silently skips non-numeric columns and raises if any are present
# (e.g. a string identifier column).
numeric_data = data.select_dtypes(include='number')
plt.figure(figsize=(12, 8))
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

# Geospatial visualization (example)
# Missing values are only imputed in Step 4, so rows without a complete
# coordinate pair are dropped here; folium rejects NaN marker locations.
coords = data[['latitude', 'longitude']].dropna()
geometry = gpd.points_from_xy(coords.longitude, coords.latitude)
geo_df = gpd.GeoDataFrame(coords, geometry=geometry)
# Hard-coded initial map centre and zoom; adjust to the region your data covers.
map_visual = folium.Map(location=[-0.51, 29.29], zoom_start=8)
# zip over the two coordinate columns avoids building a Series per row,
# which is what iterrows() does.
for lat, lon in zip(geo_df.latitude, geo_df.longitude):
    folium.CircleMarker(location=[lat, lon],
                        radius=2, color='blue').add_to(map_visual)
map_visual.save('geospatial_map.html')

In [None]:
#  Step 4: Data Preprocessing
# Handle missing values: fill each numeric column with its own median.
# numeric_only=True is needed because, since pandas 2.0, median() raises on
# non-numeric columns (e.g. a string 'ID' column) instead of skipping them.
# Columns without a computed median are left untouched by fillna.
data.fillna(data.median(numeric_only=True), inplace=True)

# Feature selection (drop unnecessary columns)
# X stays a DataFrame so its column names remain available after scaling.
X = data.drop(['emission', 'ID'], axis=1)  # Customize according to dataset
y = data['emission']

# Train-test split (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling
# Fit the scaler on the training split only, then apply the same
# transformation to the test split so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
#  Step 5: Model Training
# Random forest regressor with a fixed seed so repeated runs build the same
# forest; all other hyper-parameters are left at the library defaults.
model = RandomForestRegressor(
    random_state=42,
)
# Fit on the scaled training features and their emission targets.
model.fit(X=X_train, y=y_train)

In [None]:
# Step 6: Model Evaluation
# Predict on the held-out split and score the predictions against the truth.
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report each metric to three decimal places.
for metric_label, metric_value in (('MAE', mae), ('MSE', mse), ('R² Score', r2)):
    print(f'{metric_label}: {metric_value:.3f}')

# Plot Actual vs Predicted
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.5)
# Dashed red diagonal marks perfect prediction; it spans the range of the
# full target column y, not just the test split.
target_range = [y.min(), y.max()]
ax.plot(target_range, target_range, 'r--')
ax.set_xlabel('Actual Emission')
ax.set_ylabel('Predicted Emission')
ax.set_title('Actual vs Predicted CO2 Emission')
plt.show()

In [None]:
#  Step 7: Future Projections Example
# Assuming future_climate_data.csv is available in the working directory.
# This step is best-effort: problems are reported, not raised, so the rest of
# the notebook can still run without the projection file.
try:
    future_data = pd.read_csv('future_climate_data.csv')
except (OSError, ValueError) as e:
    # OSError covers a missing or unreadable file; pandas' empty-file and
    # parser errors derive from ValueError.
    print('Future projection data not available:', e)
else:
    try:
        # Keep exactly the feature columns used for training (X from Step 4),
        # in the same order, so extra columns such as 'ID' do not break the
        # scaler and a missing feature is reported by name.
        future_features = future_data[X.columns]
        future_data_scaled = scaler.transform(future_features)
        future_predictions = model.predict(future_data_scaled)
        print('Future CO2 Emission Projections:', future_predictions)
    except (KeyError, ValueError) as e:
        # The file was read but could not be used; report that separately so
        # a schema or value problem is not mistaken for a missing file.
        print('Future projection failed:', e)

In [None]:
#  Step 8: Save Model (Optional)
# Serialize the fitted estimator to 'climate_model.pkl' in the working directory.
import joblib

joblib.dump(value=model, filename='climate_model.pkl')