# Climate Risk and Disaster Management: Heat Wave and Cold Wave Prediction

This project analyzes India's weather data to predict future heat waves and cold waves for effective disaster management.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.cluster import KMeans, DBSCAN
from sklearn.tree import DecisionTreeClassifier

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA

import datetime as dt
import json
from scipy import stats
from scipy.stats import pearsonr, spearmanr
import joblib
import warnings
# NOTE(review): with no category/module argument this suppresses every warning
# for the rest of the session, including any emitted by the modelling libraries
# imported above — consider narrowing the filter once the noisy sources are known.
warnings.filterwarnings('ignore')


In [None]:
# Load the raw weather observations and take a first look at their layout.
df = pd.read_csv('india_weather_data.csv')
column_names = df.columns.tolist()
print(f"Dataset shape: {df.shape}")
print(f"\nColumns: {column_names}")
# Last expression of the cell: shows the first rows in the notebook output.
df.head()


In [None]:
# Print the dtype / non-null summary, then the number of missing cells per column.
df.info()
missing_per_column = df.isna().sum()
print("\nMissing values:")
print(missing_per_column)


In [None]:
# Parse the timestamp column; values that cannot be parsed become NaT and the
# corresponding rows are dropped before the calendar parts are derived.
parsed_dates = pd.to_datetime(df['date'], errors='coerce')
df = df.assign(date=parsed_dates).dropna(subset=['date'])
df = df.assign(
    year=df['date'].dt.year,
    month=df['date'].dt.month,
    day=df['date'].dt.day,
    hour=df['date'].dt.hour,
)

first_date, last_date = df['date'].min(), df['date'].max()
print(f"Date range: {first_date} to {last_date}")
print(f"Total records after cleaning: {len(df)}")


In [None]:
# Columns that are expected to hold numeric measurements.
numeric_columns = [
    'wind_speed', 'cloud_cover', 'precipitation_probability', 'pressure_surface_level',
    'dew_point', 'uv_index', 'visibility', 'rainfall', 'solar_radiation', 'snowfall',
    'max_temperature', 'min_temperature', 'max_humidity', 'min_humidity',
]

# Coerce only the listed columns the dataset actually contains; entries that
# cannot be read as numbers turn into NaN.
present_numeric = [name for name in numeric_columns if name in df.columns]
for name in present_numeric:
    df[name] = pd.to_numeric(df[name], errors='coerce')

# Keep only rows where both temperature readings are available.
temperature_columns = ['max_temperature', 'min_temperature']
has_both_temperatures = df[temperature_columns].notna().all(axis=1)
df = df[has_both_temperatures]
print(f"Records after temperature cleaning: {len(df)}")


In [None]:
# Exploratory overview: a 2x3 grid of distributions and counts.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
(ax_max, ax_min, ax_heat), (ax_month, ax_rain, ax_humid) = axes

df['max_temperature'].hist(bins=50, alpha=0.7, ax=ax_max)
ax_max.set_title('Max Temperature Distribution')
ax_max.set_xlabel('Temperature (°C)')

df['min_temperature'].hist(bins=50, alpha=0.7, color='blue', ax=ax_min)
ax_min.set_title('Min Temperature Distribution')
ax_min.set_xlabel('Temperature (°C)')

# Counts of the 'heatwave' column as read from the CSV.
heatwave_counts = df['heatwave'].value_counts()
heatwave_counts.plot(kind='bar', ax=ax_heat)
ax_heat.set_title('Heat Wave Occurrences')
ax_heat.set_xlabel('Heat Wave (0=No, 1=Yes)')

records_per_month = df['month'].value_counts().sort_index()
records_per_month.plot(kind='bar', ax=ax_month)
ax_month.set_title('Data Distribution by Month')
ax_month.set_xlabel('Month')

df['rainfall'].hist(bins=50, alpha=0.7, color='green', ax=ax_rain)
ax_rain.set_title('Rainfall Distribution')
ax_rain.set_xlabel('Rainfall (mm)')

# Mean of the max/min humidity readings; stored on df, not just plotted,
# because later cells use the 'humidity' column as a feature.
df['humidity'] = df['max_humidity'].add(df['min_humidity']).div(2)
df['humidity'].hist(bins=50, alpha=0.7, color='orange', ax=ax_humid)
ax_humid.set_title('Average Humidity Distribution')
ax_humid.set_xlabel('Humidity (%)')

fig.tight_layout()
plt.show()


In [None]:
# Per-row temperature summaries.
df['temperature_range'] = df['max_temperature'].sub(df['min_temperature'])
df['avg_temperature'] = df['max_temperature'].add(df['min_temperature']).div(2)

# Extreme-event cut-offs are taken from this dataset's own distribution:
# the top 5% of daily maxima and the bottom 5% of daily minima.
heat_wave_threshold, cold_wave_threshold = (
    df['max_temperature'].quantile(0.95),
    df['min_temperature'].quantile(0.05),
)

print(f"Heat wave threshold (95th percentile): {heat_wave_threshold:.2f}°C")
print(f"Cold wave threshold (5th percentile): {cold_wave_threshold:.2f}°C")

# Binary labels: 1 where the reading is at or beyond the respective cut-off.
df['predicted_heatwave'] = df['max_temperature'].ge(heat_wave_threshold).astype(int)
df['predicted_coldwave'] = df['min_temperature'].le(cold_wave_threshold).astype(int)


In [None]:
# Monthly patterns: a 2x2 grid with average temperature, event counts per
# month, and a max-vs-min temperature scatter.
#
# Every plot is given its target axes explicitly. DataFrame.plot() opens a new
# figure when no `ax` is supplied, which would leave the fourth panel empty and
# send the scatter (plus its title/labels) to a separate figure.
fig, axes = plt.subplots(2, 2, figsize=(15, 8))
(ax_avg, ax_heat), (ax_cold, ax_scatter) = axes

monthly_avg = df.groupby('month')['avg_temperature'].mean()
monthly_avg.plot(kind='line', marker='o', ax=ax_avg)
ax_avg.set_title('Average Temperature by Month')
ax_avg.set_xlabel('Month')
ax_avg.set_ylabel('Temperature (°C)')

monthly_heatwaves = df.groupby('month')['predicted_heatwave'].sum()
monthly_heatwaves.plot(kind='bar', color='red', alpha=0.7, ax=ax_heat)
ax_heat.set_title('Heat Wave Events by Month')
ax_heat.set_xlabel('Month')
ax_heat.set_ylabel('Number of Heat Wave Events')

monthly_coldwaves = df.groupby('month')['predicted_coldwave'].sum()
monthly_coldwaves.plot(kind='bar', color='blue', alpha=0.7, ax=ax_cold)
ax_cold.set_title('Cold Wave Events by Month')
ax_cold.set_xlabel('Month')
ax_cold.set_ylabel('Number of Cold Wave Events')

df.plot(x='max_temperature', y='min_temperature', kind='scatter', alpha=0.5, ax=ax_scatter)
ax_scatter.set_title('Max vs Min Temperature')
ax_scatter.set_xlabel('Max Temperature (°C)')
ax_scatter.set_ylabel('Min Temperature (°C)')

fig.tight_layout()
plt.show()


In [None]:
# Variables included in the pairwise correlation matrix.
correlation_features = [
    'max_temperature', 'min_temperature', 'humidity', 'rainfall',
    'wind_speed', 'pressure_surface_level', 'uv_index', 'cloud_cover',
]

correlation_data = df.loc[:, correlation_features].corr()

# Annotated heatmap with the colour scale centred on zero correlation.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_data, annot=True, cmap='coolwarm', center=0, fmt='.2f', ax=ax)
ax.set_title('Weather Variables Correlation Matrix')
plt.show()


In [None]:
# Build the model inputs, split them once for both targets, then impute and
# scale using statistics taken from the training rows only.
feature_columns = ['max_temperature', 'min_temperature', 'humidity', 'rainfall',
                  'wind_speed', 'pressure_surface_level', 'uv_index', 'cloud_cover',
                  'month', 'day', 'hour']

# NOTE(review): 'predicted_heatwave' / 'predicted_coldwave' are thresholded
# versions of 'max_temperature' / 'min_temperature', and both of those columns
# are in feature_columns. A classifier can therefore reproduce each label from a
# single feature, so the accuracies reported downstream mostly reflect that rule.
# Consider dropping those two features or predicting a future day's label.
y_heatwave = df['predicted_heatwave']
y_coldwave = df['predicted_coldwave']

# One call splits the features and both label vectors with the same row
# permutation, instead of two calls that only agree because the seed matches.
(X_train, X_test,
 y_heat_train, y_heat_test,
 y_cold_train, y_cold_test) = train_test_split(
    df[feature_columns], y_heatwave, y_coldwave, test_size=0.2, random_state=42
)

# Fill gaps with training-set means so no test-set statistic leaks into the
# fitted pipeline; X keeps its role as the fully imputed feature table.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)
X = df[feature_columns].fillna(train_means)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")


In [None]:
# Fit a 100-tree random forest on the scaled training features for the
# heat-wave label.
rf_heatwave = RandomForestClassifier(n_estimators=100, random_state=42)
rf_heatwave.fit(X_train_scaled, y_heat_train)

# Evaluate on the held-out split.
y_heat_pred = rf_heatwave.predict(X_test_scaled)
heatwave_accuracy = accuracy_score(y_heat_test, y_heat_pred)
heat_report = classification_report(y_heat_test, y_heat_pred)

print("Heat Wave Prediction Results:")
print(f"Accuracy: {heatwave_accuracy:.4f}")
print("\nClassification Report:")
print(heat_report)

# Pair every feature name with its importance and rank from most to least important.
heat_importance_rows = list(zip(feature_columns, rf_heatwave.feature_importances_))
feature_importance = pd.DataFrame(
    heat_importance_rows, columns=['feature', 'importance']
).sort_values(by='importance', ascending=False)

print("\nFeature Importance for Heat Wave Prediction:")
print(feature_importance)


In [None]:
# Fit a 100-tree random forest on the scaled training features for the
# cold-wave label.
rf_coldwave = RandomForestClassifier(n_estimators=100, random_state=42)
rf_coldwave.fit(X_train_scaled, y_cold_train)

# Evaluate on the held-out split.
y_cold_pred = rf_coldwave.predict(X_test_scaled)
coldwave_accuracy = accuracy_score(y_cold_test, y_cold_pred)
cold_report = classification_report(y_cold_test, y_cold_pred)

print("Cold Wave Prediction Results:")
print(f"Accuracy: {coldwave_accuracy:.4f}")
print("\nClassification Report:")
print(cold_report)

# Pair every feature name with its importance and rank from most to least important.
cold_importance_rows = list(zip(feature_columns, rf_coldwave.feature_importances_))
feature_importance_cold = pd.DataFrame(
    cold_importance_rows, columns=['feature', 'importance']
).sort_values(by='importance', ascending=False)

print("\nFeature Importance for Cold Wave Prediction:")
print(feature_importance_cold)


In [None]:
# Side-by-side horizontal bar charts of the ten highest-ranked features of
# each classifier.
#
# The target axes are passed explicitly: DataFrame.plot() creates a new figure
# when `ax` is omitted, which would leave both panels of this grid empty and
# scatter the charts over separate figures.
fig, (ax_heat, ax_cold) = plt.subplots(1, 2, figsize=(15, 6))

feature_importance.head(10).plot(x='feature', y='importance', kind='barh', ax=ax_heat)
ax_heat.set_title('Top 10 Features for Heat Wave Prediction')
ax_heat.set_xlabel('Importance')

feature_importance_cold.head(10).plot(
    x='feature', y='importance', kind='barh', color='blue', ax=ax_cold
)
ax_cold.set_title('Top 10 Features for Cold Wave Prediction')
ax_cold.set_xlabel('Importance')

fig.tight_layout()
plt.show()


In [None]:
# Collapse the observations to one row per calendar date: extremes for the
# max/min temperatures, a total for rainfall and means for everything else.
# The result is indexed by date in chronological order.
daily_data = (
    df.groupby(df['date'].dt.date)
    .agg(
        max_temperature=('max_temperature', 'max'),
        min_temperature=('min_temperature', 'min'),
        avg_temperature=('avg_temperature', 'mean'),
        rainfall=('rainfall', 'sum'),
        humidity=('humidity', 'mean'),
        wind_speed=('wind_speed', 'mean'),
        pressure_surface_level=('pressure_surface_level', 'mean'),
    )
    .reset_index()
    # The group keys are plain date objects; convert them back to timestamps.
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
    .set_index('date')
)

print(f"Daily data shape: {daily_data.shape}")
daily_data.head()


In [None]:
# Three stacked time-series panels built from the daily aggregates.
fig, (ax_avg, ax_extremes, ax_rain) = plt.subplots(3, 1, figsize=(15, 10))

daily_data['avg_temperature'].plot(ax=ax_avg)
ax_avg.set_title('Daily Average Temperature Over Time')
ax_avg.set_ylabel('Temperature (°C)')

# Daily maximum and minimum share one panel.
daily_data['max_temperature'].plot(color='red', alpha=0.7, ax=ax_extremes)
daily_data['min_temperature'].plot(color='blue', alpha=0.7, ax=ax_extremes)
ax_extremes.set_title('Daily Max and Min Temperature')
ax_extremes.set_ylabel('Temperature (°C)')
ax_extremes.legend(['Max Temp', 'Min Temp'])

daily_data['rainfall'].plot(color='green', ax=ax_rain)
ax_rain.set_title('Daily Rainfall')
ax_rain.set_ylabel('Rainfall (mm)')

fig.tight_layout()
plt.show()


In [None]:
# Additive decomposition of the daily average temperature using a 30-step period.
daily_avg_temperature = daily_data['avg_temperature'].dropna()
decomposition = seasonal_decompose(daily_avg_temperature, model='additive', period=30)

# One panel per component, stacked top to bottom in this order.
components = (
    (decomposition.observed, 'Original Temperature Data'),
    (decomposition.trend, 'Trend Component'),
    (decomposition.seasonal, 'Seasonal Component'),
    (decomposition.resid, 'Residual Component'),
)

fig, axes = plt.subplots(len(components), 1, figsize=(15, 12))
for ax, (component, title) in zip(axes, components):
    component.plot(ax=ax)
    ax.set_title(title)

fig.tight_layout()
plt.show()


In [None]:
# Forecast the daily average temperature with an ARIMA(1, 1, 1) model.
#
# The series is split 80/20 in time order. The first part is used only to
# measure forecast error on the held-out tail. The model that produces the
# 30-day outlook is then fit on the COMPLETE series, so its forecast really
# starts the day after the last observation. (Forecasting from a model fit on
# the 80% slice yields predictions for the days right after that slice, which
# must not be labelled with dates following the end of the full series.)
temp_series = daily_data['avg_temperature'].dropna()
train_size = int(len(temp_series) * 0.8)
train_data = temp_series[:train_size]
test_data = temp_series[train_size:]

# Hold-out check: predict the test span from the training span alone.
holdout_fit = ARIMA(train_data, order=(1, 1, 1)).fit()
if len(test_data) > 0:
    holdout_pred = np.asarray(holdout_fit.forecast(steps=len(test_data)), dtype=float)
    holdout_mae = float(np.mean(np.abs(holdout_pred - test_data.to_numpy(dtype=float))))
    print(f"Hold-out MAE over {len(test_data)} observations: {holdout_mae:.2f}°C")

# Final model on all available data; this is the object used for the outlook.
model = ARIMA(temp_series, order=(1, 1, 1))
fitted_model = model.fit()

forecast_steps = 30
# Calendar days following the last observed date (the first generated stamp is
# the last observed date itself, hence the [1:]).
forecast_index = pd.date_range(start=temp_series.index[-1], periods=forecast_steps+1, freq='D')[1:]
# Re-index the forecast on those dates so values and dates always travel together,
# whatever index the model attached to its output.
forecast = pd.Series(
    np.asarray(fitted_model.forecast(steps=forecast_steps), dtype=float),
    index=forecast_index,
    name='avg_temperature',
)

plt.figure(figsize=(15, 8))
plt.plot(temp_series.index[-100:], temp_series.values[-100:], label='Historical Data', color='blue')
plt.plot(forecast_index, forecast, label='30-Day Forecast', color='red', linestyle='--')
plt.title('Temperature Forecast for Next 30 Days')
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print(f"\nForecasted temperatures for next {forecast_steps} days:")
for i, (date, temp) in enumerate(zip(forecast_index, forecast), start=1):
    print(f"Day {i} ({date.strftime('%Y-%m-%d')}): {temp:.2f}°C")


In [None]:
# Turn each forecasted temperature into a High / Medium / Low risk level for
# heat waves and for cold waves, and collect the result in risk_df.
#
# NOTE(review): `forecast` is built from the daily *average* temperature, while
# the thresholds are percentiles of the daily *maximum* / *minimum*. Comparing
# the two directly makes 'High' hard to reach — consider forecasting the daily
# max and min series separately.

# "Medium" band = within 10% of the threshold's magnitude on the safe side.
# Using abs() keeps the band on the correct side for thresholds at or below
# 0°C, where multiplying by 0.9 / 1.1 would move the cut-off past the threshold
# itself and make 'Medium' unreachable.
heat_medium_cutoff = heat_wave_threshold - 0.1 * abs(heat_wave_threshold)
cold_medium_cutoff = cold_wave_threshold + 0.1 * abs(cold_wave_threshold)

# Plain float array: positional values only, no index carried over.
forecast_values = np.asarray(forecast, dtype=float)

future_heatwave_risk = []
future_coldwave_risk = []

for temp in forecast_values:
    if temp >= heat_wave_threshold:
        future_heatwave_risk.append('High')
    elif temp >= heat_medium_cutoff:
        future_heatwave_risk.append('Medium')
    else:
        future_heatwave_risk.append('Low')

    if temp <= cold_wave_threshold:
        future_coldwave_risk.append('High')
    elif temp <= cold_medium_cutoff:
        future_coldwave_risk.append('Medium')
    else:
        future_coldwave_risk.append('Low')

# All columns are positional arrays of equal length, so the table gets a simple
# 0..n-1 index instead of inheriting whatever index the forecast carries.
risk_df = pd.DataFrame({
    'Date': forecast_index,
    'Forecasted_Temperature': forecast_values,
    'Heat_Wave_Risk': future_heatwave_risk,
    'Cold_Wave_Risk': future_coldwave_risk
})

print("\nFuture Climate Risk Assessment:")
print(risk_df.head(10))


In [None]:
# Risk overview: pie charts of the share of each risk level and bar charts of
# the number of forecast days per level, for heat and cold waves.
risk_levels = ['High', 'Medium', 'Low']
colors = {'High': 'red', 'Medium': 'orange', 'Low': 'green'}
colors_cold = {'High': 'blue', 'Medium': 'lightblue', 'Low': 'lightgreen'}

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(ax_heat_pie, ax_cold_pie), (ax_heat_bar, ax_cold_bar) = axes

risk_df['Heat_Wave_Risk'].value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax_heat_pie)
ax_heat_pie.set_title('Heat Wave Risk Distribution (Next 30 Days)')

risk_df['Cold_Wave_Risk'].value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax_cold_pie)
ax_cold_pie.set_title('Cold Wave Risk Distribution (Next 30 Days)')

# Count days per risk level in a fixed High/Medium/Low order (absent levels
# count as 0) and colour each bar by its own level. Counting the mapped colour
# names instead would label the bars with colour names and assign colours by
# frequency rank rather than by risk level.
heat_counts = risk_df['Heat_Wave_Risk'].value_counts().reindex(risk_levels, fill_value=0)
heat_counts.plot(kind='bar', color=[colors[level] for level in heat_counts.index], ax=ax_heat_bar)
ax_heat_bar.set_title('Heat Wave Risk Count')
ax_heat_bar.set_xlabel('Risk Level')
ax_heat_bar.set_ylabel('Number of Days')

cold_counts = risk_df['Cold_Wave_Risk'].value_counts().reindex(risk_levels, fill_value=0)
cold_counts.plot(kind='bar', color=[colors_cold[level] for level in cold_counts.index], ax=ax_cold_bar)
ax_cold_bar.set_title('Cold Wave Risk Count')
ax_cold_bar.set_xlabel('Risk Level')
ax_cold_bar.set_ylabel('Number of Days')

fig.tight_layout()
plt.show()


In [None]:
# Text report: model scores, thresholds, flagged dates, headline insights and
# alert-level recommendations derived from the number of 'High' risk days.
high_heatwave_days = risk_df.loc[risk_df['Heat_Wave_Risk'].eq('High')]
high_coldwave_days = risk_df.loc[risk_df['Cold_Wave_Risk'].eq('High')]
n_high_heat = len(high_heatwave_days)
n_high_cold = len(high_coldwave_days)

print("=== CLIMATE RISK AND DISASTER MANAGEMENT REPORT ===\n")

print("1. HEAT WAVE PREDICTION:")
print(f"   - Model Accuracy: {heatwave_accuracy:.2%}")
print(f"   - Heat wave threshold: {heat_wave_threshold:.2f}°C")
print(f"   - High risk days in next 30 days: {n_high_heat}")
if n_high_heat:
    heat_dates = high_heatwave_days['Date'].dt.strftime('%Y-%m-%d')
    print(f"   - High risk dates: {', '.join(heat_dates)}")

print("\n2. COLD WAVE PREDICTION:")
print(f"   - Model Accuracy: {coldwave_accuracy:.2%}")
print(f"   - Cold wave threshold: {cold_wave_threshold:.2f}°C")
print(f"   - High risk days in next 30 days: {n_high_cold}")
if n_high_cold:
    cold_dates = high_coldwave_days['Date'].dt.strftime('%Y-%m-%d')
    print(f"   - High risk dates: {', '.join(cold_dates)}")

# The importance tables are sorted in descending order, so row 0 is the top feature.
top_heat_feature = feature_importance.iloc[0]['feature']
top_cold_feature = feature_importance_cold.iloc[0]['feature']
forecast_spread = forecast.max() - forecast.min()

print("\n3. KEY INSIGHTS:")
print(f"   - Most important factor for heat waves: {top_heat_feature}")
print(f"   - Most important factor for cold waves: {top_cold_feature}")
print(f"   - Average forecasted temperature: {forecast.mean():.2f}°C")
print(f"   - Temperature range in forecast: {forecast_spread:.2f}°C")

# Alert tiers: no flagged days -> low, 1 to 5 -> moderate, more than 5 -> high.
print("\n4. DISASTER MANAGEMENT RECOMMENDATIONS:")
if n_high_heat == 0:
    print("   - LOW RISK: No extreme heat waves predicted")
elif n_high_heat <= 5:
    print("   - MODERATE ALERT: Some heat wave days predicted")
    print("   - Monitor vulnerable populations")
else:
    print("   - HIGH ALERT: Multiple heat wave days predicted")
    print("   - Prepare cooling centers and water distribution")
    print("   - Issue public health warnings")

if n_high_cold == 0:
    print("   - LOW RISK: No extreme cold waves predicted")
elif n_high_cold <= 5:
    print("   - MODERATE ALERT: Some cold wave days predicted")
    print("   - Monitor vulnerable populations")
else:
    print("   - HIGH ALERT: Multiple cold wave days predicted")
    print("   - Prepare heating centers and warm clothing distribution")
    print("   - Issue cold weather warnings")


In [None]:
# Persist both classifiers, the feature scaler and the fitted ARIMA results
# with joblib, then list the files that were written.
artifacts = {
    'heatwave_model.pkl': rf_heatwave,
    'coldwave_model.pkl': rf_coldwave,
    'scaler.pkl': scaler,
    'arima_model.pkl': fitted_model,
}

for filename, artifact in artifacts.items():
    joblib.dump(artifact, filename)

print("Models saved successfully!")
print("Files created:")
for filename in artifacts:
    print(f"- {filename}")
