<a href="https://colab.research.google.com/github/edwinkuruvila10/Summer-Heat-Waves-Mobile-Alert-System/blob/main/Summer_Heat_Waves_Mobile_Alert_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import smtplib

In [3]:
data = pd.read_csv('weather.csv')  # Replace with the actual path to your data

# Check for missing values
print(data.isnull().sum())

# Drop or fill missing values
data.fillna(method='ffill', inplace=True)

# Encoding categorical variables (like Wind Direction)
data = pd.get_dummies(data, columns=['WindGustDir', 'WindDir9am', 'WindDir3pm'], drop_first=True)

# Convert 'RainToday' and 'RainTomorrow' to binary variables
data['RainToday'] = data['RainToday'].map({'No': 0, 'Yes': 1})
data['RainTomorrow'] = data['RainTomorrow'].map({'No': 0, 'Yes': 1})

data['HeatWave'] = np.where(data['MaxTemp'] > 35, 1, 0)

MinTemp           0
MaxTemp           0
Rainfall          0
Evaporation       0
Sunshine          3
WindGustDir       3
WindGustSpeed     2
WindDir9am       31
WindDir3pm        1
WindSpeed9am      7
WindSpeed3pm      0
Humidity9am       0
Humidity3pm       0
Pressure9am       0
Pressure3pm       0
Cloud9am          0
Cloud3pm          0
Temp9am           0
Temp3pm           0
RainToday         0
RISK_MM           0
RainTomorrow      0
dtype: int64


  data.fillna(method='ffill', inplace=True)


In [4]:
X = data.drop(columns=['MaxTemp', 'HeatWave', 'RainTomorrow'])
y = data['HeatWave']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
model = RandomForestClassifier(n_estimators=100, random_state=42)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9864864864864865
              precision    recall  f1-score   support

           0       0.99      1.00      0.99        73
           1       0.00      0.00      0.00         1

    accuracy                           0.99        74
   macro avg       0.49      0.50      0.50        74
weighted avg       0.97      0.99      0.98        74



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
def send_alert(alert_message, to_email):
    # Set up the email server (using Gmail as an example)
    server = smtplib.SMTP('smtp.gmail.com', 587)
    server.starttls()

    # Login to the email account
    sender_email = "your_email@gmail.com"
    password = "your_password"
    server.login(sender_email, password)

    # Send the email
    subject = "Heatwave Alert!"
    message = f'Subject: {subject}\n\n{alert_message}'
    server.sendmail(sender_email, to_email, message)
    server.quit()

# Trigger alerts for heatwaves
for i in range(len(X_test)):
    if y_pred[i] == 1:  # Heatwave predicted
        alert_message = "A heatwave is predicted for tomorrow. Stay safe and hydrated!"
        send_alert(alert_message, "recipient_email@example.com")

In [8]:
# data['MinTemp'].fillna(data['MinTemp'].mean(), inplace=True)
# data['MaxTemp'].fillna(data['MaxTemp'].mean(), inplace=True)

# The WindGustDir column was removed by get_dummies so these lines are not needed.
# Fill missing values for categorical columns like wind direction with the mode
# data['WindGustDir'].fillna(data['WindGustDir'].mode()[0], inplace=True)

# For columns like RainToday, you can use forward fill if there's a temporal correlation
# data['RainToday'].fillna(method='ffill', inplace=True)

#The code in cell 3 already handles missing values so this entire cell is unnecessary

In [9]:
data['HeatIndex'] = 0.5 * (data['MaxTemp'] + data['Humidity3pm'])

# Rolling Mean of temperature over 3 days to smooth data
data['MaxTemp_3DayAvg'] = data['MaxTemp'].rolling(window=3).mean().fillna(data['MaxTemp'].mean())


In [10]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

# Initialize the model
gb_model = GradientBoostingClassifier()

# Set up hyperparameter grid for tuning
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.1, 0.05, 0.01],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
}

# Grid search for best hyperparameters
grid_search = GridSearchCV(gb_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best model and its parameters
best_gb_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Make predictions with the best model
y_pred_gb = best_gb_model.predict(X_test)

# Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred_gb))
print(classification_report(y_test, y_pred_gb))




Best Parameters: {'learning_rate': 0.05, 'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 100}
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        73
           1       1.00      1.00      1.00         1

    accuracy                           1.00        74
   macro avg       1.00      1.00      1.00        74
weighted avg       1.00      1.00      1.00        74

