<a href="https://colab.research.google.com/github/Xcalibur-hub/machine-learning/blob/main/Heat_wave_detection_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import missingno
import tensorflow as tf
from datetime import datetime
import plotly.express as px
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, InputLayer
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint

# Load and prepare the data
def load_data(file_path):
    """Read a CSV file and parse its 'date' column as datetimes.

    Args:
        file_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        DataFrame with the same columns as the CSV, where 'date' has been
        converted with ``pd.to_datetime``.
    """
    raw = pd.read_csv(file_path)
    parsed_dates = pd.to_datetime(raw['date'])
    # 'date' already exists, so assign() replaces it in place in the column order.
    return raw.assign(date=parsed_dates)

# Visualize missing values
def visualize_missing_data(df):
    """Render missingno's bar chart for ``df`` and display the figure.

    Args:
        df: DataFrame passed straight to ``missingno.bar``.
    """
    missingno.bar(df)
    # Display the figure produced above (blocks in non-interactive backends).
    plt.show()

# Clean the DataFrame
def clean_data(df, start_date='2020-01-01'):
    """Keep rows on/after ``start_date`` and drop columns the model does not use.

    The input frame is left untouched. The result is a fresh DataFrame (not a
    slice flagged as a copy of ``df``), so later column assignments on it do
    not raise ``SettingWithCopyWarning``.

    Args:
        df: DataFrame with a datetime 'date' column and the metadata columns
            listed below.
        start_date: Inclusive lower bound compared against df['date'].

    Returns:
        A new DataFrame containing only the retained rows and columns.

    Raises:
        KeyError: If 'date' or any of the columns to drop is missing.
    """
    irrelevant_columns = [
        'capital', 'capital_lat', 'capital_lng', 'iso2', 'iso3',
        'population', 'native_name', 'continent', 'region',
        'sunshine_total_min', 'Unnamed: 0', 'snow_depth_mm',
        'peak_wind_gust_kmh', 'station_id',
    ]
    recent = df[df['date'] >= start_date]
    # Non-inplace drop returns an independent frame instead of mutating a
    # filtered slice of the caller's data.
    return recent.drop(columns=irrelevant_columns)

# Add date features and visualize temperature trends
def enhance_and_visualize(df):
    """Add 'year', 'month' and 'day' columns to ``df`` and plot max temperature.

    The three columns are written onto ``df`` in place from its datetime
    'date' column. A plotly line chart of 'max_temp_c' against 'date', with
    one coloured line per 'city_name', is then shown. Nothing is returned.

    Args:
        df: DataFrame with 'date' (datetime), 'max_temp_c' and 'city_name'.
    """
    for part in ('year', 'month', 'day'):
        df[part] = getattr(df['date'].dt, part)

    # Plot max temperature over time
    chart = px.line(
        df,
        x='date',
        y='max_temp_c',
        color='city_name',
        title='Max Temp Over Time',
    )
    chart.show()

# Handle missing values
def handle_missing_values(df):
    """Fill missing values in place: backward-fill first, then zero-fill.

    ``DataFrame.bfill`` replaces the deprecated ``fillna(method='bfill')``
    form, which newer pandas releases no longer accept.

    Args:
        df: DataFrame to fill; it is modified in place.

    Returns:
        The same ``df`` object, with no remaining missing values.
    """
    # Each gap takes the next valid value further down the same column.
    # NOTE(review): if rows from several cities are stacked, this can pull a
    # value across a city boundary -- confirm that is acceptable.
    df.bfill(inplace=True)
    # Trailing gaps have no later value to copy from, so they become 0.
    df.fillna(0, inplace=True)
    return df

# Feature engineering
def feature_engineering(df):
    """Add cyclical calendar encodings and a combined temperature feature.

    Writes 'sin_month', 'cos_month', 'sin_day', 'cos_day' and 'min_plus_avg'
    onto ``df`` in place. Month and day are read from the 'month' / 'day'
    columns when present; otherwise they are derived from the datetime 'date'
    column, so the function no longer depends on another step having created
    them.

    Args:
        df: DataFrame with 'min_temp_c', 'avg_temp_c', and either
            'month' and 'day' columns or a datetime 'date' column.

    Returns:
        The same ``df`` object with the new columns added.

    Raises:
        KeyError: If neither the calendar columns nor 'date' is available, or
            a temperature column is missing.
    """
    month = df['month'] if 'month' in df.columns else df['date'].dt.month
    day = df['day'] if 'day' in df.columns else df['date'].dt.day

    # Sine/cosine pairs place the values on a circle, so December sits next
    # to January. Days use a fixed period of 31 for every month.
    month_angle = 2 * np.pi * month / 12
    day_angle = 2 * np.pi * day / 31
    df['sin_month'] = np.sin(month_angle)
    df['cos_month'] = np.cos(month_angle)
    df['sin_day'] = np.sin(day_angle)
    df['cos_day'] = np.cos(day_angle)

    # Additional feature calculations
    df['min_plus_avg'] = df['min_temp_c'] + df['avg_temp_c']
    # More features can be added as needed...

    return df

# Scale the features
def scale_features(df):
    """Standardise the float/int columns of ``df`` with a fresh StandardScaler.

    Non-numeric columns are left out of the result, and the returned frame
    gets a default integer index rather than ``df``'s index. The fitted
    scaler is not returned.

    Args:
        df: DataFrame whose 'float' and 'int' dtype columns are scaled.

    Returns:
        New DataFrame of scaled values with the numeric column names.
    """
    numeric = df.select_dtypes(include=['float', 'int'])
    standardized = StandardScaler().fit_transform(numeric)
    return pd.DataFrame(standardized, columns=numeric.columns)

# Prepare data for the model
def prepare_data(df, window_size=14):
    """Build sliding-window samples for next-step 'max_temp_c' prediction.

    Each sample holds ``window_size`` consecutive rows (all columns, target
    included). Its label is the 'max_temp_c' value of the row that
    immediately follows the window.

    Args:
        df: DataFrame containing a 'max_temp_c' column.
        window_size: Number of consecutive rows per sample.

    Returns:
        Tuple ``(X, y)`` where X has shape
        ``(n_samples, window_size, n_columns)`` and y has shape
        ``(n_samples,)``. When ``df`` has no more than ``window_size`` rows,
        both arrays are empty but keep those ranks, so downstream code can
        still read ``X.shape[1]`` and ``X.shape[2]``.

    Raises:
        KeyError: If 'max_temp_c' is not a column of ``df``.
    """
    values = df.to_numpy()
    # Resolve the target column once instead of on every iteration.
    target_idx = df.columns.get_loc('max_temp_c')
    n_samples = len(values) - window_size

    if n_samples <= 0:
        empty_X = np.empty((0, window_size, values.shape[1]), dtype=values.dtype)
        empty_y = np.empty((0,), dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + window_size] for start in range(n_samples)])
    # Labels are the target values from row `window_size` onward.
    y = values[window_size:, target_idx].copy()
    return X, y

# Build and train the LSTM model
def build_and_train_model(X_train, y_train):
    """Build a single-layer LSTM regressor and fit it on the training windows.

    The network is InputLayer -> LSTM(64, relu) -> Dense(1), compiled with
    the Adam optimizer and mean-squared-error loss. Training runs for up to
    50 epochs with batch size 32 and ``validation_split=0.2``. Early stopping
    watches 'val_loss' with patience 3 and restores the best weights, and a
    checkpoint is written to 'best_model.h5'.

    Args:
        X_train: 3-D array indexed as (samples, timesteps, features).
        y_train: Target values, one per sample.

    Returns:
        The fitted Keras model.
    """
    timesteps, n_features = X_train.shape[1], X_train.shape[2]

    layers = [
        InputLayer(input_shape=(timesteps, n_features)),
        LSTM(64, activation='relu'),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(loss='mean_squared_error', optimizer='adam')

    # Set up callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
        ModelCheckpoint('best_model.h5', save_best_only=True),
    ]

    # Train the model
    model.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=1,
    )
    return model

# Evaluate model
def evaluate_model(model, X_val, y_val):
    """Score ``model`` on held-out data.

    Args:
        model: Object exposing ``predict(X)``.
        X_val: Validation inputs passed to ``model.predict``.
        y_val: True target values for ``X_val``.

    Returns:
        Tuple ``(mse, r2)`` computed with scikit-learn's
        ``mean_squared_error`` and ``r2_score``.
    """
    predictions = model.predict(X_val)
    return mean_squared_error(y_val, predictions), r2_score(y_val, predictions)

# Main execution
if __name__ == "__main__":
    # End-to-end pipeline: load -> clean -> add features -> scale -> window
    # -> train -> evaluate.

    # Load data
    file_path = "/content/drive/MyDrive/heat-wave-data/data/ea.csv"
    df = load_data(file_path)

    # Visualize missing data
    visualize_missing_data(df)

    # Clean and prepare data
    df = clean_data(df)
    # Adds the year/month/day columns that feature_engineering reads; without
    # this call the pipeline fails with KeyError: 'month'.
    enhance_and_visualize(df)
    df = handle_missing_values(df)
    df = feature_engineering(df)

    # Scale features (the target column is scaled too, so the metrics below
    # are reported in standardised units)
    scaled_df = scale_features(df)

    # Prepare X and y
    X, y = prepare_data(scaled_df)

    # Split data into training and validation sets (chronological, no shuffle)
    train_size = 4500
    if len(X) <= train_size:
        # Fail early with a clear message instead of crashing later on an
        # empty validation set.
        raise ValueError(
            f'Need more than {train_size} samples to leave a validation set, '
            f'got {len(X)}'
        )
    X_train, X_val = X[:train_size], X[train_size:]
    y_train, y_val = y[:train_size], y[train_size:]

    # Build and train the model
    model = build_and_train_model(X_train, y_train)

    # Evaluate the model
    mse, r2 = evaluate_model(model, X_val, y_val)
    print(f'MSE: {mse}, R-squared: {r2}')
