<a href="https://colab.research.google.com/github/Umamaheswari2001/Heatwave/blob/master/heatwave.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional, Conv1D
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error

In [None]:
# Load the dataset from a Google Drive share link.
# The share URL has the form ".../d/<file_id>/view?...", so the file id is the
# second-to-last "/"-separated segment; appending it to the "uc?id=" endpoint
# gives a URL that pandas can read directly as CSV.
url = 'https://drive.google.com/file/d/1Hr4MmNyDzuSaVPTi8d7zM5wDKRO5cz0s/view?usp=sharing'
file_id = url.rsplit('/', 2)[-2]
download_prefix = 'https://drive.google.com/uc?id='
read_url = download_prefix + file_id
df = pd.read_csv(read_url)

# Show column dtypes / non-null counts, then a preview of the frame itself.
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14974 entries, 0 to 14973
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   temperature        14974 non-null  float64
 1   relative humidity  14974 non-null  float64
 2   wind speed         14974 non-null  float64
 3   surface pressure   14974 non-null  float64
dtypes: float64(4)
memory usage: 468.1 KB


Unnamed: 0,temperature,relative humidity,wind speed,surface pressure
0,26.350000,100.00,9.58,101.99
1,26.440001,100.00,13.72,101.89
2,27.219999,100.00,9.58,101.67
3,28.290001,99.50,9.34,100.96
4,28.059999,100.00,6.50,101.50
...,...,...,...,...
14969,27.270000,92.56,6.98,101.68
14970,25.550000,95.44,5.11,101.43
14971,20.240000,87.06,6.42,101.47
14972,20.770000,93.19,6.66,101.17


In [None]:
# Report the number of missing entries in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)

# Remove every row that contains a missing value.
# This mutates df in place, so later cells see the cleaned frame under the same name.
df.dropna(inplace=True)


temperature          0
relative humidity    0
wind speed           0
surface pressure     0
dtype: int64


In [None]:
# Min-max scale the temperature column.
# The scaler expects a 2-D array, so the column is reshaped to (n_rows, 1).
# NOTE(review): the scaler is fitted on the full column (before the train/test
# split), and temp_scaled is not referenced by any later cell visible here --
# the windowing cells read the unscaled values from df. Confirm whether the
# model was meant to be trained on the scaled series.
scaler = MinMaxScaler()
temp = df['temperature'].to_numpy().reshape(-1, 1)
temp_scaled = scaler.fit_transform(temp)

In [None]:
# Chronological train/test split (no shuffling): the first `train_split`
# fraction of the rows is used for training, the remaining rows for testing.
train_split = 0.8
split_idx = int(len(df) * train_split)

# Both sets keep every column of df as a 2-D NumPy array; the windowing cell
# reads column 0 (temperature) from them.
training_set = df.iloc[:split_idx].values
test_set = df.iloc[split_idx:].values

In [None]:
# 5-day temperature forecast using the previous 30 days of temperature.
n_future = 5  # forecast horizon: number of days predicted per sample
n_past = 30   # look-back window: number of past days fed to the model


def make_windows(values, n_past, n_future):
    """Slice a 1-D series into sliding (history, target) window pairs.

    Sample ``i`` pairs ``values[i : i + n_past]`` (the model input) with the
    ``n_future`` values that immediately follow it (the forecast target).
    Consecutive samples are shifted by one step.

    Parameters
    ----------
    values : 1-D array-like
        The series to window, in chronological order.
    n_past : int
        Number of past steps in each input window.
    n_future : int
        Number of future steps in each target window.

    Returns
    -------
    (x, y) : tuple of np.ndarray
        ``x`` has shape ``(n_samples, n_past)`` and ``y`` has shape
        ``(n_samples, n_future)``. When the series is shorter than
        ``n_past + n_future`` both arrays have zero rows but keep their
        second dimension, so downstream ``shape[1]`` lookups still work.
    """
    n_samples = len(values) - n_past - n_future + 1
    if n_samples <= 0:
        return np.empty((0, n_past)), np.empty((0, n_future))
    x = np.array([values[i:i + n_past] for i in range(n_samples)])
    y = np.array([values[i + n_past:i + n_past + n_future] for i in range(n_samples)])
    return x, y


# Column 0 of both sets is the temperature series.
x_train, y_train = make_windows(training_set[:, 0], n_past, n_future)
x_test, y_test = make_windows(test_set[:, 0], n_past, n_future)


In [None]:
# Append a trailing feature axis so each window becomes a sequence of
# single-feature steps: (samples, timesteps) -> (samples, timesteps, 1).
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
# Make sure the inputs are exactly 3-D, (samples, timesteps, 1): that is the
# shape the first Conv1D layer is declared with (input_shape=(timesteps, 1)).
# np.expand_dims is deliberately not used here: applied to arrays that already
# carry the feature axis it yields 4-D inputs, and each re-run of the cell
# would append yet another axis. This reshape is idempotent, so running the
# cell any number of times leaves the arrays in the same 3-D shape.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
# Define the model: two Conv1D feature extractors, a bidirectional LSTM,
# three stacked LSTMs, and a dense regression head with one output per
# forecast day.
model = Sequential()
model.add(Conv1D(filters=256, kernel_size=2, activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(units=30, return_sequences=True)))
model.add(LSTM(units=30, return_sequences=True))
model.add(LSTM(units=30, return_sequences=True))
# The last recurrent layer returns only its final state, which feeds the head.
model.add(LSTM(units=30))
model.add(Dropout(0.2))
# Regression head: output width follows the target width instead of a
# hard-coded 5, and the activation is linear so the output is not clamped at
# zero (a ReLU output unit can get stuck emitting 0 with no gradient).
model.add(Dense(units=y_train.shape[1]))
# 'accuracy' is a classification metric and is meaningless for a continuous
# target; mean absolute error is reported alongside the MSE loss instead.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

# Train the model
# Stop once val_loss has not improved for 5 epochs and roll back to the best
# epoch's weights; without restore_best_weights the model would keep the
# weights from the last (worse) epoch.
# NOTE(review): the test windows double as the validation set here, so the
# early-stopping decision has seen the test data -- consider a separate
# validation split.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
history = model.fit(x_train, y_train, epochs=500, batch_size=32, validation_data=(x_test, y_test), callbacks=[early_stop])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 13: early stopping


In [None]:
# Forecast from the first n_past test days and compare with the n_future days
# that follow them.
# Note: this rebinds x_test / y_test, replacing the windowed test arrays that
# were built earlier with a single sample.
x_test = np.array(test_set[:n_past, 0])
x_test = x_test.reshape(1, x_test.shape[0], 1)  # (1 sample, timesteps, 1 feature)
y_test = np.array(test_set[n_past:n_past + n_future, 0])

predicted_temperature = model.predict(x_test)
forecast = predicted_temperature[0]  # the single sample's per-day predictions
print('Predicted temperature {}'.format(predicted_temperature))
print('Real temperature {}'.format(y_test))

# Print the heatwave array
# NOTE(review): a day is flagged 1 when the forecast is >= the observed value;
# no absolute temperature threshold is involved -- confirm this is the
# intended definition of a heatwave.
heatwaves = [int(forecast[day] >= y_test[day]) for day in range(len(forecast))]

print("Heatwave " + str(heatwaves))

# Per-day accuracy is 100% minus the absolute percentage error. The overall
# figure is a weighted mean in which earlier days count more: day 1 has
# weight n_future, the last day has weight 1.
total_weight = n_future * (n_future + 1) // 2  # 1 + 2 + ... + n_future
total_accuracy = 0
for day, weight in enumerate(range(n_future, 0, -1)):
    relative_error = abs((forecast[day] - y_test[day]) / y_test[day])
    accuracy = 100 - relative_error * 100
    total_accuracy += accuracy * weight
    print("Day {}: {:.2f}%".format(day + 1, accuracy))

overall_accuracy = total_accuracy / total_weight
print("Overall Accuracy: {:.2f}%".format(overall_accuracy))



Predicted temperature [[32.401676 32.40887  32.40386  32.40294  32.395435]]
Real temperature [28.82999992 28.37999916 28.38999939 28.13999939 30.35000038]
Heatwave [1, 1, 1, 1, 1]
Day 1: 87.61%
Day 2: 85.80%
Day 3: 85.86%
Day 4: 84.85%
Day 5: 93.26%
Overall Accuracy: 86.79%
