# 9. Recurrent Neural Networks (RNN)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
from sklearn.metrics import mean_squared_error

In [2]:
df = pd.read_csv('clean_weather.csv')

In [3]:
# Check for NaN values
print(df.isna().sum())

date               0
tmax              11
tmin              14
rain             281
tmax_tomorrow     11
dtype: int64


In [4]:
# Fill NaN values with mean, excluding the 'Date' column
df.fillna(df.drop('date', axis=1).mean(), inplace=True)

In [5]:
# Check for NaN values
print(df.isna().sum())

date             0
tmax             0
tmin             0
rain             0
tmax_tomorrow    0
dtype: int64


In [6]:
df.head()

Unnamed: 0,date,tmax,tmin,rain,tmax_tomorrow
0,01-01-1970,60.0,35.0,0.0,52.0
1,02-01-1970,52.0,39.0,0.0,52.0
2,03-01-1970,52.0,35.0,0.0,53.0
3,04-01-1970,53.0,36.0,0.0,52.0
4,05-01-1970,52.0,35.0,0.0,50.0


In [7]:
df.shape

(13509, 5)

In [8]:
#  Prepare the data
X = df[['tmax', 'tmin', 'rain']].values
y = df['tmax_tomorrow'].values

In [9]:
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y.reshape(-1, 1))

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

In [11]:
# Reshape input to be [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

In [12]:
# Define the RNN model
model = Sequential([
    SimpleRNN(units=50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1)
])

  super().__init__(**kwargs)


In [13]:
# Compile the model
model.compile(optimizer='adam', loss='mse')

In [14]:
# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=40, verbose=1)

Epoch 1/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.0235
Epoch 2/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0031
Epoch 3/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0031
Epoch 4/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0031
Epoch 5/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0031
Epoch 6/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0031
Epoch 7/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0031   
Epoch 8/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0030
Epoch 9/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 995us/step - loss: 0.0030
Epoch 10/100
[1m271/271[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

<keras.src.callbacks.history.History at 0x18c028a6b10>

In [15]:
#Evaluate the model
train_loss = model.evaluate(X_train, y_train, verbose=0)
test_loss = model.evaluate(X_test, y_test, verbose=0)

In [16]:
print(f"Train Loss: {train_loss}, Test Loss: {test_loss}")

Train Loss: 0.0029915550258010626, Test Loss: 0.0033491922076791525


In [17]:
#Make predictions
y_pred = model.predict(X_test)
y_pred_inv = scaler.inverse_transform(y_pred)
y_test_inv = scaler.inverse_transform(y_test)


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [18]:

# Calculate and print the RMSE
rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
print("Root Mean Squared Error:", rmse)

Root Mean Squared Error: 4.86126545425215


In [19]:
from sklearn.metrics import r2_score

# Calculate R-squared score
r_squared = r2_score(y_test_inv, y_pred_inv)
print("R-squared Score:", r_squared)


R-squared Score: 0.6647478452809494
