In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout, Input
import warnings

warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

In [2]:
# Read data from CSV file
file_path = 'GlobalTemperatures.csv'
df = pd.read_csv(file_path)

# Impute LandAverageTemperature and LandAverageTemperatureUncertainty with mean
df['LandAverageTemperature'].fillna(df['LandAverageTemperature'].mean(), inplace=True)
df['LandAverageTemperatureUncertainty'].fillna(df['LandAverageTemperatureUncertainty'].mean(), inplace=True)

# For columns with 1200 missing values, drop those rows
cols_to_dropna = ['LandMaxTemperature', 'LandMaxTemperatureUncertainty', 'LandMinTemperature', 'LandMinTemperatureUncertainty', 'LandAndOceanAverageTemperature', 'LandAndOceanAverageTemperatureUncertainty']
df.dropna(subset=cols_to_dropna, inplace=True)

In [3]:
# Add Year and Month columns based on 'dt' column
df['Year'] = pd.to_datetime(df['dt']).dt.year
df['Month'] = pd.to_datetime(df['dt']).dt.month

# Prepare X (features) and y (target)
X = df.drop(['LandAverageTemperature', 'dt'], axis=1)
y = df['LandAverageTemperature']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale X and y using MinMaxScaler
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

# Fit and transform the training data
X_train_scaled = scaler_x.fit_transform(X_train)
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))

# Only transform the testing data
X_test_scaled = scaler_x.transform(X_test)
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

# Reshape X_train_scaled and X_test_scaled for RNN input
X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Print the shapes to verify
print("X_train_scaled shape:", X_train_scaled.shape)
print("X_test_scaled shape:", X_test_scaled.shape)
print("y_train_scaled shape:", y_train_scaled.shape)
print("y_test_scaled shape:", y_test_scaled.shape)

X_train_scaled shape: (1593, 9, 1)
X_test_scaled shape: (399, 9, 1)
y_train_scaled shape: (1593, 1)
y_test_scaled shape: (399, 1)


In [4]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Input

model = Sequential()
model.add(Input(shape=(X_train_scaled.shape[1], 1)))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()


In [5]:

# Train the model
history = model.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32, validation_split=0.2,verbose=2)


Epoch 1/100
40/40 - 3s - 72ms/step - loss: 0.0628 - val_loss: 0.0238
Epoch 2/100
40/40 - 0s - 7ms/step - loss: 0.0191 - val_loss: 0.0150
Epoch 3/100
40/40 - 0s - 8ms/step - loss: 0.0164 - val_loss: 0.0138
Epoch 4/100
40/40 - 0s - 7ms/step - loss: 0.0127 - val_loss: 0.0097
Epoch 5/100
40/40 - 0s - 8ms/step - loss: 0.0094 - val_loss: 0.0091
Epoch 6/100
40/40 - 0s - 8ms/step - loss: 0.0121 - val_loss: 0.0072
Epoch 7/100
40/40 - 0s - 9ms/step - loss: 0.0073 - val_loss: 0.0063
Epoch 8/100
40/40 - 0s - 10ms/step - loss: 0.0064 - val_loss: 0.0056
Epoch 9/100
40/40 - 0s - 9ms/step - loss: 0.0055 - val_loss: 0.0053
Epoch 10/100
40/40 - 0s - 8ms/step - loss: 0.0048 - val_loss: 0.0048
Epoch 11/100
40/40 - 0s - 7ms/step - loss: 0.0044 - val_loss: 0.0044
Epoch 12/100
40/40 - 0s - 8ms/step - loss: 0.0045 - val_loss: 0.0043
Epoch 13/100
40/40 - 0s - 8ms/step - loss: 0.0043 - val_loss: 0.0052
Epoch 14/100
40/40 - 0s - 8ms/step - loss: 0.0040 - val_loss: 0.0040
Epoch 15/100
40/40 - 0s - 8ms/step - loss

In [6]:
# Make predictions
predictions = model.predict(X_test_scaled)
predictions = scaler_y.inverse_transform(predictions)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


In [7]:
# Compare predictions with actual values
actual = scaler_y.inverse_transform(y_test_scaled)
for i in range(len(predictions)):
    print(f"Actual: {actual[i][0]}, Predicted: {predictions[i][0]}")

Actual: 3.88, Predicted: 3.928748369216919
Actual: 8.689, Predicted: 8.596373558044434
Actual: 13.622, Predicted: 13.261086463928223
Actual: 2.335, Predicted: 2.5791003704071045
Actual: 5.952999999999999, Predicted: 5.891555309295654
Actual: 4.869, Predicted: 4.730517387390137
Actual: 14.768, Predicted: 14.575563430786133
Actual: 7.423999999999999, Predicted: 7.562475204467773
Actual: 4.103, Predicted: 4.206771373748779
Actual: 4.519, Predicted: 4.805171489715576
Actual: 14.021, Predicted: 13.837247848510742
Actual: 14.034, Predicted: 14.062273025512695
Actual: 4.85, Predicted: 5.164228439331055
Actual: 9.453, Predicted: 9.453171730041504
Actual: 6.222, Predicted: 6.202760219573975
Actual: 14.445, Predicted: 14.319841384887695
Actual: 11.062, Predicted: 10.153813362121582
Actual: 2.455, Predicted: 2.140364170074463
Actual: 6.273, Predicted: 6.394965171813965
Actual: 4.303, Predicted: 4.324885845184326
Actual: 5.066, Predicted: 5.968013286590576
Actual: 10.941999999999998, Predicted: 10

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Calculate metrics
mae = mean_absolute_error(actual, predictions)
mse = mean_squared_error(actual, predictions)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 0.3429270513864388
Mean Squared Error (MSE): 0.3456430696371938
Root Mean Squared Error (RMSE): 0.5879141685970783
