In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout, Input
import warnings

warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

In [2]:
# Read data from CSV file
file_path = 'GlobalTemperatures.csv'
df = pd.read_csv(file_path)

# Impute LandAverageTemperature and LandAverageTemperatureUncertainty with mean
df['LandAverageTemperature'].fillna(df['LandAverageTemperature'].mean(), inplace=True)
df['LandAverageTemperatureUncertainty'].fillna(df['LandAverageTemperatureUncertainty'].mean(), inplace=True)

# For columns with 1200 missing values, drop those rows
cols_to_dropna = ['LandMaxTemperature', 'LandMaxTemperatureUncertainty', 'LandMinTemperature', 'LandMinTemperatureUncertainty', 'LandAndOceanAverageTemperature', 'LandAndOceanAverageTemperatureUncertainty']
df.dropna(subset=cols_to_dropna, inplace=True)

In [3]:
# Add Year and Month columns based on 'dt' column
df['Year'] = pd.to_datetime(df['dt']).dt.year
df['Month'] = pd.to_datetime(df['dt']).dt.month

# Prepare X (features) and y (target)
X = df.drop(['LandAverageTemperature', 'dt'], axis=1)
y = df['LandAverageTemperature']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale X and y using MinMaxScaler
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

# Fit and transform the training data
X_train_scaled = scaler_x.fit_transform(X_train)
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))

# Only transform the testing data
X_test_scaled = scaler_x.transform(X_test)
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

# Reshape X_train_scaled and X_test_scaled for RNN input
X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

# Print the shapes to verify
print("X_train_scaled shape:", X_train_scaled.shape)
print("X_test_scaled shape:", X_test_scaled.shape)
print("y_train_scaled shape:", y_train_scaled.shape)
print("y_test_scaled shape:", y_test_scaled.shape)

X_train_scaled shape: (1593, 9, 1)
X_test_scaled shape: (399, 9, 1)
y_train_scaled shape: (1593, 1)
y_test_scaled shape: (399, 1)


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, Input
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import numpy as np

# Define a function to create the GRU model
def create_gru_model(optimizer='adam', dropout_rate=0.2, units=100):
    model = Sequential()
    model.add(Input(shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
    model.add(GRU(units, activation='relu', return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(GRU(units, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    
    model.compile(optimizer=optimizer, loss='mse')
    return model

# Wrap the Keras model so it can be used by sklearn
model = KerasRegressor(build_fn=create_gru_model, verbose=0,units=50,dropout_rate=0.1)

# Define the parameter grid
param_grid = {
    'optimizer': ['adam', 'rmsprop'],
    'dropout_rate': [0.1, 0.2, 0.3],
    'units': [50, 100, 150]
}

# Setup random search with cross-validation
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                                   n_iter=10, cv=3, verbose=2, random_state=42)

# Perform the random search
random_search.fit(X_train_scaled, y_train_scaled)

# Print the best parameters and best score
print("Best parameters found: ", random_search.best_params_)
print("Best CV score: ", random_search.best_score_)

# Use the best model found
best_model = random_search.best_estimator_

# Evaluate the best model on test data if available
test_loss = best_model.score(X_test_scaled, y_test_scaled)
print("Test loss: ", test_loss)


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .........dropout_rate=0.1, optimizer=adam, units=50; total time=   3.1s
[CV] END .........dropout_rate=0.1, optimizer=adam, units=50; total time=   3.3s
[CV] END .........dropout_rate=0.1, optimizer=adam, units=50; total time=   3.3s
[CV] END ........dropout_rate=0.1, optimizer=adam, units=100; total time=   3.2s
[CV] END ........dropout_rate=0.1, optimizer=adam, units=100; total time=   3.3s
[CV] END ........dropout_rate=0.1, optimizer=adam, units=100; total time=   3.2s
[CV] END ........dropout_rate=0.2, optimizer=adam, units=150; total time=   3.4s
[CV] END ........dropout_rate=0.2, optimizer=adam, units=150; total time=   3.2s
[CV] END ........dropout_rate=0.2, optimizer=adam, units=150; total time=   3.3s
[CV] END .....dropout_rate=0.1, optimizer=rmsprop, units=150; total time=   3.3s
[CV] END .....dropout_rate=0.1, optimizer=rmsprop, units=150; total time=   3.5s
[CV] END .....dropout_rate=0.1, optimizer=rmspro

In [5]:
# Train the model
history = model.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32, validation_split=0.2,verbose=2)

Epoch 1/100
40/40 - 4s - 90ms/step - loss: 0.1238 - val_loss: 0.0542
Epoch 2/100
40/40 - 0s - 6ms/step - loss: 0.0412 - val_loss: 0.0221
Epoch 3/100
40/40 - 0s - 6ms/step - loss: 0.0169 - val_loss: 0.0096
Epoch 4/100
40/40 - 0s - 6ms/step - loss: 0.0161 - val_loss: 0.0109
Epoch 5/100
40/40 - 0s - 7ms/step - loss: 0.0143 - val_loss: 0.0078
Epoch 6/100
40/40 - 0s - 6ms/step - loss: 0.0127 - val_loss: 0.0071
Epoch 7/100
40/40 - 0s - 7ms/step - loss: 0.0121 - val_loss: 0.0070
Epoch 8/100
40/40 - 0s - 6ms/step - loss: 0.0117 - val_loss: 0.0063
Epoch 9/100
40/40 - 0s - 7ms/step - loss: 0.0104 - val_loss: 0.0051
Epoch 10/100
40/40 - 0s - 6ms/step - loss: 0.0092 - val_loss: 0.0052
Epoch 11/100
40/40 - 0s - 6ms/step - loss: 0.0087 - val_loss: 0.0037
Epoch 12/100
40/40 - 0s - 7ms/step - loss: 0.0076 - val_loss: 0.0032
Epoch 13/100
40/40 - 0s - 6ms/step - loss: 0.0079 - val_loss: 0.0034
Epoch 14/100
40/40 - 0s - 6ms/step - loss: 0.0066 - val_loss: 0.0033
Epoch 15/100
40/40 - 0s - 7ms/step - loss:

In [6]:
# Make predictions
predictions = model.predict(X_test_scaled)
predictions = scaler_y.inverse_transform(predictions)

In [7]:
# Compare predictions with actual values
actual = scaler_y.inverse_transform(y_test_scaled)
for i in range(len(predictions)):
    print(f"Actual: {actual[i][0]}, Predicted: {predictions[i][0]}")

Actual: 3.88, Predicted: 3.806281805038452
Actual: 8.689, Predicted: 8.661441802978516
Actual: 13.622, Predicted: 13.519972801208496
Actual: 2.335, Predicted: 2.64821195602417
Actual: 5.952999999999999, Predicted: 6.254845142364502
Actual: 4.869, Predicted: 5.085196495056152
Actual: 14.768, Predicted: 14.942634582519531
Actual: 7.423999999999999, Predicted: 7.803701400756836
Actual: 4.103, Predicted: 3.6517391204833984
Actual: 4.519, Predicted: 4.497413158416748
Actual: 14.021, Predicted: 13.92534351348877
Actual: 14.034, Predicted: 14.0799560546875
Actual: 4.85, Predicted: 5.281241416931152
Actual: 9.453, Predicted: 9.529918670654297
Actual: 6.222, Predicted: 6.207124710083008
Actual: 14.445, Predicted: 14.59064769744873
Actual: 11.062, Predicted: 11.079059600830078
Actual: 2.455, Predicted: 2.5452351570129395
Actual: 6.273, Predicted: 6.325991153717041
Actual: 4.303, Predicted: 4.232967376708984
Actual: 5.066, Predicted: 5.534148216247559
Actual: 10.941999999999998, Predicted: 10.881

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Calculate metrics
mae = mean_absolute_error(actual, predictions)
mse = mean_squared_error(actual, predictions)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

Mean Absolute Error (MAE): 0.24344677739633364
Mean Squared Error (MSE): 0.1440171438839742
Root Mean Squared Error (RMSE): 0.37949590759845386
