## Forecast 10 months global temperature for 2016
Allison Liu
February 19, 2024


In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [3]:
# Use the monthly global temperature time series data
file_path = '/Users/user/Downloads/monthly_csv.csv'
# Load the CSV file into a DataFrame
df = pd.read_csv(file_path)

In [5]:
# filter the Source = 'GCAG'
df = df[df['Source'] == 'GCAG']

In [7]:
df.shape

(1644, 3)

In [8]:
df

Unnamed: 0,Source,Date,Mean
0,GCAG,2016-12-06,0.7895
2,GCAG,2016-11-06,0.7504
4,GCAG,2016-10-06,0.7292
6,GCAG,2016-09-06,0.8767
8,GCAG,2016-08-06,0.8998
...,...,...,...
3278,GCAG,1880-05-06,-0.0738
3280,GCAG,1880-04-06,-0.0499
3282,GCAG,1880-03-06,-0.1357
3284,GCAG,1880-02-06,-0.1229


In [11]:
# Reverse data into chronological order
df = df.sort_values(by='Date', ascending=True)

In [12]:
df

Unnamed: 0,Source,Date,Mean
3286,GCAG,1880-01-06,0.0009
3284,GCAG,1880-02-06,-0.1229
3282,GCAG,1880-03-06,-0.1357
3280,GCAG,1880-04-06,-0.0499
3278,GCAG,1880-05-06,-0.0738
...,...,...,...
8,GCAG,2016-08-06,0.8998
6,GCAG,2016-09-06,0.8767
4,GCAG,2016-10-06,0.7292
2,GCAG,2016-11-06,0.7504


In [15]:
data = df['Mean'].values

In [17]:
def create_time_series_dataset(data, look_back):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:i+look_back])
        y.append(data[i+look_back])
    return np.array(X), np.array(y)

In [19]:
look_back = 4  # Number of past time steps to consider

X, y = create_time_series_dataset(data, look_back)

In [20]:
look_forward = 10  # Number of future time steps to predict (i.e., the horizon)
split_index = int(len(X) - look_forward) #The rest of the dataset is considered the training data
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [21]:
X_train = X_train.reshape(-1, look_back, 1) #(batch_size, time_steps, input_features)
X_test = X_test.reshape(-1, look_back, 1)

In [36]:
if 'model' in locals():
    del model

# We then set a random seed for reproducibility.
tf.random.set_seed(66)

# The LSTM layer provides memory to the network and allows it to learn long-term dependencies.
# Replacing the LSTM layer by the (commented) Dense layer will result in a Deep AR.
model = Sequential([
    LSTM(64, activation='relu', input_shape=(look_back, 1)),
    Dense(64, activation='relu', input_shape=(look_back, 1)),
    Dense(64, activation='relu', input_shape=(look_back, 1)),
    Dense(64, activation='relu', input_shape=(look_back, 1)),
    Dense(64, activation='relu', input_shape=(look_back, 1)),
    Dense(64, activation='relu', input_shape=(look_back, 1)),
    Dense(64, activation='relu', input_shape=(look_back, 1)),
    Dense(64, activation='relu', input_shape=(look_back, 1)),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse') # Adam optimizer and MSE loss function for regression

In [37]:
model.fit(X_train, y_train, epochs=32, batch_size=64)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.src.callbacks.History at 0x1f210a9ad60>

In [39]:
y_pred = model.predict(X_test)
rmse = np.sqrt(np.mean((y_pred - y_test)**2))
print(rmse) 

0.2069164192618972
