In [1]:
import pandas as pd
import numpy as np
import os.path as osp
from tqdm.notebook import tqdm

import tensorflow
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.callbacks import EarlyStopping

import torch
import torch.nn as nn
from torch.autograd import Variable 
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('Updated_CRU.csv')
df.head()

Unnamed: 0,time,lat,lon,timeseries-tas-monthly-mean,year,month,dayofyear,quarter
0,1901-01-16,26.25,-122.75,16.322001,1901,1,16,1
1,1901-01-16,26.25,-122.25,16.322001,1901,1,16,1
2,1901-01-16,26.25,-121.75,16.322001,1901,1,16,1
3,1901-01-16,26.25,-121.25,16.322001,1901,1,16,1
4,1901-01-16,26.25,-120.75,16.322001,1901,1,16,1


In [3]:
df.shape

(6644352, 8)

In [4]:
df_ft = df[['time', 'timeseries-tas-monthly-mean']].copy()

In [5]:
df_ft['time'] = pd.to_datetime(df_ft['time'])
df_ft.set_index('time', inplace=True)
df_ft.rename(columns={'timeseries-tas-monthly-mean': 'temperature'}, inplace=True)
df_ft = df_ft['temperature'].resample('D').mean()
df_ft = df_ft.reset_index()

In [6]:
df_ft.shape

(44165, 2)

In [7]:
df_ft.head()

Unnamed: 0,time,temperature
0,1901-01-16,2.729315
1,1901-01-17,
2,1901-01-18,
3,1901-01-19,
4,1901-01-20,


In [8]:
df_ft['year'] = df_ft['time'].dt.year
df_ft["week"] = df_ft['time'].dt.isocalendar().week
df_ft['month'] = df_ft['time'].dt.month
df_ft['quarter'] = df_ft['time'].dt.quarter
df_ft['dayofyear'] = df_ft['time'].dt.dayofyear
df_ft['dayofweek'] = df_ft['time'].dt.dayofweek
df_ft["is_month_start"] = df_ft['time'].dt.is_month_start
df_ft["is_month_end"] = df_ft['time'].dt.is_month_end
df_ft["is_quarter_start"] = df_ft['time'].dt.is_quarter_start
df_ft["is_quarter_end"] = df_ft['time'].dt.is_quarter_end
df_ft["is_year_start"] = df_ft['time'].dt.is_year_start
df_ft["is_year_end"] = df_ft['time'].dt.is_year_end
df_ft["days_in_month"] = df_ft['time'].dt.days_in_month
df_ft["is_leap_year"] = df_ft['time'].dt.is_leap_year
df_ft['is_weekend'] = np.where(df_ft['dayofweek'].isin([5, 6]), 1, 0)

In [9]:
def sin_transformer(period):
    return FunctionTransformer(lambda x: np.sin(x / period * 2 * np.pi))


def cos_transformer(period):
    return FunctionTransformer(lambda x: np.cos(x / period * 2 * np.pi))


df_ft["sin_week"] = sin_transformer(7).fit_transform(df_ft['week'])
df_ft["sin_month"] = sin_transformer(12).fit_transform(df_ft['month'])
df_ft["sin_quarter"] = sin_transformer(4).fit_transform(df_ft['quarter'])
df_ft["sin_dayofyear"] = sin_transformer(365).fit_transform(df_ft['dayofyear'])
df_ft['sin_day_of_week'] = sin_transformer(7).fit_transform(df_ft['dayofweek'])

df_ft["cos_week"] = cos_transformer(7).fit_transform(df_ft['week'])
df_ft["cos_month"] = cos_transformer(12).fit_transform(df_ft['month'])
df_ft["cos_quarter"] = cos_transformer(4).fit_transform(df_ft['quarter'])
df_ft["cos_dayofyear"] = cos_transformer(365).fit_transform(df_ft['dayofyear'])
df_ft['cos_day_of_week'] = cos_transformer(7).fit_transform(df_ft['dayofweek'])

In [13]:
import torch
import torch.nn as nn

# Define the LSTM model
model = Model_LSTM(num_features=1, hidden_units=128, timesteps=24, lstm_layers=2)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

# Prepare the data for training
# Assume your data is stored in a variable called 'data'

# split the data into training and test sets
train_data = df[:5000000, :]
test_data = df[5000000:, :]

# Extract the temperature values and convert to a tensor
temp_train = torch.tensor(train_data[:, 0], dtype=torch.float32)
temp_test = torch.tensor(test_data[:, 0], dtype=torch.float32)

# Normalize the temperature values
temp_mean = temp_train.mean()
temp_std = temp_train.std()
temp_train = (temp_train - temp_mean) / temp_std
temp_test = (temp_test - temp_mean) / temp_std

# Reshape the temperature values to 3D tensor with shape (batch_size, timesteps, num_features)
# Assume you want to use a batch size of 256 and timesteps of 24
batch_size = 256
timesteps = 24
temp_train = temp_train.reshape(-1, timesteps, 1)
temp_test = temp_test.reshape(-1, timesteps, 1)

# Define the number of training and test samples
num_train_samples = temp_train.shape[0]
num_test_samples = temp_test.shape[0]

# Training loop
for epoch in range(num_epochs):

    # Shuffle the training data
    perm = torch.randperm(num_train_samples)
    temp_train = temp_train[perm]

    # Initialize the hidden state
    hidden = (torch.randn(lstm_layers, batch_size, hidden_units),
              torch.randn(lstm_layers, batch_size, hidden_units))

    # Iterate over the training batches
    for i in range(0, num_train_samples, batch_size):

        # Get the current batch
        temp_batch = temp_train[i:i+batch_size]

        # Forward pass
        output = model(temp_batch)

        # Compute the loss
        loss = criterion(output, temp_batch)

        # Zero the gradients
        optimizer.zero_grad()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    # Print the training loss
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Evaluation on test set
with torch.no_grad():

    # Initialize the hidden state
    hidden = (torch.randn(lstm_layers, batch_size, hidden_units),
              torch.randn(lstm_layers, batch_size, hidden_units))

# Initialize the test loss
test_loss = 0

# Iterate over the test batches
for i in range(0, num_test_samples, batch_size):

    # Get the current batch
    temp_batch = temp_test[i:i+batch_size]

    # Forward pass
    output = model(temp_batch)

    # Compute the loss
    loss = criterion(output, temp_batch)
    test_loss += loss.item()

# Print the test loss
test_loss /= num_test_samples / batch_size
print(f'Test Loss: {test_loss}')

NameError: name 'Model_LSTM' is not defined

In [42]:
train_series = df_ft.loc[(df_ft['time'] >= '1900-01-16') & (df_ft['time'] < '2010-01-16'), :]
test_series = df_ft.loc[(df_ft['time'] >= '2010-01-16'), :]

In [43]:
X = train_series.drop(['time', 'temperature'], axis=1)
y = train_series['temperature'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [44]:
train_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train.reshape(-1, 1))

X_train = train_scaler.transform(X_train)
y_train = target_scaler.transform(y_train.reshape(-1, 1))

X_val = train_scaler.transform(X_test)
y_val = target_scaler.transform(y_test.reshape(-1, 1))

In [45]:
train_series.drop('time', axis=1, inplace=True)
test_series.drop('time', axis=1, inplace=True)

scaler = StandardScaler()
scaler.fit(train_series)
scaled_train = scaler.transform(train_series)
scaled_test = scaler.transform(test_series)


def df_to_x_y(df, window_size=1):
    X = []
    y = []
    for i in range(len(df) - window_size):
        row = [[a] for a in df[i: i + window_size, 0]]
        X.append(row)
        label = df[i + window_size, 0]
        y.append(label)
    return np.array(X), np.array(y)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_series.drop('time', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_series.drop('time', axis=1, inplace=True)


In [46]:
time_steps = 60
X_1, y_1 = df_to_x_y(scaled_train, time_steps)

In [47]:
print(X_1.shape, y_1.shape)

(39737, 60, 1) (39737,)


In [51]:
X_1.head()

AttributeError: 'numpy.ndarray' object has no attribute 'head'

In [48]:
39737 * 0.8

31789.600000000002

In [49]:
X_train, y_train = X_1[:31790], y_1[:31790]
X_test, y_test = X_1[31790:], y_1[31790:]

In [50]:
model = Sequential()
model.add(LSTM(128, activation='relu', return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mae')
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, shuffle=False)

early_stopping = EarlyStopping(
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
)

plt.plot(history.history["loss"], label="loss")
plt.plot(history.history["val_loss"], label="val_loss")
plt.legend(loc="best")
plt.xlabel("No. Of Epochs")
plt.ylabel("mse score")
plt.show()

Epoch 1/100
Epoch 2/100
113/994 [==>...........................] - ETA: 23s - loss: nan

KeyboardInterrupt: 

In [10]:
X_train_tensors = Variable(torch.Tensor(X_train))
X_test_tensors = Variable(torch.Tensor(X_val))

y_train_tensors = Variable(torch.Tensor(y_train))
y_test_tensors = Variable(torch.Tensor(y_val)) 

  X_train_tensors = Variable(torch.Tensor(X_train))


In [11]:
X_train_tensors_final = torch.reshape(X_train_tensors,   (X_train_tensors.shape[0], 1, X_train_tensors.shape[1]))
X_test_tensors_final = torch.reshape(X_test_tensors,  (X_test_tensors.shape[0], 1, X_test_tensors.shape[1])) 

In [12]:
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print("Testing Shape", X_test_tensors_final.shape, y_test_tensors.shape) 

Training Shape torch.Size([1046, 1, 25]) torch.Size([1046, 1])
Testing Shape torch.Size([262, 1, 25]) torch.Size([262, 1])


In [13]:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes #number of classes
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        self.seq_length = seq_length #sequence length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True) #lstm
        self.fc_1 =  nn.Linear(hidden_size, 128) #fully connected 1
        self.fc = nn.Linear(128, num_classes) #fully connected last layer

        self.relu = nn.ReLU()
    
    def forward(self,x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #hidden state
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
        out = self.relu(hn)
        out = self.fc_1(out) #first Dense
        out = self.relu(out) #relu
        out = self.fc(out) #Final Output
        return out

In [14]:
num_epochs = 1000
learning_rate = 0.001

input_size = 25
hidden_size = 2
num_layers = 1

output_size = 1

In [15]:
lstm1 = LSTM1(output_size, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1])

In [16]:
criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate) 

In [17]:
for epoch in range(num_epochs):
    outputs = lstm1.forward(X_train_tensors_final) #forward pass
    optimizer.zero_grad() #caluclate the gradient, manually setting to 0
 
    # obtain the loss function
    loss = criterion(outputs, y_train_tensors)
 
    loss.backward() #calculates the loss of the loss function
 
    optimizer.step() #improve from loss, i.e backprop
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 1.01582
Epoch: 100, loss: 0.39306
Epoch: 200, loss: 0.08312
Epoch: 300, loss: 0.04919
Epoch: 400, loss: 0.03550
Epoch: 500, loss: 0.02697
Epoch: 600, loss: 0.02303
Epoch: 700, loss: 0.02038
Epoch: 800, loss: 0.01915
Epoch: 900, loss: 0.01857


In [27]:
train_predict = lstm1(X_train)#forward pass
data_predict = train_predict.data.numpy() #numpy conversion
dataY_plot = df_y_mm.data.numpy()

data_predict = mm.inverse_transform(data_predict) #reverse transformation
dataY_plot = mm.inverse_transform(dataY_plot)
plt.figure(figsize=(10,6)) #plotting
plt.axvline(x=200, c='r', linestyle='--') #size of the training set

plt.plot(dataY_plot, label='Actuall Data') #actual plot
plt.plot(data_predict, label='Predicted Data') #predicted plot
plt.title('Time-Series Prediction')
plt.legend()
plt.show() 

TypeError: 'int' object is not callable

In [18]:
class Model_LSTM(nn.Module):
    def __init__(self, num_features, hidden_units, timesteps, lstm_layers=1):
        
        super().__init__()
        self.num_features = num_features  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = lstm_layers
        self.seq_len = timesteps
        # self.proj_size = 64
        dense1 = 1024
        dense2 = 512
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers,
            dropout = 0.2,
            # proj_size = self.proj_size
        )
        # self.lstm_linear = nn.Linear(in_features=self.hidden_units*self.num_layers, out_features=1024)
        self.dropout = nn.Dropout(0.3)
        self.lstm_linear = nn.Linear(self.seq_len*self.hidden_units,dense1)
        self.linear_mid = nn.Linear(dense1,dense2)
        self.linear_out = nn.Linear(dense2,1)
    def forward(self, x):
        
        lstm_out, (hn, _) = self.lstm(x)
        
        ## all lstm layer hidden states
        lstm_out = lstm_out.reshape(lstm_out.shape[0], -1)
        out0 = self.lstm_linear(lstm_out)
        out0 = self.dropout(out0)
        
        out1 = self.linear_mid(out0)
        out1 = self.dropout(out1)
        out = self.linear_out(out1)
        return out