In [None]:
!nvidia-smi

Tue Apr 26 07:37:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   66C    P0    30W /  70W |   3108MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [None]:
class SequenceDataset(Dataset):
    """Sliding-window view over a flat series for next-step prediction.

    Sample ``i`` is the pair ``(window, target)``: ``window`` holds
    ``sequence_length`` consecutive values starting at position ``i`` and
    ``target`` is the single value that immediately follows the window.
    """

    def __init__(self, data, sequence_length=5):
        """Store ``data`` as a flat float32 tensor.

        Args:
            data: Numeric array-like (e.g. a NumPy array of any shape); it is
                flattened into one series.
            sequence_length: Number of consecutive values in each window.

        Raises:
            ValueError: If ``sequence_length`` is smaller than 1.
        """
        if sequence_length < 1:
            raise ValueError("sequence_length must be at least 1")
        self.data = torch.as_tensor(data, dtype=torch.float32).reshape(-1)
        self.sequence_length = sequence_length

    def __len__(self):
        """Return the number of (window, target) pairs available."""
        # The last usable window starts at len(data) - sequence_length - 1
        # (its target is the final element), which gives
        # len(data) - sequence_length samples. Clamp at zero because
        # __len__ must never be negative for a too-short series.
        return max(len(self.data) - self.sequence_length, 0)

    def __getitem__(self, idx):
        """Return ``(window, target)`` for sample ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        count = len(self)
        if idx < 0:
            idx += count
        if not 0 <= idx < count:
            raise IndexError("SequenceDataset index out of range")
        stop = idx + self.sequence_length
        return self.data[idx:stop], self.data[stop]

In [None]:
# Loading data
# The CSV is read without a header row, so columns are addressed by position.
data = pd.read_csv('temperature_filtered.csv', header=None)

# Column 2 is exposed as "unix" and column 6 as "temp".
df = pd.DataFrame({"unix": data[2], "temp": data[6]})

# Ordered (unshuffled) 80/20 split. The test slice starts exactly where the
# training slice ends so that no row is lost between the two parts.
train_size = int(len(df) * 0.8)
train_df, test_df = df.iloc[:train_size], df.iloc[train_size:]
print(f"train: {train_df.shape}; test: {test_df.shape}")

train: (268104, 2); test: (67026, 2)


In [None]:
# Keep only column 6 of the raw frame, with missing entries removed.
df1 = data[6].dropna()

In [None]:
# Fit the scaler on the training portion only (the first 75% of rows, the same
# boundary the train/test split uses) so the min/max of held-out data never
# influences the transform. Held-out values may therefore fall slightly
# outside [0, 1] after scaling.
scalar = MinMaxScaler(feature_range=(0, 1))
values = np.array(df1).reshape(-1, 1)
fit_rows = int(len(values) * 0.75)
scalar.fit(values[:fit_rows])
df1 = scalar.transform(values)

In [None]:
# Split by row position: the leading 75% of rows train the model and the
# remaining rows are held out for evaluation.
training_size = int(0.75 * len(df1))
train_data = df1[:training_size, :]
test_data = df1[training_size:, :1]

In [None]:
# Every sample is a window of `sequence_length` consecutive values plus the
# value that follows it.
sequence_length = 100
train_dataset, test_dataset = (
    SequenceDataset(split, sequence_length) for split in (train_data, test_data)
)

In [None]:
batch_size = 64
# drop_last=True discards a trailing partial batch, so every batch holds
# exactly `batch_size` samples.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True)

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
class LSTM_model(nn.Module):
    """Stacked LSTM followed by two linear layers that emit one value per sequence.

    Inputs use ``nn.LSTM``'s default time-major layout
    ``(seq_len, batch, input_dim)``. Only the LSTM output at the final time
    step is passed through the dense head.
    """

    def __init__(self, input_dim, hidden_size, num_layers):
        """
        Args:
            input_dim: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.num_layers = num_layers
        self.input_size = input_dim
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_size, num_layers=num_layers)
        self.dense1 = nn.Linear(hidden_size, 32)
        self.dense2 = nn.Linear(32, 1)

    def forward(self, x, hn, cn):
        """Run the network with an explicit recurrent state.

        Args:
            x: Input of shape ``(seq_len, batch, input_dim)``.
            hn: Hidden state of shape ``(num_layers, batch, hidden_size)``.
            cn: Cell state with the same shape as ``hn``.

        Returns:
            ``(prediction, hn, cn)`` where ``prediction`` has shape
            ``(batch, 1)`` and ``hn``/``cn`` are the updated LSTM states.
        """
        out, (hn, cn) = self.lstm(x, (hn, cn))
        # out[-1] is the top layer's output at the last time step.
        out = self.dense1(out[-1])
        final_out = self.dense2(out)
        return final_out, hn, cn

    def predict(self, x):
        """Predict from a zero initial state without tracking gradients.

        Args:
            x: Input of shape ``(seq_len, batch, input_dim)``.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        # Size the state from the input so any batch size works.
        hn, cn = self.init(x.shape[1])
        with torch.no_grad():
            final_out, _, _ = self(x, hn, cn)
        return final_out

    def init(self, batch=None):
        """Create zero-filled hidden and cell states.

        Args:
            batch: Number of sequences in the batch. When omitted, the
                module-level ``batch_size`` is used.

        Returns:
            ``(h0, c0)``, each of shape ``(num_layers, batch, hidden_size)``,
            placed on the same device and dtype as the model parameters.
        """
        if batch is None:
            batch = batch_size
        # Follow the model's own parameters rather than a global device name
        # so the state always matches wherever the model currently lives.
        reference = next(self.parameters())
        shape = (self.num_layers, batch, self.hidden_size)
        h0 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        c0 = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        return h0, c0


In [None]:
# One feature per time step, three stacked LSTM layers of 50 hidden units each.
input_dim, hidden_size, num_layers = 1, 50, 3

model = LSTM_model(
    input_dim=input_dim, hidden_size=hidden_size, num_layers=num_layers
)
model = model.to(device)


In [None]:
# Adam updates all model parameters; the objective is mean squared error.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [None]:
def train(dataloader):
    """Run one training epoch and print the loss of the final batch.

    Relies on the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. The LSTM state is carried from one batch to the next and
    detached after every step, so gradients never flow across batches.

    Args:
        dataloader: Yields ``(x, y)`` batches as produced by
            ``SequenceDataset``: ``x`` is ``(batch, sequence_length)`` and
            ``y`` is ``(batch,)``. Every batch must contain the number of
            samples that ``model.init()`` sizes the state for.
    """
    hn, cn = model.init()
    model.train()
    last_batch = len(dataloader) - 1
    for batch, (x, y) in enumerate(dataloader):
        x = x.to(device)
        y = y.to(device)
        # (batch, seq) -> (seq, batch, 1). This must be a transpose: a plain
        # reshape keeps row-major order and would interleave values that
        # belong to different samples into each time step.
        x = x.t().unsqueeze(-1).contiguous()
        out, hn, cn = model(x, hn, cn)
        loss = criterion(out.squeeze(-1), y)
        # Cut the graph here so the next batch does not backpropagate into
        # this one through the carried state.
        hn = hn.detach()
        cn = cn.detach()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch == last_batch:
            print(f"Train Loss: {loss.item()}")

In [None]:
def test(dataloader):
    """Evaluate the model over ``dataloader`` and print the final batch's loss.

    Relies on the module-level ``model``, ``criterion`` and ``device``. The
    LSTM state is carried from one batch to the next.

    Args:
        dataloader: Yields ``(x, y)`` batches as produced by
            ``SequenceDataset``: ``x`` is ``(batch, sequence_length)`` and
            ``y`` is ``(batch,)``. Every batch must contain the number of
            samples that ``model.init()`` sizes the state for.
    """
    hn, cn = model.init()
    model.eval()
    last_batch = len(dataloader) - 1
    # No gradients are needed for evaluation. Without this guard the carried
    # state would chain every batch into one ever-growing autograd graph.
    with torch.no_grad():
        for batch, (x, y) in enumerate(dataloader):
            x = x.to(device)
            y = y.to(device)
            # (batch, seq) -> (seq, batch, 1). This must be a transpose: a
            # plain reshape would interleave values from different samples.
            x = x.t().unsqueeze(-1).contiguous()
            out, hn, cn = model(x, hn, cn)
            loss = criterion(out.squeeze(-1), y)

            if batch == last_batch:
                print(f"Test Loss: {loss.item()}")

In [None]:
# Number of full passes over the training data.
epochs = 10

# Each epoch runs one training pass followed by one evaluation pass.
for epoch in range(epochs):
    print(f"Epoch: {epoch}/{epochs}")
    train(train_dataloader)
    test(test_dataloader)

Epoch: 0/10
Train Loss: 0.003728205105289817
Test Loss: 0.005388734396547079
Epoch: 1/10
Train Loss: 0.0037262276746332645
Test Loss: 0.005392014980316162
Epoch: 2/10
Train Loss: 0.0037236418575048447
Test Loss: 0.005398253910243511
Epoch: 3/10
Train Loss: 0.0037281804252415895
Test Loss: 0.0054739098995924
Epoch: 4/10
Train Loss: 0.003721444169059396
Test Loss: 0.005405885633081198
Epoch: 5/10
Train Loss: 0.0037166145630180836
Test Loss: 0.005429779179394245
Epoch: 6/10
Train Loss: 0.003707971889525652
Test Loss: 0.005377006717026234
Epoch: 7/10
Train Loss: 0.0038196758832782507
Test Loss: 0.005251774098724127
Epoch: 8/10
Train Loss: 0.0039138710126280785
Test Loss: 0.005272689275443554
Epoch: 9/10
Train Loss: 0.0033833803609013557
Test Loss: 0.004746627993881702


In [None]:
import math
from sklearn.metrics import mean_squared_error
import numpy as np

def calculate_metrics(data_loader):
    """Return the RMSE of the model's predictions in the original data units.

    Relies on the module-level ``model``, ``device`` and the fitted
    ``scalar``. Every batch is predicted from the same zero initial state.

    Args:
        data_loader: Yields ``(x, y)`` batches as produced by
            ``SequenceDataset``: ``x`` is ``(batch, sequence_length)`` and
            ``y`` is ``(batch,)``. Every batch must contain the number of
            samples that ``model.init()`` sizes the state for.

    Returns:
        Square root of the mean squared error between the inverse-scaled
        targets and the inverse-scaled predictions.
    """
    pred_arr = []
    y_arr = []
    with torch.no_grad():
        hn, cn = model.init()
        for x, y in data_loader:
            x = x.to(device)
            y = y.to(device)
            # (batch, seq) -> (seq, batch, 1). This must be a transpose: a
            # plain view/reshape would interleave values from different samples.
            x = x.t().unsqueeze(-1).contiguous()
            pred = model(x, hn, cn)[0]
            pred = scalar.inverse_transform(pred.cpu().numpy()).reshape(-1)
            # The scaler was fitted on a single feature column, so targets
            # are passed as (batch, 1) rather than (1, batch).
            y = scalar.inverse_transform(y.cpu().numpy().reshape(-1, 1)).reshape(-1)
            # extend() appends in place instead of rebuilding the list on
            # every batch.
            pred_arr.extend(pred)
            y_arr.extend(y)
    return math.sqrt(mean_squared_error(y_arr, pred_arr))

In [None]:
# calculate_metrics returns the square root of the MSE on inverse-scaled
# values, so the figures are RMSEs in the original data units.
print(f"Training RMSE: {calculate_metrics(train_dataloader)}")
print(f"Test RMSE: {calculate_metrics(test_dataloader)}")

Training mse loss: 2.2831116487502707
Test mse loss: 1.699484758273376


In [None]:
# torch.save(model.state_dict(), "LSTM_model")