# File utils

In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import logging
import csv
from torch.utils.data import Dataset, DataLoader

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Configure the root logger so that INFO-level messages (and above) are emitted.
logging.basicConfig(level=logging.INFO)


def fill_na(mat):
    """Fill NaN entries of a 2-D array in place from their vertical neighbours.

    NaN positions are visited in row-major order, so a value filled in row
    ``i-1`` is already available when row ``i`` is processed.  For each NaN at
    ``(i, j)``:

    * if both the row above and the row below hold a value, use their mean;
    * if only one of them holds a value, copy that value;
    * if neither does (or the neighbour does not exist), the entry stays NaN.

    Rows outside the array are treated as missing, so a NaN in the first or
    last row no longer wraps around to the opposite end or raises IndexError.

    Args:
        mat: 2-D float ``numpy`` array; modified in place.

    Returns:
        The same array object ``mat``.
    """
    n_rows = mat.shape[0]
    ix, iy = np.where(np.isnan(mat))
    for i, j in zip(ix, iy):
        # Out-of-range neighbours are treated as missing instead of being
        # indexed (i-1 == -1 would otherwise silently read the last row).
        prev_val = mat[i - 1, j] if i > 0 else np.nan
        next_val = mat[i + 1, j] if i + 1 < n_rows else np.nan
        if np.isnan(next_val):
            mat[i, j] = prev_val
        elif np.isnan(prev_val):
            mat[i, j] = next_val
        else:
            mat[i, j] = (prev_val + next_val) / 2.
    return mat


def read_temps(path):
    """Read the temperature CSV file into a float tensor.

    The first line of the file is skipped as a header.  For every remaining
    row, column 0 is dropped and the other columns are parsed as floats, with
    empty cells becoming NaN.  A row is skipped when its first kept cell
    (column 1) is not made only of digits once the dots are removed.  The
    resulting matrix goes through ``fill_na`` before being converted.

    Args:
        path: path of the CSV file to read.

    Returns:
        A ``torch.float`` tensor built from the parsed rows.
    """
    rows = []
    with open(path, "rt") as fp:
        lines = csv.reader(fp, delimiter=',')
        next(lines)  # skip the header line
        for fields in lines:
            first_cell = fields[1]
            if first_cell.replace(".", "").isdigit():
                rows.append([float('nan') if cell == "" else float(cell) for cell in fields[1:]])
    filled = fill_na(np.array(rows))
    return torch.tensor(filled, dtype=torch.float)



class Dataset_temp(Dataset):
    """Sliding-window dataset over the columns of a temperature tensor.

    Every item is a window of ``lenght`` consecutive rows taken from a single
    column of ``data``, paired with the index of that column (used as the
    class label).

    Args:
        data: tensor indexed as ``data[row, column, ...]``.
        target: stored as ``self.target`` but not used to build items; the
            label returned is always the column index.
        lenght: number of consecutive rows in each window (spelling kept for
            backward compatibility with existing callers).
    """
    def __init__(self, data, target, lenght=50):
        self.data = data
        self.target = target
        self.lenght = lenght
        # Number of distinct window start positions inside one column.
        self.size = self.data.shape[0]-self.lenght+1

    def __getitem__(self, index):
        """Return ``(window, column)`` for the flat index ``index``."""
        # Flat indices enumerate every window of column 0, then column 1, ...
        col = index//self.size
        lin = index%self.size
        return (self.data[lin:lin+self.lenght, col], col)

    def __len__(self):
        """Total number of windows: windows per column times column count."""
        return self.size*self.data.shape[1]

class Dataset_temp2(Dataset):
    """Dataset yielding one whole series per index of the last axis of ``data``.

    ``__len__`` counts the entries of ``data`` along axis 2, so ``__getitem__``
    indexes that same axis.  The previous ``__getitem__`` read ``self.size``
    and ``self.lenght``, which this class never defines, and therefore raised
    AttributeError on every access.

    NOTE(review): the intended layout is assumed to be ``data[row, 1, series]``
    with ``target[series]`` holding the matching label -- confirm against the
    caller before relying on this class.

    Args:
        data: tensor with at least three axes.
        target: indexable object providing one label per entry of axis 2.
    """
    def __init__(self, data, target):
        self.data = data
        self.target = target

    def __getitem__(self, index):
        """Return ``(data[:, :, index], target[index])``."""
        return (self.data[:, :, index], self.target[index])

    def __len__(self):
        """Number of entries along the last (third) axis of ``data``."""
        return self.data.shape[2]



In [214]:
class RNN(nn.Module):
    """Simple recurrent network with a separate linear decoding head.

    The hidden state follows ``h_t = tanh(linearX(x_t) + linearH(h_{t-1}))``
    and ``decode`` maps a hidden state to ``tanh(linearD(h))``.

    Args:
        latent_dim: size of the hidden state.
        input_dim: size of each input vector ``x_t``.
        output_dim: size of the decoded output.
    """
    def __init__(self, latent_dim, input_dim, output_dim):
        super().__init__()
        self.latent_size = latent_dim
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.act_encode = torch.tanh
        # NOTE(review): the decoder output is fed to CrossEntropyLoss in this
        # notebook; that loss expects unbounded scores, whereas tanh limits
        # them to [-1, 1].  Left unchanged to keep decode()'s output identical.
        self.act_decode = torch.tanh

        # Network parameters (creation order is kept so that seeded
        # initialisation yields the same weights as before).
        self.linearX = nn.Linear(input_dim, latent_dim, bias=True)
        self.linearH = nn.Linear(latent_dim, latent_dim, bias=False)

        self.linearD = nn.Linear(latent_dim, output_dim, bias=True)

    def one_step(self, x, h):
        """Compute the next hidden state for a single time step.

        Args:
            x: input of shape (batch, input_dim).
            h: previous hidden state of shape (batch, latent_size).

        Returns:
            New hidden state of shape (batch, latent_size).
        """
        return self.act_encode(self.linearX(x) + self.linearH(h))

    def forward(self, x, h=None):
        """Run the recurrence over a batch of sequences.

        Args:
            x: tensor of shape (length_sequence, batch, input_dim).
            h: optional initial hidden state of shape (batch, latent_size).
                Defaults to zeros created with the dtype and device of ``x``.

        Returns:
            Tensor of shape (length_sequence, batch, latent_size) holding the
            hidden state after each time step.  An empty sequence yields an
            empty tensor of shape (0, batch, latent_size).
        """
        length, batch, _ = x.shape
        if h is None:
            # new_zeros follows x's dtype/device, so the model also works on
            # GPU or in double precision.
            h = x.new_zeros((batch, self.latent_size))

        # Collect the states in a list and stack once: no in-place writes
        # into a pre-allocated buffer, hence no clone() workaround.
        states = []
        for t in range(length):
            h = self.one_step(x[t], h)
            states.append(h)

        if not states:
            return x.new_zeros((0, batch, self.latent_size))
        return torch.stack(states)

    def decode(self, h):
        """Decode a batch of hidden states into outputs of size output_dim."""
        return self.act_decode(self.linearD(h))
    

In [199]:
# Minimal CrossEntropyLoss example with 3 samples and 5 classes.
loss = nn.CrossEntropyLoss()
# Random scores of shape (3, 5); note that the name `input` shadows the Python builtin.
input = torch.randn(3, 5, requires_grad=True)
# One random class index in [0, 5) per sample, stored as int64.
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)

In [200]:
input.shape

torch.Size([3, 5])

In [201]:
target.shape

torch.Size([3])

In [202]:
input

tensor([[ 0.7621, -0.2009, -0.3928,  1.3664,  0.9750],
        [-1.3933,  1.4714,  0.4377,  0.4443,  0.1470],
        [ 0.7814, -0.5653, -1.0182, -2.1153, -0.0603]], requires_grad=True)

In [203]:
target

tensor([0, 1, 1])

# File exo2

In [None]:
from utils import read_temps, device, RNN, Dataset_temp
import torch
from torch.utils.data import Dataset, DataLoader

#  TODO:  Question 2: predict the city a temperature sequence comes from

# read_temps returns a 2-D tensor; unsqueeze(2) appends the per-step feature
# axis, as done in the "TME TEST" cells of this notebook.  (unsqueeze(1) put
# the singleton axis in the middle, which made shape[1] -- and therefore
# output_dim -- equal to 1.)
temp_test = read_temps("data/tempAMAL_test.csv").unsqueeze(2)
temp_train = read_temps("data/tempAMAL_train.csv").unsqueeze(2)
# One label per column of the data.
temp_test_labels = torch.arange(temp_test.shape[1])
temp_train_labels = torch.arange(temp_train.shape[1])
print(f"train shape {temp_train.shape}")
print(f"test shape {temp_test.shape}")

BATCH_SIZE = 30

# Assumes Dataset_temp yields (window, column_index) pairs, as the definition
# in the "TME TEST" section does.
train_loader = DataLoader(Dataset_temp(temp_train, temp_train_labels), shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(Dataset_temp(temp_test, temp_test_labels), shuffle=True, batch_size=BATCH_SIZE)

num_epochs = 50
latent_size = 20
input_dim = 1
output_dim = temp_train.shape[1]  # one class per column of the data

model = RNN(latent_size, input_dim, output_dim)

# RNN stores its weights in nn.Linear sub-modules, so the parameters are
# obtained through model.parameters() (there is no model.Wx / Wh / Wd / bh / bd).
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

error = torch.nn.CrossEntropyLoss()

# Training loop
print("Training ...")

# Mean loss of each epoch.
train_loss_list = []
test_loss_list = []

for epoch in range(num_epochs):
    model.train()
    batch_losses = []
    for i, (sequences, labels) in enumerate(train_loader):

        optimizer.zero_grad()
        # DataLoader batches are (batch, length, dim); RNN.forward expects
        # (length, batch, dim).
        hidden_states = model(sequences.permute(1, 0, 2))
        outputs = model.decode(hidden_states[-1])
        # The classification target is the label, not the input sequence.
        train_loss = error(outputs, labels)
        train_loss.backward()
        optimizer.step()
        batch_losses.append(train_loss.item())
    train_loss_list.append(sum(batch_losses) / max(len(batch_losses), 1))

    model.eval()
    batch_losses = []
    with torch.no_grad():
        for i, (sequences, labels) in enumerate(test_loader):
            hidden_states = model(sequences.permute(1, 0, 2))
            outputs = model.decode(hidden_states[-1])
            test_loss = error(outputs, labels)
            batch_losses.append(test_loss.item())
    test_loss_list.append(sum(batch_losses) / max(len(batch_losses), 1))

    print(f"Itérations {epoch}: train loss {train_loss_list[-1]}, test loss {test_loss_list[-1]}")




# TME TEST

In [215]:
class Dataset_temp(Dataset):
    """Fixed-length windows cut out of each column of ``data``.

    A flat index first selects the column (``index // size``) and then the
    starting row inside that column (``index % size``).  Each item is the
    window of ``lenght`` rows together with the column index.  The ``target``
    argument is accepted but not used.
    """
    def __init__(self, data, target, lenght=50):
        self.data, self.lenght = data, lenght
        # Number of possible window start rows within one column.
        self.size = data.shape[0] - lenght + 1

    def __len__(self):
        n_columns = self.data.shape[1]
        return self.size * n_columns

    def __getitem__(self, index):
        column = index // self.size
        start = index % self.size
        stop = start + self.lenght
        window = self.data[start:stop, column]
        return (window, column)

In [236]:
# Load both temperature files and append a trailing axis of size 1
# (unsqueeze(2)), which serves as the input dimension fed to the RNN.
temp_train = read_temps("data/tempAMAL_train.csv").unsqueeze(2)
temp_test = read_temps("data/tempAMAL_test.csv").unsqueeze(2)

In [237]:
# Keep only the first 10 entries along axis 1 (presumably the first 10 cities;
# this matches output_dim = 10 in the configuration cell -- keep them in sync).
# NOTE: re-running this cell is harmless, but the full tensors are overwritten.
temp_train = temp_train[:, :10]
temp_test = temp_test[:, :10]

In [238]:
# Batches of 30 windows, each window being 50 rows long (third argument of
# Dataset_temp); the target argument is passed as None.
BATCH_SIZE = 30
train_loader = DataLoader(Dataset_temp(temp_train, None, 50), shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(Dataset_temp(temp_test, None, 50), shuffle=True, batch_size=BATCH_SIZE)

In [239]:
# Hyper-parameters of the experiment.
num_epochs = 50
latent_size = 20
input_dim = 1
output_dim = 10  # number of classes
lr=1e-3

# RNN takes (latent_dim, input_dim, output_dim), in that order.
model = RNN(latent_size, input_dim, output_dim)
#model = RNN(input_dim, latent_size, output_dim)

optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
# No gradient has been accumulated yet at this point; the training loop
# calls zero_grad() again before every step.
optimizer.zero_grad()

criterion = torch.nn.CrossEntropyLoss()

In [240]:
# Training loop
# Anomaly detection (torch.autograd.set_detect_anomaly) is a debugging aid
# that slows down every forward/backward pass, so it is not enabled for the
# actual training run.
print("Training ...")

for epoch in range(num_epochs):
    model.train()
    train_losses = []
    for i, (sequences, labels) in enumerate(train_loader):

        optimizer.zero_grad()

        # DataLoader batches are (batch, length, dim); the RNN expects
        # (length, batch, dim).
        hidden_states = model(sequences.permute(1,0,2))
        # Classify from the last hidden state of each sequence.
        outputs = model.decode(hidden_states[-1])

        train_loss = criterion(outputs, labels)
        train_loss.backward()
        optimizer.step()
        train_losses.append(train_loss.item())

    model.eval()
    test_losses = []
    with torch.no_grad():
        for i, (sequences, labels) in enumerate(test_loader):
            hidden_states = model(sequences.permute(1,0,2))
            outputs = model.decode(hidden_states[-1])
            test_loss = criterion(outputs, labels)
            test_losses.append(test_loss.item())

    # Report the mean loss over the epoch rather than the last batch only.
    mean_train_loss = sum(train_losses) / max(len(train_losses), 1)
    mean_test_loss = sum(test_losses) / max(len(test_losses), 1)
    print(f"Itérations {epoch}: train loss {mean_train_loss}, test loss {mean_test_loss}")

Training ...


KeyboardInterrupt: 

In [232]:
sequences.shape

torch.Size([7, 3, 1])

In [234]:
sequences.permute(1,0,2).shape

torch.Size([3, 7, 1])

In [228]:
hidden_states[-1].shape

torch.Size([3, 5])

In [223]:
labels.shape

torch.Size([7])

In [224]:
outputs.shape

torch.Size([3, 30])

In [225]:
outputs

tensor([[ 0.6793,  0.1211, -0.6252, -0.1566,  0.7562, -0.6923, -0.7007,  0.0796,
          0.0245,  0.7754,  0.2657,  0.0636, -0.8547, -0.7040,  0.6100, -0.3331,
          0.0033,  0.1281, -0.4460, -0.5006,  0.8644,  0.5762, -0.2942, -0.4256,
          0.3152,  0.4934, -0.0391, -0.6734,  0.0656,  0.6186],
        [ 0.6793,  0.1211, -0.6252, -0.1566,  0.7562, -0.6923, -0.7007,  0.0796,
          0.0245,  0.7754,  0.2657,  0.0636, -0.8547, -0.7040,  0.6100, -0.3331,
          0.0033,  0.1281, -0.4460, -0.5006,  0.8644,  0.5762, -0.2942, -0.4256,
          0.3152,  0.4934, -0.0391, -0.6734,  0.0656,  0.6186],
        [ 0.6793,  0.1211, -0.6252, -0.1566,  0.7562, -0.6923, -0.7007,  0.0796,
          0.0245,  0.7754,  0.2657,  0.0636, -0.8547, -0.7040,  0.6100, -0.3331,
          0.0033,  0.1281, -0.4460, -0.5006,  0.8644,  0.5762, -0.2942, -0.4256,
          0.3152,  0.4934, -0.0391, -0.6734,  0.0656,  0.6186]],
       grad_fn=<TanhBackward>)

In [38]:
labels

tensor([1, 0, 2])

In [39]:
outputs

tensor([[-0.1091],
        [-0.1091],
        [-0.1091]], grad_fn=<TanhBackward>)

In [40]:
hidden_states

tensor([[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

        [[ 0.7301, -1.0000,  1.0000,  1.0000,  1.0000],
         [ 0.7301, -1.0000,  1.0000,  1.0000,  1.0000],
         [ 0.7301, -1.0000,  1.0000,  1.0000,  1.0000]],

        [[ 0.5154, -1.0000,  1.0000,  1.0000,  1.0000],
         [ 0.5154, -1.0000,  1.0000,  1.0000,  1.0000],
         [ 0.5153, -1.0000,  1.0000,  1.0000,  1.0000]]], grad_fn=<CopySlices>)

In [41]:
hidden_states[-1]

tensor([[ 0.5154, -1.0000,  1.0000,  1.0000,  1.0000],
        [ 0.5154, -1.0000,  1.0000,  1.0000,  1.0000],
        [ 0.5153, -1.0000,  1.0000,  1.0000,  1.0000]],
       grad_fn=<SelectBackward>)

In [11]:
temp_train.shape

torch.Size([33342, 30, 1])

In [9]:
# Scratch check: with unsqueeze(1) the singleton axis ends up in the middle
# (the displayed shape is [33342, 1, 30]), unlike the unsqueeze(2) layout used
# by the training cells.
temp_train, temp_train_labels = read_temps("data/tempAMAL_train.csv").unsqueeze(1), torch.arange(30)
temp_train.shape

torch.Size([33342, 1, 30])

In [None]:
from utils import read_temps, device, RNN, Dataset_temp
import torch
from torch.utils.data import Dataset, DataLoader

#  TODO:  Question 2: predict the city a temperature sequence comes from

# read_temps returns a 2-D tensor; unsqueeze(2) appends the per-step feature
# axis, as done in the "TME TEST" cells of this notebook.  (unsqueeze(1) put
# the singleton axis in the middle, which made shape[1] -- and therefore
# output_dim -- equal to 1.)
temp_test = read_temps("data/tempAMAL_test.csv").unsqueeze(2)
temp_train = read_temps("data/tempAMAL_train.csv").unsqueeze(2)
# One label per column of the data.
temp_test_labels = torch.arange(temp_test.shape[1])
temp_train_labels = torch.arange(temp_train.shape[1])
print(f"train shape {temp_train.shape}")
print(f"test shape {temp_test.shape}")

BATCH_SIZE = 30

# Assumes Dataset_temp yields (window, column_index) pairs, as the definition
# in the "TME TEST" section does.
train_loader = DataLoader(Dataset_temp(temp_train, temp_train_labels), shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(Dataset_temp(temp_test, temp_test_labels), shuffle=True, batch_size=BATCH_SIZE)

num_epochs = 50
latent_size = 20
input_dim = 1
output_dim = temp_train.shape[1]  # one class per column of the data

model = RNN(latent_size, input_dim, output_dim)

# RNN stores its weights in nn.Linear sub-modules, so the parameters are
# obtained through model.parameters() (there is no model.Wx / Wh / Wd / bh / bd).
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

error = torch.nn.CrossEntropyLoss()

# Training loop
print("Training ...")

# Mean loss of each epoch.
train_loss_list = []
test_loss_list = []

for epoch in range(num_epochs):
    model.train()
    batch_losses = []
    for i, (sequences, labels) in enumerate(train_loader):

        optimizer.zero_grad()
        # DataLoader batches are (batch, length, dim); RNN.forward expects
        # (length, batch, dim).
        hidden_states = model(sequences.permute(1, 0, 2))
        outputs = model.decode(hidden_states[-1])
        # The classification target is the label, not the input sequence.
        train_loss = error(outputs, labels)
        train_loss.backward()
        optimizer.step()
        batch_losses.append(train_loss.item())
    train_loss_list.append(sum(batch_losses) / max(len(batch_losses), 1))

    model.eval()
    batch_losses = []
    with torch.no_grad():
        for i, (sequences, labels) in enumerate(test_loader):
            hidden_states = model(sequences.permute(1, 0, 2))
            outputs = model.decode(hidden_states[-1])
            test_loss = error(outputs, labels)
            batch_losses.append(test_loss.item())
    test_loss_list.append(sum(batch_losses) / max(len(batch_losses), 1))

    print(f"Itérations {epoch}: train loss {train_loss_list[-1]}, test loss {test_loss_list[-1]}")


