In [48]:
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import torch
import sys
import cvxpy as cp
import tqdm
import importlib
import sklearn
import pandas as pd
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [4]:
class Net(nn.Module):

    def __init__(self,input_size):
        super(Net, self).__init__()
        #input_size should be the length of the features vector, specified at the begining 
        self.fc1 = nn.Linear(input_size, 2**8)
        self.fc2 = nn.Linear(2**8, 2**8)
        self.fc3 = nn.Linear(2**8, 2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = torch.sigmoid(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [81]:
class Dataset(object):

    def __getitem__(self, index):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __add__(self, other):
        return ConcatDataset([self, other])

class CryptoData(Dataset):
    
    def __init__(self, file_path,transform):
        self.data = pd.read_csv(file_path)
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        # load image as ndarray type (Height * Width * Channels)
        # be carefull for converting dtype to np.uint8 [Unsigned integer (0 to 255)]
        # in this example, i don't use ToTensor() method of torchvision.transforms
        # so you can convert numpy ndarray shape to tensor in PyTorch (H, W, C) --> (C, H, W)
        image = self.data.iloc[index, 1:51]
        image = np.array([image])
        label = self.data.loc[index, 'label']
        label = np.array([image])
        
        if self.transform is not None:
            image = self.transform(image)
            
        return image, label

In [108]:
dataframe = pd.read_csv('/Users/linusbleistein/Documents/Cours ENS/Cours mathématiques/Deep learning 2020-2021/data_project/processed_data.csv')
print(dataframe.head())

   Unnamed: 0          0           1           2         3          4  \
0           0  35.914223  340.111783  244.191304  1.745412  93.341758   
1           1   1.550017   13.919287    0.000182 -0.000060   0.000081   
2           2   0.028112    0.283912    0.000314  0.000403   0.034164   
3           3   0.735473    6.987971    0.023240  0.035425   1.564482   
4           4   0.785418    7.461572    0.025315  0.037810   1.743876   

              5         6             7             8  ...           41  \
0 -2.694931e-07 -0.660855  6.313410e-03  9.161955e-08  ...  1955.011107   
1 -4.859265e-09 -0.000003  9.368407e-09  1.002605e-09  ...     2.950600   
2  2.537544e-08 -0.000005  3.713815e-06  2.054972e-08  ...     0.566343   
3 -5.215645e-08 -0.000381  7.435807e-05  3.000300e-08  ...    57.357101   
4  4.026456e-08 -0.000415  8.040126e-05 -5.799183e-08  ...    61.479207   

           42          43           44           45          46            47  \
0  581.310317 -788.282830 -24

In [109]:
dataframe=dataframe.drop(dataframe.columns[0],axis=1)

In [110]:
dataframe = dataframe.to_numpy()

In [117]:
dataframe = torch.Tensor(dataframe)
dataframe = torch.utils.data.TensorDataset(dataframe[:,0:-1],dataframe[:,-1])

In [141]:
my_dataloader = torch.utils.data.DataLoader(dataframe,batch_size=10)

In [144]:
net=Net(input_size=50)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.00001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(my_dataloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        labels = labels.long()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 0.682
[1,  4000] loss: 0.617
[1,  6000] loss: 0.605
[1,  8000] loss: 0.597
[1, 10000] loss: 0.589
[1, 12000] loss: 0.580
[1, 14000] loss: 0.573
[1, 16000] loss: 0.568
[1, 18000] loss: 0.560
[1, 20000] loss: 0.552
[1, 22000] loss: 0.545
[1, 24000] loss: 0.537
[1, 26000] loss: 0.531
[1, 28000] loss: 0.524
[1, 30000] loss: 0.517
[1, 32000] loss: 0.510
[1, 34000] loss: 0.505
[1, 36000] loss: 0.498
[1, 38000] loss: 0.490
[1, 40000] loss: 0.483
[1, 42000] loss: 0.477
[1, 44000] loss: 0.471
[1, 46000] loss: 0.464
[1, 48000] loss: 0.458
[1, 50000] loss: 0.452
[1, 52000] loss: 0.446
[1, 54000] loss: 0.441
[1, 56000] loss: 0.434
[1, 58000] loss: 0.429
[1, 60000] loss: 0.424
[1, 62000] loss: 0.419
[1, 64000] loss: 0.414
[1, 66000] loss: 0.410
[1, 68000] loss: 0.405
[1, 70000] loss: 0.401
[2,  2000] loss: 0.393
[2,  4000] loss: 0.389
[2,  6000] loss: 0.385
[2,  8000] loss: 0.382
[2, 10000] loss: 0.379
[2, 12000] loss: 0.376
[2, 14000] loss: 0.374
[2, 16000] loss: 0.371
[2, 18000] 

In [148]:
net(dataframe[300][0])

tensor([0.9544, 0.0450], grad_fn=<SigmoidBackward>)