In [1]:
import os
import zipfile
from tensorflow import keras
import pandas as pd
import torch as torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from brevage_sales import brevage_preprocessing, Brevage_model
from animal10 import animals10_preprocessing, Animals10_model
from training_functions import train_model
# mean_squared_error
from sklearn.metrics import mean_squared_error

In [2]:
kaggle_datasets = ["rockyt07/stock-market-sensex-nifty-all-time-dataset",
            "minahilfatima12328/performance-trends-in-education",
            "alessiocorrado99/animals10",
            "sebastianwillmann/beverage-sales"]
data_dir = "data/"

download = False

In [3]:
if download:
    !mkdir -p {data_dir}
    for dataset in kaggle_datasets:
        if not os.path.exists(os.path.join(data_dir, dataset.split("/")[-1])):    
            !mkdir -p {data_dir/dataset}
            !kaggle datasets download -d {dataset} -p {data_dir}/{dataset} --unzip


In [4]:
# mnist download
(mnist_X_train_full, mnist_y_train_full), (mnist_X_test, mnist_y_test) = (keras.datasets.mnist.load_data())

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Data Processing

In [6]:
brevage_df = pd.read_csv('./data/sebastianwillmann/beverage-sales/synthetic_beverage_sales_data.csv')
# on ne garde que 1 000 000 lignes
brevage_df = brevage_df.sample(n=1000000, random_state=42).reset_index(drop=True)
train_dataset, val_dataset, test_dataset = brevage_preprocessing(brevage_df,test_size=0.2,val_size=0.2,random_state=1)


In [7]:
model = Brevage_model(train_dataset.count_features(), mode='relu').to(device)
criterion = nn.MSELoss()
learning_rate = 0.001
num_epochs = 50
batch_size = 128
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [8]:
history = train_model(model, criterion, optimizer, num_epochs,train_loader, val_loader)

50
Epoch 1/50, Training Loss: 79944.4404, Validation Loss: 15027.3685
Epoch 2/50, Training Loss: 5480.7439, Validation Loss: 3053.0740
Epoch 3/50, Training Loss: 2753.1905, Validation Loss: 2522.0291
Epoch 4/50, Training Loss: 2126.0622, Validation Loss: 1823.0052
Epoch 5/50, Training Loss: 1378.7503, Validation Loss: 1030.2340
Epoch 6/50, Training Loss: 676.3966, Validation Loss: 462.8135
Epoch 7/50, Training Loss: 364.1133, Validation Loss: 284.6931
Epoch 8/50, Training Loss: 264.0679, Validation Loss: 265.5199
Epoch 9/50, Training Loss: 213.8418, Validation Loss: 190.7018
Epoch 10/50, Training Loss: 181.9879, Validation Loss: 163.1285
Epoch 11/50, Training Loss: 164.4441, Validation Loss: 150.3543
Epoch 12/50, Training Loss: 145.7959, Validation Loss: 143.1860
Epoch 13/50, Training Loss: 132.6470, Validation Loss: 137.4848
Epoch 14/50, Training Loss: 122.3203, Validation Loss: 106.6841
Epoch 15/50, Training Loss: 115.3542, Validation Loss: 123.3352
Epoch 16/50, Training Loss: 105.23

In [12]:
y_pred = model(test_dataset.features.to(device)).cpu().detach().numpy()
mse = mean_squared_error(test_dataset.targets.numpy(), y_pred)
print(f'Test MSE: {mse}')
history["final_test_loss"]  = mse

Test MSE: 38.87211227416992


In [None]:
print(history)

{'train_loss': [76762.47895996094, 4917.480402141927, 2385.2715601302084, 1355.4028592252605, 546.267305398763, 285.83973342610676, 208.83184728515624, 173.3265512467448, 150.59456974527995, 134.25248311035156, 122.50164867024739, 116.35622235677083, 112.61448102539063, 107.04140088378907, 103.76343618326823, 99.66126768229167, 95.9821802360026, 94.33911462320964, 91.18894970499674, 88.02515171468099, 84.88403221150716, 83.3407439436849, 81.36441092081705, 79.31362923502604, 78.35122577555339, 76.03748635701497, 73.95446583658854, 72.45813270914714, 70.24104186238607, 69.9657667972819, 66.68204318888347, 65.17111615193684, 64.77353048421224, 61.88393656494141, 61.30457855102539, 59.284649084472655, 57.61360815063477, 56.9302004699707, 55.20521035746256, 54.72456991923014, 54.28219326944987, 53.1172882039388, 51.10087702718099, 49.87973960306803, 51.0055470296224, 49.062449031982425, 48.737331572672524, 47.79843918273926, 47.24748483378092, 47.339593358357746], 'val_loss': [13465.639988

In [None]:
# # Create data loaders
# train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# ce que l'on a besoin d'enregistrer
# - train loss pour chaque epoch
# - val loss pour chaque epoch
# - temps d'entrainement
# - final test loss
# - relu ou gelu
# - parameters du modèle (dépend du dataset)
# - paramètres d'entrainement :
    # - nombre d'epochs
    # - batch size
    # - learning rate


# history = {
#     'train_loss': [],
#     'val_loss': [],
#     'final_test_loss': None,
#     'activation_function': 'relu',
#     'model_parameters': model.state_dict(),
#     'training_parameters': {
#         'num_epochs': num_epochs,
#         'batch_size': batch_size,
#         'learning_rate': learning_rate
#     }
# }

In [3]:
animals_path = "data/alessiocorrado99/animals10/raw-img"
train_dataset, val_dataset, test_dataset, class_to_idx, idx_to_class = animals10_preprocessing(animals_path, test_size=0.2, val_size=0.2, image_size=256, random_state=1, subset=1)

print(len(train_dataset), len(val_dataset), len(test_dataset))

15707 5236 5236


In [4]:
animal_model = Animals10_model(num_classes=len(class_to_idx), mode='relu').to(device)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
num_epochs = 50
batch_size = 256
optimizer = torch.optim.Adam(animal_model.parameters(), lr=learning_rate)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
history = train_model(animal_model, criterion, optimizer, num_epochs,train_loader, val_loader)

50
Epoch 1/50, Training Loss: 1.9643, Validation Loss: 1.8303
Epoch 2/50, Training Loss: 1.7423, Validation Loss: 1.6657
Epoch 3/50, Training Loss: 1.5793, Validation Loss: 1.7456
Epoch 4/50, Training Loss: 1.4635, Validation Loss: 1.4875


KeyboardInterrupt: 

AttributeError: 'Animals10Dataset' object has no attribute 'features'