In [1]:
import os
import zipfile
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import torch as torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from brevage_sales import brevage_preprocessing
from training_functions import train_model

In [2]:
# Kaggle dataset slugs ("owner/dataset-name") that the download cell iterates over.
kaggle_datasets = [
    "rockyt07/stock-market-sensex-nifty-all-time-dataset",
    "minahilfatima12328/performance-trends-in-education",
    "alessiocorrado99/animals10",
    "sebastianwillmann/beverage-sales",
]

# Root folder under which the datasets are stored.
data_dir = "data/"

# Switch for the download cell: nothing is fetched while this is False.
download = False

In [3]:
if download:
    !mkdir -p {data_dir}
    for dataset in kaggle_datasets:
        if not os.path.exists(os.path.join(data_dir, dataset.split("/")[-1])):    
            !mkdir -p {data_dir/dataset}
            !kaggle datasets download -d {dataset} -p {data_dir}/{dataset} --unzip


In [4]:
# Load MNIST through Keras: a (train images, train labels) pair followed by
# a (test images, test labels) pair.
(
    (mnist_X_train_full, mnist_y_train_full),
    (mnist_X_test, mnist_y_test),
) = keras.datasets.mnist.load_data()

In [5]:
# Pick the compute device: CUDA when PyTorch can actually use a GPU at runtime,
# otherwise the CPU. `torch.cuda.is_available()` is the runtime check;
# `torch.backends.cuda` only reports build-time information (`is_built()`).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Processing

In [6]:
# Read the raw beverage-sales CSV and show its first five rows.
df = pd.read_csv(
    './data/sebastianwillmann/beverage-sales/synthetic_beverage_sales_data.csv'
)
print(df.head(5))


  Order_ID Customer_ID Customer_Type             Product     Category  \
0     ORD1     CUS1496           B2B          Vio Wasser        Water   
1     ORD1     CUS1496           B2B               Evian        Water   
2     ORD1     CUS1496           B2B              Sprite  Soft Drinks   
3     ORD1     CUS1496           B2B  Rauch Multivitamin       Juices   
4     ORD1     CUS1496           B2B        Gerolsteiner        Water   

   Unit_Price  Quantity  Discount  Total_Price             Region  Order_Date  
0        1.66        53      0.10        79.18  Baden-Württemberg  2023-08-23  
1        1.56        90      0.10       126.36  Baden-Württemberg  2023-08-23  
2        1.17        73      0.05        81.14  Baden-Württemberg  2023-08-23  
3        3.22        59      0.10       170.98  Baden-Württemberg  2023-08-23  
4        0.87        35      0.10        27.40  Baden-Württemberg  2023-08-23  


In [7]:

# Run the project-local preprocessing helper on the raw frame. The result must
# still expose a 'Total_Price' column, because the split cell uses it as target.
# NOTE(review): what the helper does to the other columns is not visible here.
brevage_df = brevage_preprocessing(df)


In [8]:
from sklearn.model_selection import train_test_split

# 'Total_Price' is the regression target; every other column is a feature.
y_brevage = brevage_df['Total_Price']
X_brevage = brevage_df.drop(columns='Total_Price')

# Two successive 80/20 splits with a fixed seed: first hold out the test set,
# then carve the validation set out of the remainder
# (roughly 64% train / 16% validation / 20% test overall).
X_temp, X_test, y_temp, y_test = train_test_split(
    X_brevage, y_brevage, test_size=0.2, random_state=1
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.2, random_state=1
)

In [9]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the validation and test splits.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)


In [10]:
def activation_function(mode):
    """Return a newly constructed activation module selected by name.

    Args:
        mode: ``'relu'`` or ``'gelu'``.

    Returns:
        A fresh ``nn.ReLU`` or ``nn.GELU`` instance.

    Raises:
        ValueError: if ``mode`` matches neither supported name.
    """
    supported = (('relu', nn.ReLU), ('gelu', nn.GELU))
    for name, factory in supported:
        if mode == name:
            return factory()
    raise ValueError("bad activation function (relu,gelu)")

class brevage_model(nn.Module):
    """Fully connected network: input_dim -> 64 -> 32 -> 1.

    The same activation module, chosen by ``mode`` through
    ``activation_function``, follows each of the two hidden layers; the
    output layer is left linear.

    Args:
        input_dim: number of input features per sample.
        mode: activation name forwarded to ``activation_function``.
    """

    def __init__(self, input_dim, mode):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.activation = activation_function(mode)
        # Keep the name so the chosen activation can be read back later.
        self.mode = mode

    def forward(self, x):
        """Apply both hidden layers with the activation, then the linear output layer."""
        for hidden_layer in (self.fc1, self.fc2):
            x = self.activation(hidden_layer(x))
        return self.fc3(x)

In [11]:
# Training hyper-parameters.
learning_rate = 0.001
num_epochs = 50
batch_size = 128

# Network, loss and optimiser.
model = brevage_model(X_train.shape[1], mode='relu').to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def _as_dataset(features, targets):
    """Build a float32 TensorDataset on `device` from features and a target exposing `.values`.

    The targets are reshaped to a single column.
    """
    feature_tensor = torch.tensor(features, dtype=torch.float32).to(device)
    target_tensor = torch.tensor(targets.values, dtype=torch.float32).view(-1, 1).to(device)
    return TensorDataset(feature_tensor, target_tensor)


train_dataset = _as_dataset(X_train_scaled, y_train)
val_dataset = _as_dataset(X_val_scaled, y_val)

# Only the training batches are shuffled; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [12]:
# Run the project-local training loop and keep what it returns.
# NOTE(review): the recorded output suggests a dict holding per-epoch losses and
# the training parameters -- confirm against training_functions.train_model.
history = train_model(
    model,
    criterion,
    optimizer,
    num_epochs,
    train_loader,
    val_loader,
)

50
Epoch 1/50, Training Loss: 9782.2428, Validation Loss: 207.4329
Epoch 2/50, Training Loss: 127.0845, Validation Loss: 82.6993
Epoch 3/50, Training Loss: 79.6622, Validation Loss: 57.9631
Epoch 4/50, Training Loss: 68.3534, Validation Loss: 62.1137
Epoch 5/50, Training Loss: 62.2760, Validation Loss: 62.6424


In [None]:
# Dump the object returned by train_model as plain text for a quick inspection.
print(history)

{'train_loss': [9662.527640269745], 'val_loss': [132.54855216998854], 'final_test_loss': None, 'activation_function': 'relu', 'training_parameters': {'num_epochs': 50, 'batch_size': 128, 'learning_rate': 0.001}}


In [None]:
# What needs to be recorded for each run:
# - train loss for each epoch
# - val loss for each epoch
# - training time
# - final test loss
# - relu or gelu
# - model parameters (depends on the dataset)
# - training parameters:
#     - number of epochs
#     - batch size
#     - learning rate
#
# NOTE: this rebinds `history`, replacing whatever an earlier cell stored there.
# NOTE(review): training time is listed above but has no key in the dict yet.

history = {
    'train_loss': [],
    'val_loss': [],
    'final_test_loss': None,
    # Read the name back from the model so the record cannot disagree with
    # the network that was actually built.
    'activation_function': model.mode,
    # state_dict() hands back references to the live parameter tensors; clone
    # them so the snapshot does not change if the model is trained further.
    'model_parameters': {
        name: tensor.detach().clone()
        for name, tensor in model.state_dict().items()
    },
    'training_parameters': {
        'num_epochs': num_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
    },
}