In [1]:
import os
from tensorflow import keras
import pandas as pd
import torch as torch
import torch.nn as nn
import itertools
import json
from torch.utils.data import DataLoader
from brevage_sales import brevage_preprocessing, Brevage_model
from animal10 import animals10_preprocessing, Animals10_model
from training_functions import train_model, evaluate_model
from mnist import mnist_preprocessing, mnist_model
from student_performance import student_model, student_preprocessing
from food_price import preprocess, lstm


In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Downloading


In [None]:
# Kaggle dataset slugs ("owner/dataset-name") used by this notebook.
kaggle_datasets = [
    "umitka/food-price-inflation",
    "minahilfatima12328/performance-trends-in-education",
    "alessiocorrado99/animals10",
    "sebastianwillmann/beverage-sales",
]
# Root folder that receives the downloaded datasets.
data_dir = "data/"

# Set to False to skip the Kaggle download step entirely.
download = True

In [None]:
if download:
    os.makedirs(data_dir, exist_ok=True)
    for dataset in kaggle_datasets:
        if not os.path.exists(os.path.join(data_dir, dataset.split("/")[-1])):    
            os.makedirs(os.path.join(data_dir, dataset.split("/")[-1]), exist_ok=True)
            !kaggle datasets download -d {dataset} -p {data_dir}/{dataset} --unzip


In [5]:

def grid_search(param_dict):
    """Lazily enumerate every combination of the values in ``param_dict``.

    Args:
        param_dict: Mapping from parameter name to an iterable of candidate values.

    Yields:
        One dict per combination, mapping each parameter name to a single
        chosen value. Combinations follow ``itertools.product`` order, so the
        last parameter varies fastest.
    """
    names, choices = param_dict.keys(), param_dict.values()
    yield from (dict(zip(names, combo)) for combo in itertools.product(*choices))

In [None]:
# Per-dataset switches: each flag gates the matching `if TRAIN_...:` grid-search cell below.
TRAIN_BREVAGE = True
TRAIN_ANIMALS = False # training takes far too long: the code works, but we lack the compute to run it; only 7 training runs were done, for the visualisation
TRAIN_MNIST = True
TRAIN_STUDENTS = True
TRAIN_FOOD = True

# Beverage price forecasting

In [7]:
# Load the full beverage-sales CSV.
brevage_df = pd.read_csv('./data/sebastianwillmann/beverage-sales/synthetic_beverage_sales_data.csv')
# Training works on a 1,000,000-row random sample of this frame; the sampling is done inside brevage_model_training.



In [8]:
def brevage_model_training(brevage_df, learning_rate, num_epochs, batch_size, mode, random_state, use_batch_norm):
    """Train and evaluate one beverage-sales model for a single hyper-parameter setting.

    Args:
        brevage_df: Full beverage-sales DataFrame; it is not modified.
        learning_rate: Learning rate given to the Adam optimizer.
        num_epochs: Number of epochs forwarded to ``train_model``.
        batch_size: Batch size of the train/validation/test DataLoaders.
        mode: Forwarded to ``Brevage_model``.
        random_state: Seed used for torch, the row sampling and the preprocessing.
        use_batch_norm: Forwarded to ``Brevage_model``.

    Returns:
        Tuple ``(history, model)``: the history returned by ``train_model``,
        extended with the keys ``'final_test_loss'``, ``'dataset'`` and
        ``'random_state'``, and the trained model.
    """
    torch.manual_seed(random_state)

    # DataFrame.sample returns a new frame, so no defensive copy of the input is
    # needed. The sample size is capped at the frame length because sampling
    # without replacement raises when more rows are requested than exist.
    sample_size = min(1000000, len(brevage_df))
    brevage_df = brevage_df.sample(n=sample_size, random_state=random_state).reset_index(drop=True)

    train_dataset, val_dataset, test_dataset = brevage_preprocessing(brevage_df, test_size=0.2, val_size=0.2, random_state=random_state)
    # NOTE(review): unlike the other training functions in this notebook, the model
    # is not moved to `device`, no device is passed to train_model, and evaluation
    # runs on the CPU. Confirm this is intentional.
    brevage_model = Brevage_model(train_dataset.count_features(), mode=mode, use_batch_norm=use_batch_norm)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(brevage_model.parameters(), lr=learning_rate)
    # Only the training split is shuffled.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    history = train_model(brevage_model, criterion, optimizer, num_epochs, train_loader, val_loader)
    test_results = evaluate_model(brevage_model, test_loader, device=torch.device("cpu"), loss_type='mse')
    print(test_results)

    # Tag the run so results from different datasets and seeds can be told apart later.
    history['final_test_loss'] = test_results
    history['dataset'] = 'brevage'
    history['random_state'] = random_state
    return history, brevage_model

In [9]:
# Hyper-parameter grid for the beverage model; grid_search expands it into every combination.
brevage_model_param = {
    "mode": ["relu", "gelu"],
    "batch_size": [64, 2048],
    "random_state": [1, 2, 3],
    "use_batch_norm": [True, False],
}

In [10]:
# Run the full beverage grid search and store every run's history as JSON.
if TRAIN_BREVAGE:
    # Make sure the output folder exists before the long training starts, so the
    # final dump cannot fail on a missing directory.
    os.makedirs('results', exist_ok=True)
    histories = []
    for param in grid_search(brevage_model_param):
        hist, brevage_model_ = brevage_model_training(brevage_df, learning_rate=0.001, num_epochs=100, batch_size=param['batch_size'], mode=param['mode'], random_state=param['random_state'], use_batch_norm=param['use_batch_norm'])
        histories.append(hist)
    # The context manager closes the file, so the JSON is fully written to disk.
    with open('results/brevage_histories.json', 'w') as results_file:
        json.dump(histories, results_file)

# Animals prediction

In [11]:
# Folder holding the Animals-10 images; it is handed to animals_model_training below.
animals_path = "data/alessiocorrado99/animals10/raw-img"

In [12]:
def animals_model_training(animals_path, learning_rate, num_epochs, batch_size, mode, random_state, use_batch_norm):
    """Train and evaluate one Animals-10 classifier for a single hyper-parameter setting.

    Args:
        animals_path: Image folder forwarded to ``animals10_preprocessing``.
        learning_rate: Learning rate given to the Adam optimizer.
        num_epochs: Number of epochs forwarded to ``train_model``.
        batch_size: Batch size of the train/validation/test DataLoaders.
        mode: Forwarded to ``Animals10_model``.
        random_state: Seed used for torch and for the preprocessing.
        use_batch_norm: Forwarded to ``Animals10_model``.

    Returns:
        Tuple ``(history, model)``: the history returned by ``train_model``,
        extended with the keys ``'final_test_loss'``, ``'random_state'`` and
        ``'dataset'``, and the trained model.
    """
    torch.manual_seed(random_state)

    splits = animals10_preprocessing(
        animals_path,
        test_size=0.2,
        val_size=0.2,
        image_size=128,
        random_state=random_state,
        subset=0.5,
    )
    train_dataset, val_dataset, test_dataset, class_to_idx, idx_to_class = splits

    # One output class per entry of the class mapping.
    n_classes = len(class_to_idx)
    animal_model = Animals10_model(num_classes=n_classes, mode=mode, use_batch_norm=use_batch_norm)
    animal_model = animal_model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(animal_model.parameters(), lr=learning_rate)

    # Only the training split is shuffled.
    train_loader, val_loader, test_loader = (
        DataLoader(split, batch_size=batch_size, shuffle=is_train)
        for split, is_train in ((train_dataset, True), (val_dataset, False), (test_dataset, False))
    )

    history = train_model(animal_model, loss_fn, adam, num_epochs, train_loader, val_loader, device)
    history['final_test_loss'] = evaluate_model(animal_model, test_loader, device, loss_type='cross_entropy')
    history['random_state'] = random_state
    history['dataset'] = 'animals'
    return history, animal_model

In [13]:
# Hyper-parameter grid for the Animals-10 model; grid_search expands it into every combination.
animals_model_param = {
    "mode": ["relu", "gelu"],
    "batch_size": [128, 1024],
    "random_state": [1, 2, 3],
    "use_batch_norm": [True, False],
}

In [14]:
# Run the full Animals-10 grid search and store every run's history as JSON.
if TRAIN_ANIMALS:
    # Make sure the output folder exists before the long training starts, so the
    # final dump cannot fail on a missing directory.
    os.makedirs('results', exist_ok=True)
    histories = []
    for param in grid_search(animals_model_param):
        hist, animal_model_ = animals_model_training(animals_path, learning_rate=0.001, num_epochs=30, batch_size=param['batch_size'], mode=param['mode'], random_state=param['random_state'], use_batch_norm=param['use_batch_norm'])
        histories.append(hist)
    # The context manager closes the file, so the JSON is fully written to disk.
    with open('results/animals_histories.json', 'w') as results_file:
        json.dump(histories, results_file)

# MNIST Digit Detection


In [15]:
# Load the MNIST train and test arrays (images and labels) through Keras.
(mnist_X_train_full, mnist_y_train_full), (mnist_X_test, mnist_y_test) = (keras.datasets.mnist.load_data())

In [16]:
def mnist_model_training(mnist_X_train_full, mnist_y_train_full, mnist_X_test, mnist_y_test, learning_rate, num_epochs, batch_size, mode, random_state, use_batch_norm):
    """Train and evaluate one MNIST classifier for a single hyper-parameter setting.

    Args:
        mnist_X_train_full: Training images forwarded to ``mnist_preprocessing``.
        mnist_y_train_full: Training labels forwarded to ``mnist_preprocessing``.
        mnist_X_test: Test images forwarded to ``mnist_preprocessing``.
        mnist_y_test: Test labels forwarded to ``mnist_preprocessing``.
        learning_rate: Learning rate given to the Adam optimizer.
        num_epochs: Number of epochs forwarded to ``train_model``.
        batch_size: Batch size of the train/validation/test DataLoaders.
        mode: Forwarded to ``mnist_model``.
        random_state: Seed used for torch and for the preprocessing.
        use_batch_norm: Forwarded to ``mnist_model``.

    Returns:
        Tuple ``(history, model)``: the history returned by ``train_model``,
        extended with the keys ``'final_test_loss'``, ``'random_state'`` and
        ``'dataset'``, and the trained model.
    """
    torch.manual_seed(random_state)

    # The scaler returned by the preprocessing is not used afterwards.
    train_dataset, val_dataset, test_dataset, scaler = mnist_preprocessing(
        mnist_X_train_full,
        mnist_y_train_full,
        mnist_X_test,
        mnist_y_test,
        val_size=0.2,
        random_state=random_state,
    )

    net = mnist_model(mode=mode, use_batch_norm=use_batch_norm).to(device)
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate)

    # Only the training split is shuffled.
    train_loader, val_loader, test_loader = (
        DataLoader(split, batch_size=batch_size, shuffle=is_train)
        for split, is_train in ((train_dataset, True), (val_dataset, False), (test_dataset, False))
    )

    history = train_model(net, loss_fn, adam, num_epochs, train_loader, val_loader, device)
    history['final_test_loss'] = evaluate_model(net, test_loader, device, loss_type='cross_entropy')
    history['random_state'] = random_state
    history['dataset'] = 'mnist'
    return history, net

In [17]:
# Hyper-parameter grid for the MNIST model; grid_search expands it into every combination.
mnist_model_param = {
    "mode": ["relu", "gelu"],
    "batch_size": [64, 1024],
    "random_state": [1, 2, 3],
    "use_batch_norm": [True, False],
}

In [18]:
# Run the full MNIST grid search and store every run's history as JSON.
if TRAIN_MNIST:
    # Make sure the output folder exists before the long training starts, so the
    # final dump cannot fail on a missing directory.
    os.makedirs('results', exist_ok=True)
    histories = []
    for param in grid_search(mnist_model_param):
        hist, mnist_model_ = mnist_model_training(mnist_X_train_full, mnist_y_train_full, mnist_X_test, mnist_y_test, learning_rate=0.001, num_epochs=40, batch_size=param['batch_size'], mode=param['mode'], random_state=param['random_state'], use_batch_norm=param['use_batch_norm'])
        histories.append(hist)
    # The context manager closes the file, so the JSON is fully written to disk.
    with open('results/mnist_histories.json', 'w') as results_file:
        json.dump(histories, results_file)

# Student Grade Forecasting


In [19]:
# Load the student-performance CSV that is passed to student_model_training below.
student_df = pd.read_csv('./data/minahilfatima12328/performance-trends-in-education/StudentPerformanceFactors.csv')

In [20]:
def student_model_training(student_df, learning_rate, num_epochs, batch_size, mode, random_state, use_batch_norm):
    """Train and evaluate one student-grade regressor for a single hyper-parameter setting.

    Args:
        student_df: DataFrame forwarded to ``student_preprocessing``.
        learning_rate: Learning rate given to the Adam optimizer.
        num_epochs: Number of epochs forwarded to ``train_model``.
        batch_size: Batch size of the train/validation/test DataLoaders.
        mode: Forwarded to ``student_model``.
        random_state: Seed used for torch and for the preprocessing.
        use_batch_norm: Forwarded to ``student_model``.

    Returns:
        Tuple ``(history, model)``: the history returned by ``train_model``,
        extended with the keys ``'final_test_loss'``, ``'dataset'`` and
        ``'random_state'``, and the trained model.
    """
    torch.manual_seed(random_state)

    # The two scalers returned by the preprocessing are not used afterwards.
    train_dataset, val_dataset, test_dataset, scaler_X, scaler_y = student_preprocessing(
        student_df,
        val_size=0.2,
        test_size=0.2,
        random_state=random_state,
    )

    # The model input width is the second dimension of the training feature tensor.
    n_features = train_dataset.tensors[0].shape[1]
    net = student_model(input_dim=n_features, mode=mode, use_batch_norm=use_batch_norm).to(device)

    loss_fn = nn.MSELoss()
    # This is the only training function in the notebook that applies weight decay.
    adam = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-3)

    # Only the training split is shuffled.
    train_loader, val_loader, test_loader = (
        DataLoader(split, batch_size=batch_size, shuffle=is_train)
        for split, is_train in ((train_dataset, True), (val_dataset, False), (test_dataset, False))
    )

    history = train_model(net, loss_fn, adam, num_epochs, train_loader, val_loader, device)
    history['final_test_loss'] = evaluate_model(net, test_loader, device, loss_type='mse')
    history['dataset'] = 'student'
    history['random_state'] = random_state

    return history, net


In [21]:
# Hyper-parameter grid for the student model; grid_search expands it into every combination.
student_model_param = {
    "mode": ["relu", "gelu"],
    "batch_size": [32, 1024],
    "random_state": [1, 2, 3],
    "use_batch_norm": [True, False],
}


In [22]:
# Run the full student grid search and store every run's history as JSON.
if TRAIN_STUDENTS:
    # Make sure the output folder exists before the long training starts, so the
    # final dump cannot fail on a missing directory.
    os.makedirs('results', exist_ok=True)
    histories = []
    for param in grid_search(student_model_param):
        hist, student_model_ = student_model_training(student_df, learning_rate=0.001, num_epochs=100, batch_size=param['batch_size'], mode=param['mode'], random_state=param['random_state'], use_batch_norm=param['use_batch_norm'])
        histories.append(hist)
    # The context manager closes the file, so the JSON is fully written to disk.
    with open('results/student_histories.json', 'w') as results_file:
        json.dump(histories, results_file)

# Food price inflation

In [23]:
# Load the food-price-inflation CSV that is passed to food_price_model_training below.
inflation_df = pd.read_csv('data/umitka/food-price-inflation/food_price_inflation.csv')

In [24]:
def food_price_model_training(inflation_df, learning_rate, num_epochs, batch_size, mode, random_state, use_batch_norm):
    """Train and evaluate one food-price LSTM for a single hyper-parameter setting.

    Args:
        inflation_df: DataFrame forwarded to ``preprocess``.
        learning_rate: Learning rate given to the Adam optimizer.
        num_epochs: Number of epochs forwarded to ``train_model``.
        batch_size: Batch size of the train/validation/test DataLoaders.
        mode: Forwarded to ``lstm``.
        random_state: Seed for torch only; it is not passed to ``preprocess``.
        use_batch_norm: Forwarded to ``lstm``.

    Returns:
        Tuple ``(history, model)``: the history returned by ``train_model``,
        extended with the keys ``'final_test_loss'``, ``'dataset'`` and
        ``'random_state'``, and the trained model.
    """
    torch.manual_seed(random_state)

    # The two scalers returned by the preprocessing are not used afterwards.
    train_set, val_set, test_set, scaler_X, scaler_y = preprocess(
        inflation_df,
        split_ratio_train_test=0.8,
        split_ratio_train_valid=0.8,
        device=device,
    )

    # The model input size is the third dimension of the training input tensor.
    n_features = train_set.tensors[0].shape[2]
    inflation_model = lstm(input_size=n_features, mode=mode, use_batch_norm=use_batch_norm)
    inflation_model = inflation_model.to(device)

    loss_fn = nn.MSELoss()
    adam = torch.optim.Adam(inflation_model.parameters(), lr=learning_rate)

    # No shuffle argument is given, so every loader keeps the DataLoader default ordering.
    train_loader, val_loader, test_loader = (
        DataLoader(split, batch_size=batch_size) for split in (train_set, val_set, test_set)
    )

    history = train_model(inflation_model, loss_fn, adam, num_epochs, train_loader, val_loader, device)
    history['final_test_loss'] = evaluate_model(inflation_model, test_loader, device, loss_type='mse')
    history['dataset'] = 'food_price'
    history['random_state'] = random_state

    return history, inflation_model

In [25]:
# Hyper-parameter grid for the food-price model; grid_search expands it into every combination.
food_price_model_param = {
    "mode": ["relu", "gelu"],
    "batch_size": [32, 1024],
    "random_state": [1, 2, 3],
    "use_batch_norm": [True, False],
}

In [26]:
# Run the full food-price grid search and store every run's history as JSON.
if TRAIN_FOOD:
    # Make sure the output folder exists before the long training starts, so the
    # final dump cannot fail on a missing directory.
    os.makedirs('results', exist_ok=True)
    histories = []
    for param in grid_search(food_price_model_param):
        hist, inflation_model_ = food_price_model_training(inflation_df, learning_rate=0.001, num_epochs=100, batch_size=param['batch_size'], mode=param['mode'], random_state=param['random_state'], use_batch_norm=param['use_batch_norm'])
        histories.append(hist)
    # The context manager closes the file, so the JSON is fully written to disk.
    with open('results/food_price_histories.json', 'w') as results_file:
        json.dump(histories, results_file)