In [137]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [138]:
# The wildcard import stays first so the explicit imports below win on any name clash.
from sophius.db import *
import ast
import sqlite3
from contextlib import closing

import pandas as pd

DEVICE_NAME = 'NVIDIA GeForce RTX 4090'
LEGACY_DB_PATH = '../data/models.db'

# Make sure every target table exists before anything is read or inserted.
with database:
    database.create_tables([Experiments, Devices, Runs, Models, ModelEpochs])

# dev, _ = Devices.get_or_create(name=torch.cuda.get_device_name())
dev, _ = Devices.get_or_create(name=DEVICE_NAME)

# --- load -------------------------------------------------------------------
# sqlite3's own context manager only commits / rolls back the transaction;
# closing() is what actually releases the connection when the block exits.
with closing(sqlite3.connect(LEGACY_DB_PATH)) as conn:
    exp_df = pd.read_sql('SELECT * FROM experiments', conn)
    runs_df = pd.read_sql('SELECT * FROM models', conn)
    epochs_df = pd.read_sql('SELECT * FROM model_epochs', conn)

# --- experiments ------------------------------------------------------------
exp_df['id'] += 1  # shift legacy ids by one
exp_df = exp_df.drop(columns='hash')
for col in ['opt_params', 'sch_params']:
    # The parameters are stored as dict literals in text form. literal_eval
    # parses Python literals only, so nothing read from the DB is executed.
    exp_df[col] = exp_df[col].apply(ast.literal_eval)

# --- runs and models --------------------------------------------------------
runs_df['id'] += 1
runs_df['exp_id'] = runs_df['exp_id'].astype(int) + 1

# convert to numeric and round (everything except the textual hash)
numeric_cols = [col for col in runs_df.columns if col != 'hash']
for col in numeric_cols:
    runs_df[col] = pd.to_numeric(runs_df[col], errors='coerce')
for col in ['val_acc', 'train_acc']:
    runs_df[col] = runs_df[col].round(4)
runs_df['time'] = runs_df['time'].round(3)

# One model row per distinct hash; .copy() gives an independent frame so the
# id assignment below does not write into a slice of runs_df.
models_df = (
    runs_df[['hash', 'flops', 'macs', 'params']]
    .drop_duplicates(subset='hash')
    .copy()
)
models_df['id'] = pd.RangeIndex(1, len(models_df) + 1)

# Look up each run's model id by hash. A left merge against unique hashes keeps
# the row order and row count of runs_df, so the result is assigned by position
# instead of relying on the two frames happening to share an index.
model_ids = runs_df[['hash']].merge(
    models_df[['hash', 'id']], on='hash', how='left', validate='many_to_one'
)['id']
runs_df['model_id'] = model_ids.to_numpy()

# NOTE(review): the runs' own `id` column is dropped here, while
# epochs_df.run_id below is derived from the legacy ids (+1). This presumably
# relies on the Runs table handing out ids 1..N in insertion order - confirm.
runs_df = runs_df[['exp_id', 'model_id', 'val_acc', 'train_acc', 'time']].assign(
    device_id=dev.id
)

# --- epochs -----------------------------------------------------------------
for col in epochs_df.columns:
    epochs_df[col] = pd.to_numeric(epochs_df[col], errors='coerce')
epochs_df['exp_id'] += 1
# The legacy `model_id` column becomes `run_id`, shifted by one like the other ids.
epochs_df = epochs_df.rename(columns={'model_id': 'run_id'})
epochs_df['run_id'] += 1

# round values
epochs_df['epoch'] = epochs_df['epoch'].astype(int)
for col in ['val_acc', 'train_acc']:
    epochs_df[col] = epochs_df[col].round(4)
epochs_df['loss'] = epochs_df['loss'].round(2)
epochs_df['time'] = epochs_df['time'].round(3)

In [139]:
# runs_df['model_id'] = runs_df.hash.apply(lambda x: models_df.loc[x, 'id'])
# runs_df.model_id

In [140]:
# runs_df[runs_df.hash.duplicated(keep=False)].sort_values(by='hash')

In [141]:
# model_id = pd.merge(runs_df['hash'], models_df[['hash', 'id']], on='hash')['id']
# runs_df['model_id'] = model_id

In [142]:
# Preview the prepared runs frame before it is inserted
# (pandas truncates long frames in the rendered output).
runs_df

Unnamed: 0,exp_id,model_id,val_acc,train_acc,time,device_id
0,1,1,0.6631,0.8660,14.334,1
1,1,2,0.3999,0.4053,14.215,1
2,1,3,0.6422,0.7010,14.050,1
3,1,4,0.4792,0.4886,16.747,1
4,1,5,0.4366,0.4397,18.272,1
...,...,...,...,...,...,...
8526,1,8511,0.7519,0.9996,180.765,1
8527,1,8512,0.7482,0.8583,78.988,1
8528,1,8513,0.6813,0.9691,54.347,1
8529,1,8514,0.7368,0.8413,25.946,1


In [143]:
# Turn the experiments frame into a list of per-row dicts and display it.
exp_params = exp_df.to_dict('records')
exp_params

[{'id': 1,
  'val_size': 10000,
  'batch_size': 256,
  'num_epoch': 50,
  'random_seed': 42,
  'optimizer': 'AdamW',
  'opt_params': {'lr': 0.001},
  'scheduler': 'ExponentialLR',
  'sch_params': {'gamma': 0.95},
  'in_shape': '(3, 32, 32)',
  'out_shape': '10'}]

In [144]:
print('Insert experiments')
# One INSERT per experiment record; rows that hit a conflict are skipped.
for exp_params in exp_df.to_dict('records'):
    insert_query = Experiments.insert(**exp_params).on_conflict_ignore()
    insert_query.execute()

Insert experiments


In [145]:
# Preview the prepared per-epoch frame before it is inserted
# (pandas truncates long frames in the rendered output).
epochs_df

Unnamed: 0,epoch,loss,train_acc,val_acc,time,run_id,exp_id
0,0,240.27,0.5451,0.5290,0.406,1,1
1,1,190.27,0.6085,0.5800,0.708,1,1
2,2,171.38,0.6453,0.6024,1.006,1,1
3,3,159.32,0.6731,0.6152,1.303,1,1
4,4,150.31,0.6932,0.6242,1.602,1,1
...,...,...,...,...,...,...,...
426545,45,1.18,0.9974,0.7528,50.568,8531,1
426546,46,1.00,0.9981,0.7551,51.626,8531,1
426547,47,0.87,0.9983,0.7556,52.685,8531,1
426548,48,0.77,0.9982,0.7558,53.740,8531,1


In [146]:
# Debug aid: print every experiment record as a plain dict.
# After the loop `exp_params` stays bound to the last record; the single
# `Experiments.insert(**exp_params)` in the following cell relies on that.
for exp_params in exp_df.to_dict(orient='records'):
    print(exp_params)


{'id': 1, 'val_size': 10000, 'batch_size': 256, 'num_epoch': 50, 'random_seed': 42, 'optimizer': 'AdamW', 'opt_params': {'lr': 0.001}, 'scheduler': 'ExponentialLR', 'sch_params': {'gamma': 0.95}, 'in_shape': '(3, 32, 32)', 'out_shape': '10'}


In [147]:
# Insert the one record currently held in `exp_params`, skipping it on conflict;
# the value returned by execute() is what the cell displays.
single_insert = Experiments.insert(**exp_params).on_conflict_ignore()
single_insert.execute()

1

In [148]:
from tqdm import tqdm

# Run the whole load inside one transaction: a failure part-way through rolls
# everything back instead of leaving a half-migrated database.
# NOTE(review): on_conflict_ignore only skips rows that violate a constraint.
# runs_df carries no id column, so whether re-running this cell duplicates runs
# depends on the Runs table's unique constraints - confirm.
with database.atomic():
    print('Insert experiments')
    for exp_params in exp_df.to_dict(orient='records'):
        Experiments.insert(**exp_params).on_conflict_ignore().execute()

    print('Insert models')
    Models.insert_many(models_df.to_dict(orient='records')).on_conflict_ignore().execute()

    print('Insert runs')
    Runs.insert_many(runs_df.to_dict(orient='records')).on_conflict_ignore().execute()

    print('Insert epochs')
    # One pass over the frame: groupby hands back each run's rows directly,
    # instead of rebuilding a boolean mask over every row for every run id.
    # sort=False keeps the runs in order of first appearance, as unique() did.
    for run_id, run_epochs in tqdm(epochs_df.groupby('run_id', sort=False)):
        ModelEpochs.insert_many(run_epochs.to_dict(orient='records')).on_conflict_ignore().execute()

Insert experiments
Insert models
Insert runs
Insert epochs


100%|██████████| 8531/8531 [00:50<00:00, 167.99it/s]
