# Smoke tests

In [10]:
%load_ext autoreload
%reload_ext autoreload

%autoreload 2

import mlflow as mlf
import sys
import os
sys.path.append('../')

from model.utils import load_model, load_dataset
from model.train import train_model

## Smoke tests for the RoBERTuito model

In [None]:
# Smoke check: the parsed-tweets CSV can be loaded and the base model/tokenizer
# pair can be instantiated.
# NOTE(review): `force=False` presumably allows reuse of a previously prepared
# dataset -- confirm in model.utils.
ds_path = os.path.abspath(
  os.path.join('..', 'model', 'tweets_parsed.csv')
)
ds = load_dataset(ds_path, force=False)

# Print just the first record under each key of the dataset, then move on.
for split_name in ds.keys():
  for first_record in ds[split_name]:
    print(split_name, first_record)
    break

# load_model returns a (model, tokenizer) pair for the given base checkpoint.
model, tokenizer = load_model(
  base_model='pysentimiento/robertuito-base-uncased',
)

## Smoke test for RoBERTuito+BiLSTM

In [None]:
# Smoke check for the BLSTM variant: load the parsed-tweets CSV, peek at the
# data, then build the model with `blstm=True`.
# NOTE(review): `force=False` presumably allows reuse of a previously prepared
# dataset -- confirm in model.utils.
ds_path = os.path.abspath(
  os.path.join('..', 'model', 'tweets_parsed.csv')
)
ds = load_dataset(ds_path, force=False)

# Print just the first record under each key of the dataset, then move on.
for split_name in ds.keys():
  for first_record in ds[split_name]:
    print(split_name, first_record)
    break

# Same base checkpoint as the plain smoke test, with the BLSTM option enabled.
model, tokenizer = load_model(
  base_model='pysentimiento/robertuito-base-uncased',
  blstm=True,
)

In [None]:
# Smoke-run the training entry point from model.train with limit=8.
# NOTE(review): `limit` presumably caps how much data is used so the run
# finishes quickly -- confirm against train_model's signature.
train_model(limit=8)

# Analysis runs

In [2]:

import os
import sys
sys.path.append('../model')

import mlflow as mlf
from transformers import (
    AutoModelForSequenceClassification
)

# Point MLflow at the local file-based tracking store in ../model/mlruns.
#
# The URI is built with pathlib instead of string concatenation: the previous
# 'file:/ ' + path form contained a stray space and a single slash, which only
# resolved by accident on Windows and would point at a directory named ' ' on
# POSIX systems. Path.as_uri() yields a well-formed 'file:///...' URI on every
# platform and percent-encodes special characters in the path.
from pathlib import Path

runs_path = os.path.abspath(os.path.join('..', 'model', 'mlruns'))

# as_uri() requires an absolute path, which os.path.abspath guarantees.
mlf.set_tracking_uri(Path(runs_path).as_uri())

# Columns kept from the MLflow runs table: identity/status, the LSTM
# hyper-parameters, training/evaluation metrics, and the artifact location.
columns = [
  'run_id',
  'status',
  'params.lstm_hidden_dim',
  'params.lstm_num_layers',
  'metrics.train_runtime',
  'metrics.eval_macro_f1',
  'metrics.train_loss',
  'metrics.eval_macro_recall',
  'metrics.eval_macro_precision',
  'artifact_uri',
]

# Finished runs only, sorted by evaluation macro-F1 (highest first), restricted
# to the columns above. A missing column raises KeyError, as before.
runs = mlf.search_runs(
  order_by=['metrics.eval_macro_f1 DESC'],
  filter_string='status="FINISHED"',
).loc[:, columns]

runs

Unnamed: 0,run_id,status,params.lstm_hidden_dim,params.lstm_num_layers,metrics.train_runtime,metrics.eval_macro_f1,metrics.train_loss,metrics.eval_macro_recall,metrics.eval_macro_precision,artifact_uri
0,0dfbb860725d4bcaa12549371f948a64,FINISHED,,,1853.7268,0.818916,0.201745,0.815298,0.822694,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
1,0bf6535d0d94489594598a407d120bab,FINISHED,256.0,4.0,,0.806071,0.033026,0.80258,0.825698,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
2,d7710812eef0481fbf6f78716417717b,FINISHED,128.0,8.0,,0.804081,0.025706,0.803563,0.821344,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
3,70018245cfed43d0b3f2013f05651a2e,FINISHED,256.0,8.0,,0.798647,0.063731,0.798242,0.814408,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
4,1129e41f8157498a8fecde9c7111edbf,FINISHED,128.0,2.0,,0.798098,0.025547,0.795116,0.816364,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
5,58846fea2cb143f797adc96e128dc654,FINISHED,256.0,12.0,,0.291733,0.526668,0.333334,0.259777,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
6,321288cfa85745b6a6714b9992455992,FINISHED,256.0,2.0,,0.291733,0.531261,0.333334,0.259777,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
7,2292347854434103bbcaf7ee97e6848d,FINISHED,128.0,12.0,,0.291733,0.526222,0.333334,0.259777,file:///E:/Media/Python/ID-v3-Scrapper/model/m...
8,66ed1b8e048f43a686b4f64706f5df67,FINISHED,128.0,4.0,,0.291733,0.531302,0.333334,0.259777,file:///E:/Media/Python/ID-v3-Scrapper/model/m...


## Import best Robertuito model

In [11]:
# Load the fine-tuned plain RoBERTuito checkpoint (the run without LSTM
# parameters in the `runs` table) through Hugging Face transformers.
from urllib.parse import urlparse
from urllib.request import url2pathname

# .iloc[0] raises IndexError if the run id is not present in `runs`.
run_torch = runs[runs['run_id'] == '0dfbb860725d4bcaa12549371f948a64'].iloc[0]

# from_pretrained expects a filesystem path, not a file:// URI. Stripping the
# literal 'file:///' prefix only works for Windows drive-letter URIs (on POSIX
# it drops the leading '/' and leaves a relative path) and never decodes
# percent-escapes, so convert the URI properly. Values that are not file: URIs
# are passed through untouched.
model_uri = f'{run_torch["artifact_uri"]}/model'
model_dir = (
  url2pathname(urlparse(model_uri).path)
  if model_uri.startswith('file:')
  else model_uri
)

roubertuito = AutoModelForSequenceClassification.from_pretrained(model_dir)
roubertuito

## Import Robertuito+BiLSTM model

In [None]:
# Select the RoBERTuito+BiLSTM run by id and load its logged PyTorch model
# through MLflow, which takes the artifact URI directly.
# .iloc[0] raises IndexError if the run id is not present in `runs`.
run_torch = runs.loc[
  runs['run_id'] == 'd7710812eef0481fbf6f78716417717b'
].iloc[0]

robertuito_blstm = mlf.pytorch.load_model(
  f"{run_torch['artifact_uri']}/model"
)
robertuito_blstm