In [23]:
import sys
sys.path.insert(0, '../')
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
! ls ../models/

model_emb_binary.joblib model_emb_binary.sav    model_w2v_binary.joblib


# Train Model, and Upload (without removing)

In [20]:
from green_mood_tracker.data import get_data, clean
from green_mood_tracker.roberta_trainer import RobertaTrainer
from termcolor import colored

In [22]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# Get and clean data
EXPERIMENT = "[GB] [London] [green_mood_tracker] RoBERTa"

params = dict(nrows=100,
              upload=True,
              local=False,
              rm=False,
              mlflow=True,  # set to True to log params to mlflow
              experiment_name=EXPERIMENT
              )

print("############   Loading Data   ############")
df = get_data(**params)
df = clean(df, 'text')
y_train = df.polarity
X_train = df.text
del df
print("shape: {}".format(X_train.shape))
print("size: {} Mb".format(X_train.memory_usage() / 1e6))
# Train and save model, locally and
t = RobertaTrainer(X=X_train, y=y_train, **params)
del X_train, y_train
print(colored("############  Training model   ############", "red"))
t.train()
print(colored("############  Evaluating model ############", "blue"))
t.evaluate()
print(colored("############   Saving model    ############", "green"))
t.save_model(**params)

############   Loading Data   ############
shape: (100,)
size: 0.00088 Mb
[31m############  Training model   ############[0m


Some layers from the model checkpoint at roberta-base were not used when initializing TFRobertaForSequenceClassification: ['lm_head']
- This IS expected if you are initializing TFRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3
train 309.3
[34m############  Evaluating model ############[0m
[34maccuracy train: 0.7346938848495483 || accuracy test: 0.7666666507720947[0m
[32m############   Saving model    ############[0m
[32mroBERTa.tf saved locally[0m
[32m=> roBERTa.tf uploaded to bucket green-mood-tracker-01 inside models/RoBERTa/v0/roBERTa.tf[0m


# Upload the Model (Only)

In [24]:
from green_mood_tracker.gcp import storage_upload_models

In [25]:
storage_upload_models(bucket_name='green-mood-tracker-01', model_name='RoBERTa',
                      model_version='test', model_filename='roBERTa.tf', rm=False)

Uploading roBERTa.tf!
[32m=> roBERTa.tf uploaded to bucket green-mood-tracker-01 inside models/RoBERTa/test/roBERTa.tf[0m


# Download the Model from GCP