In [1]:
import os 
import mlflow as mf 
import joblib
import torch
import transformers
import numpy as np
from tqdm import tqdm

from torch.utils.data import DataLoader
import ftzard.utils.mlflow as mf_utils


from hydra import initialize, compose
from warnings import filterwarnings

from transformers import DataCollatorWithPadding, AutoTokenizer
import dagstermill as dgm

from sklearn.metrics import classification_report, accuracy_score, f1_score


filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
base_path = '../..'
config_path = f'../../config/'
config_name = 'config'

In [3]:
with initialize(version_base=None, config_path=config_path):
    cfg = compose(config_name=config_name)
    tracking_uri, experiment_name = cfg.MLFLOW_TRACKING_URI, cfg.MLFLOW_EXPERIMENT_NAME
    mlflow_model_name = cfg.MLFLOW_MODEL_NAME 
    

In [4]:
os.environ['MLFLOW_TRACKING_URI'] = tracking_uri
run_name = 'inference'
alias = 'champion'
print('Mlflow Experiment Name: ', experiment_name)
print('Mlflow Run Name: ', run_name)
print('Mlflow Model Name: ', mlflow_model_name)
print("Mlflow Model Alias: ", alias)

Mlflow Experiment Name:  senetiment_analysis
Mlflow Run Name:  inference
Mlflow Model Name:  FalconSentiAnalysis
Mlflow Model Alias:  champion


In [5]:
datasets = joblib.load(f"{base_path}/data/tokenized_dataset.joblib")["datasets"]
print(datasets['test'])

Dataset({
    features: ['label', 'input_ids', 'attention_mask'],
    num_rows: 480
})


In [6]:
## Load Model ##
model_uri = f"models:/{mlflow_model_name}@{alias}"
components = mf.transformers.load_model(model_uri, return_type="components")

2024/06/23 15:04:03 INFO mlflow.transformers: 'models:/FalconSentiAnalysis@champion' resolved as '/app/ftzard/pipeline/notebooks/mlruns/1/28996e8c11b04b1abaeec00ced1ab090/artifacts'
Loading checkpoint shards: 100%|████████████████████████████████████████████| 2/2 [00:14<00:00,  7.34s/it]
Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-7b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
tokenizer = components["tokenizer"]
model = components["model"]
del components
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

In [8]:
print(model)

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): FalconForSequenceClassification(
      (transformer): FalconModel(
        (word_embeddings): Embedding(65024, 4544)
        (h): ModuleList(
          (0-31): 32 x FalconDecoderLayer(
            (self_attention): FalconAttention(
              (rotary_emb): FalconRotaryEmbedding()
              (query_key_value): lora.Linear(
                (base_layer): FalconLinear(in_features=4544, out_features=4672, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.056109821762334554, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4544, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4672, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B)

In [9]:
experiment_id = mf_utils.create_experiment(exp_name=experiment_name)

The provided experiment name senetiment_analysis already exists, the run will be logged in this experiment.
                                 


In [12]:

'''
Preparing Data For Evaluation
'''
eval_dataloader = DataLoader(
    datasets["test"],
    batch_size=32,
    shuffle=False,
    collate_fn=DataCollatorWithPadding(tokenizer=tokenizer),
)

device = model.device

print("Device of PEFT MODEL: ", device)

def get_predictions(batch):
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)
    # token_type_ids = batch["token_type_ids"].to(device)

    with torch.no_grad():
        outputs = model(**{"input_ids":input_ids, "attention_mask":attention_mask})
        logits = outputs.logits
        predicted_labels = torch.argmax(logits, dim=-1) # Move predicted_labels to the same device

    return predicted_labels.cpu().numpy(), logits

print('Getting predictions from best model....')

run_id = mf_utils.get_run_id_by_name(run_name=run_name, 
                                             experiment_ids=[experiment_id],
                                            nested = True)
with mf.start_run(run_id=run_id, run_name=run_name, experiment_id=experiment_id):
    all_predictions, all_logits = [], []
    for batch in tqdm(eval_dataloader):
        predictions, logits = get_predictions(batch)
        all_predictions.extend(predictions)
        all_logits.extend(logits)
    mf.log_params("#ofReocrds", len(all_logits))

Device of PEFT MODEL:  cuda:0
Getting predictions from best model....


100%|█████████████████████████████████████████████████████████████████████| 15/15 [00:06<00:00,  2.39it/s]


## ONLY WHEN YOU HAVE A LABELLED TEST SET 

In [13]:
print('Fetching true labels from dataset....')
true = []
for batch in tqdm(eval_dataloader):
    true.extend(batch['labels'])

print('                  Classification Report         ')
print("-----------------------------------------------------")
print(classification_report(true, all_predictions))

Fetching true labels from dataset....


100%|████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 419.20it/s]

                  Classification Report         
-----------------------------------------------------
              precision    recall  f1-score   support

           0       0.89      0.91      0.90       240
           1       0.91      0.89      0.90       240

    accuracy                           0.90       480
   macro avg       0.90      0.90      0.90       480
weighted avg       0.90      0.90      0.90       480






In [14]:
with mf.start_run(run_id=run_id, run_name=run_name, experiment_id=experiment_id):
    mf.log_metric("accuracy", accuracy_score(true, all_predictions))
    mf.log_metric("f1_score", accuracy_score(true, all_predictions))

In [15]:
outputs = {"data": datasets["test"],
           "predicted_labels": all_predictions,
          "logits": [i.cpu().numpy() for i in all_logits]}

In [16]:
# save_path = f"{base_path}/data/predictions.joblib"
# with open(save_path, 'wb') as f:
#     joblib.dump(outputs, f)

In [17]:
dgm.yield_result(outputs, output_name="predictions_logits")

{'data': Dataset({
     features: ['label', 'input_ids', 'attention_mask'],
     num_rows: 480
 }),
 'predicted_labels': [1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  1,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
