In [1]:
!export PYTHONWARNINGS="ignore"

In [2]:

import os 
import mlflow as mf 
import torch
import joblib
import transformers
import numpy as np
from tqdm import tqdm
import logging

from mlflow.models import infer_signature
from torch.utils.data import DataLoader
import ftzard.utils.mlflow as mf_utils
from mlflow.models.signature import ModelSignature, infer_signature
from mlflow.pyfunc import PythonModel
from mlflow.types.schema import Schema, TensorSpec
from hydra import initialize, compose
from warnings import filterwarnings

from transformers import (AutoModelForSequenceClassification, 
                            AutoTokenizer, BitsAndBytesConfig,
                            DataCollatorWithPadding, pipeline)

from sklearn.metrics import classification_report
from peft import get_peft_model, PeftConfig, PeftModel
from ftzard.utils.dvc import get_current_date_time
import dagstermill as dgm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
!nvidia-smi

Tue Jul  2 11:18:09 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.48.07    Driver Version: 515.48.07    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 41%   54C    P8     1W / 260W |     18MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:02:00.0 Off |                  N/A |
| 43%   64C    P8    38W / 260W |      8MiB / 11264MiB |      0%      Default |
|       

In [4]:
base_path = '/app/ftzard'
config_path = f'{base_path}/config/'
try:
    os.symlink(config_path, "config_link")
except Exception as e:
    print("Symlink already created...")
config_name = 'config'

Symlink already created...


In [5]:
datasets = joblib.load(f"{base_path}/data/tokenized_dataset.joblib")

INFO:datasets:PyTorch version 2.0.1 available.


In [6]:
with initialize(version_base=None, config_path="config_link"):
    cfg = compose(config_name=config_name)
    tracking_uri, experiment_name = cfg.MLFLOW.TRACKING.URI, cfg.MLFLOW.EXPERIMENT.NAME
    mlflow_model_name = cfg.MLFLOW.MODEL.NAME
    

In [7]:
os.environ['MLFLOW_TRACKING_URI'] = tracking_uri
base_run_name = "CHOOSE-BEST-MODEL"
run_name = get_current_date_time()
previous_run_name = 'HP-TUNING'
model_name = cfg.HUGGINGFACE.MODEL.NAME

print("Base Run Name: ", base_run_name)
print('Mlflow Previous Run Name: ', previous_run_name)
print('Mlflow Experiment Name: ', experiment_name)
print('Mlflow Run Name: ', run_name)
print('Model Name: ', model_name)

Base Run Name:  CHOOSE-BEST-MODEL
Mlflow Previous Run Name:  HP-TUNING
Mlflow Experiment Name:  senetiment_analysis
Mlflow Run Name:  2024-07-02_11:18
Model Name:  tiiuae/falcon-7b


In [8]:
experiment_id = mf_utils.create_experiment(exp_name=experiment_name)
previous_run_id = mf_utils.get_run_id_by_name(run_name=previous_run_name, 
                                             experiment_ids=[experiment_id],
                                            nested = True)
print('The Previous Run Id is: ', previous_run_id)
if not previous_run_id:
    raise IOError("Cannot find previous run")

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.


The provided experiment name senetiment_analysis already exists, the run will be logged in this experiment.
                                 
The Previous Run Id is:  254a177528514030a9a79e93926b2035


In [9]:
logger.info(f"Previous Run ID: {previous_run_id}")

INFO:__main__:Previous Run ID: 254a177528514030a9a79e93926b2035


In [13]:
client = mf.MlflowClient()
base_child_runs =  client.search_runs(experiment_id, 
            filter_string=f"tags.mlflow.parentRunId='{previous_run_id}'")

In [17]:
base_child_runs = sorted(base_child_runs, key = lambda x: -x.info.start_time)

In [18]:
print('Latest Run:', base_child_runs[0])

Latest Run: <Run: data=<RunData: metrics={}, params={}, tags={'mlflow.parentRunId': '254a177528514030a9a79e93926b2035',
 'mlflow.runName': '2024-07-01_8:26',
 'mlflow.source.name': '/app/.pixi/envs/default/lib/python3.11/site-packages/ipykernel_launcher.py',
 'mlflow.source.type': 'LOCAL',
 'mlflow.user': 'root'}>, info=<RunInfo: artifact_uri='/app/ftzard/pipeline/notebooks/mlruns/1/9953da46234843f1a475e042a89a9b2c/artifacts', end_time=1719824002015, experiment_id='1', lifecycle_stage='active', run_id='9953da46234843f1a475e042a89a9b2c', run_name='2024-07-01_8:26', run_uuid='9953da46234843f1a475e042a89a9b2c', start_time=1719822426789, status='FINISHED', user_id='root'>, inputs=<RunInputs: dataset_inputs=[]>>


In [19]:
latest_previous_child_run_id = base_child_runs[0].info.run_id
child_runs =  client.search_runs(experiment_id, 
            filter_string=f"tags.mlflow.parentRunId='{latest_previous_child_run_id}'")

In [20]:
child_runs.__len__()

4

In [21]:
datasets = datasets["datasets"]

In [22]:
print(datasets['test'])

Dataset({
    features: ['label', 'input_ids', 'attention_mask'],
    num_rows: 480
})


In [23]:
for child in child_runs:
    print('Name: ', child.info.run_name, '| Id: ', child.info.run_id, 
          "| Accuracy: ", np.round(child.data.metrics["eval_accuracy"], 3),
         "| Train Loss: ", np.round(child.data.metrics["train_loss"], 4),
         "| Eval Loss: ", np.round(child.data.metrics["eval_loss"], 4),
         "| Delta: ", np.round(np.round(child.data.metrics["train_loss"], 4)
                               - np.round(child.data.metrics["eval_loss"], 4), 2))

Name:  2024-07-01_8:26_trial_4 | Id:  acf3d09f0575420d8e6109223861b7cd | Accuracy:  0.814 | Train Loss:  0.7663 | Eval Loss:  0.439 | Delta:  0.33
Name:  2024-07-01_8:26_trial_3 | Id:  d86ae004ad9b436ca757d8ee9af0b44a | Accuracy:  0.533 | Train Loss:  1.5018 | Eval Loss:  0.7829 | Delta:  0.72
Name:  2024-07-01_8:26_trial_2 | Id:  0a517780063642ecadd0fbeece580d28 | Accuracy:  0.795 | Train Loss:  0.8481 | Eval Loss:  0.4766 | Delta:  0.37
Name:  2024-07-01_8:26_trial_1 | Id:  ff351744c39747a0ad96867671e49953 | Accuracy:  0.823 | Train Loss:  0.7326 | Eval Loss:  0.4064 | Delta:  0.33


In [24]:
best_run = None
max_acc = -1000000000
for child in child_runs:
    if child.data.metrics["eval_accuracy"]>max_acc:
        best_run = child
        max_acc = child.data.metrics["eval_accuracy"]

In [25]:
print(best_run)

<Run: data=<RunData: metrics={'epoch': 1.0,
 'eval_accuracy': 0.8232142857142857,
 'eval_balanced_accuracy': 0.8241445474896067,
 'eval_loss': 0.4063585102558136,
 'eval_runtime': 76.5358,
 'eval_samples_per_second': 14.634,
 'eval_steps_per_second': 0.915,
 'total_flos': 1403618202316800.0,
 'train_loss': 0.7325710114978609,
 'train_runtime': 266.3046,
 'train_samples_per_second': 3.755,
 'train_steps_per_second': 0.237}, params={'_name_or_path': 'tiiuae/falcon-7b',
 'accelerator_config': "{'split_batches': False, 'dispatch_batches': None, "
                       "'even_batches': True, 'use_seedable_sampler': True, "
                       "'non_blocking': False, 'gradient_accumulation_kwargs': "
                       'None}',
 'activation': 'gelu',
 'adafactor': 'False',
 'adam_beta1': '0.9',
 'adam_beta2': '0.999',
 'adam_epsilon': '1e-08',
 'add_cross_attention': 'False',
 'alibi': 'False',
 'apply_residual_connection_post_layernorm': 'False',
 'architectures': "['FalconForCausal

In [26]:
artifact_path = best_run.info.artifact_uri
print('Aritifacts Path: ', artifact_path)

Aritifacts Path:  /app/ftzard/pipeline/notebooks/mlruns/1/ff351744c39747a0ad96867671e49953/artifacts


In [27]:
# Loading fine-tuned model from Hugging Face
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

config = PeftConfig.from_pretrained(artifact_path)
peft_base_model = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="cuda:1",
    trust_remote_code=True,
)

peft_model = PeftModel.from_pretrained(peft_base_model, artifact_path)

peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
peft_tokenizer.pad_token = peft_tokenizer.eos_token
peft_model.config.pad_token_id = peft_tokenizer.pad_token_id


Loading checkpoint shards: 100%|████████████████████████████████████████████| 2/2 [00:15<00:00,  7.62s/it]
Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-7b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
print("PEFT CONFIG: ", config)

PEFT CONFIG:  LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='tiiuae/falcon-7b', revision=None, task_type='SEQ_CLS', inference_mode=True, r=32, target_modules={'dense_4h_to_h', 'query_key_value', 'dense_h_to_4h', 'dense'}, lora_alpha=16, lora_dropout=0.05187051351110231, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=['classifier', 'score'], init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None)


In [29]:

'''
Preparing Data For Evaluation
'''
eval_dataloader = DataLoader(
    datasets["test"],
    batch_size=32,
    shuffle=False,
    collate_fn=DataCollatorWithPadding(tokenizer=peft_tokenizer),
)

device = peft_model.device

print("Device of PEFT MODEL: ", device)

def get_predictions(batch):
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)
    # token_type_ids = batch["token_type_ids"].to(device)

    with torch.no_grad():
        outputs = peft_model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        predicted_labels = torch.argmax(logits, dim=-1) # Move predicted_labels to the same device

    return predicted_labels.cpu().numpy()

print('Getting predictions from best model....')

all_predictions = []
for batch in tqdm(eval_dataloader):
    predictions = get_predictions(batch)
    all_predictions.extend(predictions)

print('Fetching true labels from dataset....')
true = []
for batch in tqdm(eval_dataloader):
    true.extend(batch['labels'])

Device of PEFT MODEL:  cuda:1
Getting predictions from best model....


100%|█████████████████████████████████████████████████████████████████████| 15/15 [00:08<00:00,  1.77it/s]


Fetching true labels from dataset....


100%|████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 438.15it/s]


In [30]:
print('                  Classification Report         ')
print("-----------------------------------------------------")
print(classification_report(true, all_predictions))

                  Classification Report         
-----------------------------------------------------
              precision    recall  f1-score   support

           0       0.91      0.80      0.85       240
           1       0.82      0.92      0.87       240

    accuracy                           0.86       480
   macro avg       0.87      0.86      0.86       480
weighted avg       0.87      0.86      0.86       480



In [35]:
with mf.start_run(run_id=previous_run_id):
    with mf.start_run(run_id=latest_previous_child_run_id, nested=True):
        result = mf.transformers.log_model(
            registered_model_name = mlflow_model_name,
            transformers_model = {"model":peft_model, "tokenizer":peft_tokenizer},
            artifact_path="",
            pip_requirements=["--no-deps"],
            task = "text-classification",
        )



2024/07/02 11:28:41 INFO mlflow.transformers: Overriding save_pretrained to False for PEFT models, following the Transformers behavior. The PEFT adaptor and config will be saved, but the base model weights will not and reference to the HuggingFace Hub repository will be logged instead.
2024/07/02 11:28:41 INFO mlflow.transformers: Skipping saving pretrained model weights to disk as the save_pretrained is set to False. The reference to HuggingFace Hub repository tiiuae/falcon-7b will be logged instead.
Registered model 'FalconSentiAnalysis' already exists. Creating a new version of this model...
Created version '4' of model 'FalconSentiAnalysis'.


In [36]:
#Change Alias in the UI
client.set_registered_model_alias(mlflow_model_name, "champion", 4)