In [1]:
#!pip install fsspec==2023.9.2
!pip install -q transformers

In [1]:
!pip show transformers


Name: transformers
Version: 4.36.2
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /opt/conda/lib/python3.10/site-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: 


In [2]:
import torch

# Call torch.cuda.memory_summary() to print the memory summary
print(torch.cuda.memory_summary(device=None, abbreviated=False))

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------------------|
| Active memory         |       0 B  |       0 B  |       0 B  |       0 B  |
|       from large pool |       0 B  |       0 B  |       0 B  |       0 B  |
|       from small pool |       0 B  |       0 B  |       0 B  |       0 B  |
|---------------------------------------------------------------

In [3]:
import torch
import gc
torch.cuda.empty_cache()
gc.collect()

0

### Data preparation
- collating data
- balancing data
- text preprocessing

In [5]:
import pandas as pd
import numpy as np

df = pd.read_csv(r'gs://sentiment_response/pjs_all.csv')

df['nps'] = df['nps'].replace('10 (Extremely likely)',10)
df['nps'] = df['nps'].replace('0 (Not at all likely)',0)
df['nps'] = df['nps'].astype(int)

#target variable will nps split into demoters, passives and promoters
df['nps_group'] = np.where(df['nps'] >= 9,1,
                  np.where(df['nps'] <= 6,0,2))

df = df[df['nps_group'].isin([0,1])]

In [6]:
#df.info()

gb = df.groupby('nps_group')['nps'].count()
gb

nps_group
0     53460
1    127693
Name: nps, dtype: int64

In [42]:
max_rows = 350

df1 = df[df['nps_group'] == 0].head(max_rows)
df2 = df[df['nps_group'] == 1].head(max_rows)


df = pd.concat([df1,df2], axis = 0)

gb = df.groupby('nps_group')['nps'].count()
gb

nps_group
0    350
1    350
Name: nps, dtype: int64

In [43]:
import re
def text_preprocessing(text):
    """
    - Remove entity mentions (eg. '@united')
    - Correct errors (eg. '&amp;' to '&')
    @param    text (str): a string to be processed.
    @return   text (Str): the processed string.
    """
    # Remove '@name'
    text = re.sub(r'(@.*?)[\s]', ' ', text)

    # Replace '&amp;' with '&'
    text = re.sub(r'&amp;', '&', text)

    # Remove trailing whitespace
    text = re.sub(r'\s+', ' ', text).strip()

    return text

df['response'] = df['response'].apply(lambda x: text_preprocessing(x))

In [44]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df['response'],df['nps_group'], test_size = 0.33, random_state=42)

### Pre-processing for DeBERTa

In [45]:
from transformers import DebertaTokenizer, DebertaModel
import torch

tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base', do_lower_case=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def preprocessing_for_deberta(data):
    """Perform required preprocessing steps for pretrained DeBERTa.
    @param    data (list): List of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # Create empty lists to store outputs
    input_ids = []
    attention_masks = []

    # For every sentence...
    for sent in data:
        # `encode_plus` will:
        #    (1) Tokenize the sentence
        #    (2) Add the `[CLS]` and `[SEP]` token to the start and end
        #    (3) Truncate/Pad sentence to max length
        #    (4) Map tokens to their IDs
        #    (5) Create attention mask
        #    (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text=sent,                      # Tokenize sentence
            add_special_tokens=True,        # Add `[CLS]` and `[SEP]`
            max_length=MAX_LEN,             # Max length to truncate/pad
            pad_to_max_length=True,         # Pad sentence to max length
            return_attention_mask=True      # Return attention mask
        )

        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))

    # Convert lists to tensors
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    return input_ids, attention_masks


In [46]:
#need to specify the maximum string length from responses
responses = df['response'].to_numpy()
# Encode our concatenated data
encoded_response = [tokenizer.encode(sent, add_special_tokens=True) for sent in responses]

# Find the maximum length
max_len = max([len(sent) for sent in encoded_response])
print('Max length: ', max_len)

Max length:  490


In [47]:
# Specify `MAX_LEN`
MAX_LEN = 500

# # Print sentence 0 and its encoded token ids
# token_ids = list(preprocessing_for_bert([X_train[0]])[0].squeeze().numpy())
# print('Original: ', X_train[0])
# print('Token IDs: ', token_ids)

# Run function `preprocessing_for_bert` on the train set and the validation set
print('Tokenizing data...')
train_inputs, train_masks = preprocessing_for_deberta(X_train)
val_inputs, val_masks = preprocessing_for_deberta(X_test)


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Tokenizing data...




In [48]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Convert other data types to torch.Tensor
train_labels = torch.tensor(y_train.to_numpy())
val_labels = torch.tensor(y_test.to_numpy())

# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
batch_size = 8

# Create the DataLoader for our training set
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

# Create the DataLoader for our validation set
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)



In [14]:
import torch
import torch.nn as nn
from transformers import DebertaModel, DebertaTokenizer

class DebertaClassifier(nn.Module):
    """DeBERTa Model for Classification Tasks.
    """
    def __init__(self, freeze_deberta=False):
        """
        @param    deberta: a DebertaModel object
        @param    classifier: a torch.nn.Module classifier
        @param    freeze_deberta (bool): Set `False` to fine-tune the DeBERTa model
        """
        super(DebertaClassifier, self).__init__()
        # Specify hidden size of DeBERTa, hidden size of our classifier, and number of labels
        D_in, H, D_out = 768, 30, 2

        # Instantiate DeBERTa model
        self.deberta = DebertaModel.from_pretrained('microsoft/deberta-base')

        # Instantiate a one-layer feed-forward classifier
        self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(H, D_out)
        )

        # Freeze the DeBERTa model
        if freeze_deberta:
            for param in self.deberta.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Feed input to DeBERTa and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that holds attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        # Feed input to DeBERTa
        outputs = self.deberta(input_ids=input_ids,
                               attention_mask=attention_mask)

        # Extract the last hidden state of the token `[CLS]` for the classification task
        last_hidden_state_cls = outputs.last_hidden_state[:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits


In [15]:
total_steps = len(train_dataloader) * 3

print(f'Total_steps: {total_steps}, so {len(train_dataloader)} per epoch')

Total_steps: 17589, so 5863 per epoch


In [17]:
from transformers import get_linear_schedule_with_warmup
from torch.optim import AdamW
def initialize_model(epochs=2):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    # Instantiate Bert Classifier
    deberta_classifier = DebertaClassifier()

    # Tell PyTorch to run the model on CPU
    deberta_classifier.to(device)

    # Create the optimizer
    optimizer = AdamW(deberta_classifier.parameters(),
                      lr=5e-5,    # Default learning rate
                      eps=1e-8    # Default epsilon value
                      )

    # Total number of training steps
    total_steps = len(train_dataloader) * epochs

    print(f'Total_steps: {total_steps}')

    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0.1*total_steps, # Default value
                                                num_training_steps=total_steps)
    return deberta_classifier, optimizer, scheduler


In [18]:
import random
import time
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight

# Specify loss function
loss_fn = nn.CrossEntropyLoss()

def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, train_dataloader, val_dataloader=val_dataloader, epochs=2, evaluation=True):
    """Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    train_loss_list = []
    train_accuracy_list = []
    val_loss_list = []
    val_acc_list = []
    batch_list = []
    epoch_list = []
    epoch_n = -1
    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Train Accuracy':^12}| {'Val Loss':^10} | {'Val Acc':^9} | {'f1_score':^9} | {'Elapsed':^9}")
        print("-"*70)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()

        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0

        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            batch_counts +=1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

            # Zero out any previously calculated gradients
            model.zero_grad()

            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)
            preds = torch.argmax(logits, dim=1).flatten()
            train_accuracy = (preds == b_labels).cpu().numpy().mean() * 100

            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels)
            batch_loss += loss.item()
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20 batches
            if (step % 2931 == 0 and step != 0) or (step == len(train_dataloader) - 1): #(epoch_n != epoch_i) and (step != 0):
                # Calculate time elapsed for 20 batches
                time_elapsed = time.time() - t0_batch

                # Print training results
                #print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")

                # Perform validation every n batches
                if evaluation:
                    # Calculate validation loss and accuracy
                    val_loss, val_accuracy, f1_score = evaluate(model, val_dataloader)

                    # Print validation results
                    print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {train_accuracy:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {f1_score:^9.2f} | {'-':^9}")
                    train_loss_list.append(batch_loss / batch_counts)
                    train_accuracy_list.append(train_accuracy)
                    val_loss_list.append(val_loss)
                    val_acc_list.append(val_accuracy)
                    batch_list.append(step)
                    epoch_list.append(epoch_i)

                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

            epoch_n = epoch_i

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)

        print("-"*70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy, f1_score = evaluate(model, val_dataloader)

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch

            print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {train_accuracy:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {f1_score:^9.2f} | {time_elapsed:^9.2f}")
            print("-"*70)
        print("\n")
    loss_info = pd.DataFrame(list(zip( epoch_list, batch_list, train_loss_list, val_loss_list, val_acc_list )), columns = ['epoch','batch', 'train loss', 'val loss','val accuracy'])
    print("Training complete!")

    return loss_info



# def evaluate(model, val_dataloader):
#     """After the completion of each training epoch, measure the model's performance
#     on our validation set.
#     """
#     # Put the model into the evaluation mode. The dropout layers are disabled during
#     # the test time.
#     model.eval()

#     # Tracking variables
#     val_accuracy = []
#     val_loss = []

#     # For each batch in our validation set...
#     for batch in val_dataloader:
#         # Load batch to GPU
#         b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

#         # Compute logits
#         with torch.no_grad():
#             logits = model(b_input_ids, b_attn_mask)

#         # Compute loss
#         loss = loss_fn(logits, b_labels)
#         val_loss.append(loss.item())

#         # Get the predictions
#         preds = torch.argmax(logits, dim=1).flatten()

#         # Calculate the accuracy rate
#         accuracy = (preds == b_labels).cpu().numpy().mean() * 100
#         val_accuracy.append(accuracy)

#     # Compute the average accuracy and loss over the validation set.
#     val_loss = np.mean(val_loss)
#     val_accuracy = np.mean(val_accuracy)

#     return val_loss, val_accuracy


def evaluate(model, val_dataloader):
    model.eval()

    val_accuracy = []
    val_loss = []
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in val_dataloader:
            b_input_ids, b_attn_mask, b_labels = (t.to(device) for t in batch)

            logits = model(b_input_ids, b_attn_mask)
            loss = loss_fn(logits, b_labels)
            val_loss.append(loss.item())

            preds = torch.argmax(logits, dim=1).flatten()
            accuracy = (preds == b_labels).cpu().numpy().mean() * 100
            val_accuracy.append(accuracy)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(b_labels.cpu().numpy())

    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)

    # Calculate weighted F1 score
    class_weights = compute_class_weight('balanced', classes =  np.unique(all_labels), y = all_labels)
    f1 = f1_score(all_labels, all_preds, average='macro', labels=np.unique(all_labels))

    return val_loss, val_accuracy, f1


In [20]:
set_seed(42)    # Set seed for reproducibility
DebertaClassifier, optimizer, scheduler = initialize_model(epochs=2)
loss_info = train(DebertaClassifier, train_dataloader, val_dataloader, epochs=2, evaluation=True)


Total_steps: 11726
Start training...

 Epoch  |  Batch  |  Train Loss  | Train Accuracy|  Val Loss  |  Val Acc  | f1_score  |  Elapsed 
----------------------------------------------------------------------
   1    |  2931   |   0.248138   |  100.000000  |  0.183732  |   95.32   |   0.95    |     -    
   1    |  5862   |   0.209904   |  75.000000   |  0.207889  |   94.16   |   0.94    |     -    
----------------------------------------------------------------------
   1    |    -    |   0.229024   |  75.000000   |  0.207889  |   94.16   |   0.94    | 10024.52 
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  | Train Accuracy|  Val Loss  |  Val Acc  | f1_score  |  Elapsed 
----------------------------------------------------------------------
   2    |  2931   |   0.167128   |  87.500000   |  0.160575  |   96.52   |   0.97    |     -    
   2    |  5862   |   0.115243   |  100.000000  |  0.129192  |   96.77   |   0.97    |     -

In [40]:
import torch.nn.functional as F
import matplotlib as mpl
import matplotlib.pyplot as plt

def bert_predict(model, test_dataloader):
    """Perform a forward pass on the trained BERT model to predict probabilities
    on the test set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    all_logits = []

    # For each batch in our test set...
    for batch in test_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask = tuple(t.to(device) for t in batch)[:2]

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
        all_logits.append(logits)

    # Concatenate logits from each batch
    all_logits = torch.cat(all_logits, dim=0)

    # Apply softmax to calculate probabilities
    probs = F.softmax(all_logits).cpu().numpy()

    return probs

from sklearn.metrics import accuracy_score, roc_curve, auc

def evaluate_roc(probs, y_true):
    """
    - Print AUC and accuracy on the test set
    - Plot ROC
    @params    probs (np.array): an array of predicted probabilities with shape (len(y_true), 2)
    @params    y_true (np.array): an array of the true values with shape (len(y_true),)
    """
    # preds = probs[:, 1]
    # fpr, tpr, threshold = roc_curve(y_true, preds)
    # roc_auc = auc(fpr, tpr)
    #print(f'AUC: {roc_auc:.4f}')

    # Get accuracy over the test set
    accuracy = accuracy_score(y_true, probs)
    print(f'Accuracy: {accuracy*100:.2f}%')

    # # Plot ROC AUC
    # plt.title('Receiver Operating Characteristic')
    # plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
    # plt.legend(loc = 'lower right')
    # plt.plot([0, 1], [0, 1],'r--')
    # plt.xlim([0, 1])
    # plt.ylim([0, 1])
    # plt.ylabel('True Positive Rate')
    # plt.xlabel('False Positive Rate')
    # plt.show()
    return accuracy

In [49]:
# Compute predicted probabilities on the test set
probs = bert_predict(deberta3, val_dataloader)

# #converting softmax probabilities to classification
# classification = probs.argmax(axis = 1)
# # # Evaluate the Bert classifier
# evaluate_roc(classification, y_test)

  probs = F.softmax(all_logits).cpu().numpy()


In [51]:
#converting softmax probabilities to classification
classification = probs.argmax(axis = 1)
# # Evaluate the Bert classifier
accuracy = evaluate_roc(classification, y_test)

Accuracy: 98.27%


In [53]:
y_test

709     0
231     1
74      1
699     0
1308    0
       ..
1253    0
1225    0
470     1
1300    0
395     1
Name: nps_group, Length: 231, dtype: int64

In [21]:
#### saving model to checkpoint first
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: Traceback (most recent call last):
  File "/opt/conda/bin/huggingface-cli", line 8, in <module>
    sys.exit(main())
  File "/opt/conda/lib/python3.10/site-packages/huggingface_hub/commands/huggingface_cli.py", line 49, in main
    service.run()
  File "/opt/conda/lib/python3.10/site-packages/huggingface_hub/comma

In [22]:
your_token = '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'
tokenizer.push_to_hub("sentiment-transport", use_auth_token=your_token)



CommitInfo(commit_url='https://huggingface.co/nxnag/sentiment-transport/commit/09edb32abad0324d05c61cd5d87b7f7e3ee3ae11', commit_message='Upload tokenizer', commit_description='', oid='09edb32abad0324d05c61cd5d87b7f7e3ee3ae11', pr_url=None, pr_revision=None, pr_num=None)

In [24]:
DebertaClassifier.push_to_hub("sentiment-transport", token=your_token)

AttributeError: 'DebertaClassifier' object has no attribute 'push_to_hub'

## Saving the model and recording its production

In [59]:
project = !gcloud config get-value project
PROJECT_ID = project[0]
PROJECT_ID

'surveys-402414'

In [60]:
#saving the model 
from datetime import datetime
REGION = 'europe-west2'
EXPERIMENT = '01'
SERIES = '01'

TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET = PROJECT_ID
URI = f"gs://{BUCKET}/{SERIES}/{EXPERIMENT}"
DIR = f"temp/{EXPERIMENT}"
BLOB = r"{SERIES}/{EXPERIMENT}/models/{TIMESTAMP}/model/sentiment_classifier.pt"


In [61]:
FRAMEWORK = 'pytorch'
TASK = 'classification'
MODEL_TYPE = 'deberta'
EXPERIMENT_NAME = f'experiment-{SERIES}-{EXPERIMENT}-{FRAMEWORK}-{TASK}-{MODEL_TYPE}'
RUN_NAME = f'run-{TIMESTAMP}'

In [62]:
#Required packages
from google.cloud import aiplatform
from google.cloud import storage
import json

from datetime import datetime
import os

from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value

In [63]:
aiplatform.init(project=PROJECT_ID, location=REGION)

In [64]:
!rm -rf {DIR}
!mkdir -p {DIR}

## Initialising experiment

In [65]:
aiplatform.init(experiment = EXPERIMENT_NAME)

In [66]:
expRun = aiplatform.ExperimentRun.create(run_name = RUN_NAME, experiment = EXPERIMENT_NAME)

Associating projects/240414127532/locations/europe-west2/metadataStores/default/contexts/experiment-01-01-pytorch-classification-deberta-run-20240118133149 to Experiment: experiment-01-01-pytorch-classification-deberta


In [67]:
#log parameters to the experiment run:
expRun.log_params({'experiment': EXPERIMENT, 'series': SERIES, 'project_id': PROJECT_ID})

In [68]:
accuracy = '96.77%'
expRun.log_metrics({'test_accuracy': accuracy})

## Saving model for later use 

In [35]:
model_save_name = 'deberta_classifier.pt'
path = F"{model_save_name}"
torch.save(DebertaClassifier.state_dict(), path)

In [36]:
torch.save(DebertaClassifier.state_dict(),  'gs://sentiment_response/bert_sentiment.pt')

RuntimeError: Parent directory gs://sentiment_response does not exist.

In [35]:
# Upload the model to GCS
bucket = storage.Client().bucket('sentiment_response')
blob = bucket.blob('deberta/deberta_classifier.pt')
blob.upload_from_filename('deberta_classifier.pt')

In [None]:
blob = bucket.blob('test2/deberta_classifier.pt')
blob.upload_from_filename('deberta_classifier.pt')

In [36]:
!gsutil ls gs://sentiment_response/deberta

gs://sentiment_response/deberta/deberta_classifier.pt


In [37]:
from google.cloud import storage

def download_model_weight(bucket_name, source_blob_name, destination_file_path):
    """Downloads a model weight file from Google Cloud Storage."""
    storage_client = storage.Client()

    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)

    # Download the model weight file
    blob.download_to_filename(destination_file_path)

if __name__ == "__main__":
    bucket_name = "sentiment_response"
    source_blob_name = r"deberta/deberta_classifier.pt"  # Path to the model weight file in the bucket
    destination_file_path = "model_weights.pt"  # Local path where you want to save the model weight file

    download_model_weight(bucket_name, source_blob_name, destination_file_path)

In [38]:
deberta3, optimizer, scheduler = initialize_model(epochs=2)

Total_steps: 11726


In [41]:
#model_save_name = 'bert_classifier.pt'
path = r"model_weights.pt"
deberta3.load_state_dict(torch.load(path))

<All keys matched successfully>

In [29]:
!pip install huggingface_hub



In [30]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [32]:
!pip install --upgrade transformers



In [31]:
#your_token = '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'
deberta2.push_to_hub("sentiment-transport")

AttributeError: 'DebertaClassifier' object has no attribute 'push_to_hub'

In [27]:
type(deberta2)

__main__.DebertaClassifier

In [70]:
#logging where the model has been saved
expRun.log_params({'model.save': r'gs://sentiment_response/deberta/deberta_classifier.pt'})

In [72]:
modelmatch = aiplatform.Model.list(filter = f'display_name={SERIES}_{EXPERIMENT} AND labels.series={SERIES} AND labels.experiment={EXPERIMENT}')

upload_model = True
if modelmatch:
    print("Model Already in Registry:")
    if RUN_NAME in modelmatch[0].version_aliases:
        print("This version already loaded, no action taken.")
        upload_model = False
        model = aiplatform.Model(model_name = modelmatch[0].resource_name)
    else:
        print('Loading model as new default version.')
        parent_model = modelmatch[0].resource_name

else:
    print('This is a new model, creating in model registry')
    parent_model = ''

if upload_model:
    model = aiplatform.Model.upload(
        display_name = f'{SERIES}_{EXPERIMENT}',
        model_id = f'model_{SERIES}_{EXPERIMENT}',
        parent_model =  parent_model,
        serving_container_image_uri = 'europe-docker.pkg.dev/vertex-ai/training/pytorch-gpu.1-13:latest',
        artifact_uri =r'gs://sentiment_response/01/01/models/20231024142940/model',
        is_default_version = True,
        version_aliases = [RUN_NAME],
        version_description = RUN_NAME,
        labels = {'series' : f'{SERIES}', 'experiment' : f'{EXPERIMENT}', 'experiment_name' : f'{EXPERIMENT_NAME}', 'run_name' : f'{RUN_NAME}'}        
    )

Model Already in Registry:
Loading model as new default version.
Creating Model
Create Model backing LRO: projects/240414127532/locations/europe-west2/models/model_01_01/operations/3039895113858809856
Model created. Resource name: projects/240414127532/locations/europe-west2/models/model_01_01@3
To use this Model in another session:
model = aiplatform.Model('projects/240414127532/locations/europe-west2/models/model_01_01@3')


In [73]:
print(f'Review the model in the Vertex AI Model Registry:\nhttps://console.cloud.google.com/vertex-ai/locations/{REGION}/models/{model.name}?project={PROJECT_ID}')

Review the model in the Vertex AI Model Registry:
https://console.cloud.google.com/vertex-ai/locations/europe-west2/models/model_01_01?project=surveys-402414


In [74]:
#update model descriptions
expRun.log_params({
    'model.uri': model.uri,
    'model.display_name': model.display_name,
    'model.name': model.name,
    'model.resource_name': model.resource_name,
    'model.version_id': model.version_id,
    'model.versioned_resource_name': model.versioned_resource_name
})

In [75]:
#complete experiment run
expRun.update_state(state = aiplatform.gapic.Execution.State.COMPLETE)

In [76]:
exp = aiplatform.Experiment(experiment_name = EXPERIMENT_NAME)

In [77]:
exp.get_data_frame()

Unnamed: 0,experiment_name,run_name,run_type,state,param.model.save,param.model.versioned_resource_name,param.series,param.model.display_name,param.experiment,param.model.name,param.model.resource_name,param.model.uri,param.model.version_id,param.project_id,metric.test_accuracy
0,experiment-01-01-pytorch-classification-deberta,run-20240118133149,system.ExperimentRun,COMPLETE,gs://sentiment_response/deberta/deberta_classi...,projects/240414127532/locations/europe-west2/m...,1,01_01,1,model_01_01,projects/240414127532/locations/europe-west2/m...,gs://sentiment_response/01/01/models/202310241...,3.0,surveys-402414,96.77%
1,experiment-01-01-pytorch-classification-deberta,run-20240117211743,system.ExperimentRun,RUNNING,,,1,,1,,,,,surveys-402414,96.77%
2,experiment-01-01-pytorch-classification-deberta,run-20231102140225,system.ExperimentRun,RUNNING,,,1,,1,,,,,surveys-402414,0.86747
3,experiment-01-01-pytorch-classification-deberta,run-20231101142133,system.ExperimentRun,COMPLETE,gs://sentiment_response/01/01/models/202310241...,projects/240414127532/locations/europe-west2/m...,1,01_01,1,model_01_01,projects/240414127532/locations/europe-west2/m...,gs://absa-classification/01/01/models/20231101...,2.0,surveys-402414,0.759036


## Now need to have a current version which is managed in the model registry