In [None]:
%matplotlib inline
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
import argparse
import re
import time
import glob
import joblib
import sys

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn import preprocessing
import torch
from transformers import TrainingArguments, Trainer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import EarlyStoppingCallback
from transformers.integrations import AzureMLCallback
from transformers import AutoTokenizer, DataCollatorWithPadding
from torchsummary import summary

# Put the `project` directory that sits next to the current working directory's
# parent on the import path, so modules stored there can be imported.
sys.path.append(os.path.join(os.getcwd(), "..", 'project'))
from train_transformer import get_model, adjust_tokenizer, compute_metrics, get_encode_labels, tokenize_function, generate_tokenized_dataset, get_datasets, test_model
from utils import *
# from utils import get_valid_runs, get_highest_performing_model, get_dataset


In [None]:
from azureml.core import Run
import azureml
import mlflow
from azureml.core import Workspace, Dataset, Environment

# Report the installed Azure ML SDK and MLflow versions (one line each).
azure_sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", azure_sdk_version)
mlflow_version = mlflow.version.VERSION
print("MLflow version:", mlflow_version)


In [None]:
# Obtain the Azure ML workspace handle via Workspace.from_config().
ws = Workspace.from_config()
# mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

# Print the workspace's identifying attributes, one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))


In [None]:
from azureml.core import Experiment

# Make sure the local script folder exists; an already existing folder is fine.
script_folder = './project'
os.makedirs(name=script_folder, exist_ok=True)

# Experiment handle named 'transformer_hp', bound to the `ws` workspace object.
exp = Experiment(name='transformer_hp', workspace=ws)
# mlflow.set_experiment('transformer_hp')

In [None]:
# Saved artifacts are read from <dir>/outputs/model.
# NOTE(review): the module-level name `dir` shadows Python's built-in dir();
# it is left unchanged here in case other cells reference it.
dir = 'output_120'
model_directory = f'{dir}/outputs/model'
print(f'the output path: [{model_directory}]')

# Restore, from that one directory: the sequence-classification model
# (configured for 121 labels), its tokenizer, and the joblib-serialized object
# stored in labelEncoder.joblib.
model = AutoModelForSequenceClassification.from_pretrained(
    model_directory,
    num_labels=121,
)
tokenizer = AutoTokenizer.from_pretrained(model_directory)
label_encoder_path = f'{model_directory}/labelEncoder.joblib'
le = joblib.load(label_encoder_path)
print('Model objects and their dependencies are loaded')

In [None]:
# Use the first CUDA device when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Move the model to the chosen device, then switch it to evaluation mode and
# clear its gradients (eval() returns the module, so the calls can be chained).
model.to(device)
model.eval().zero_grad()
print(device)

In [None]:
# Walk the named parameters of `model`, keep a reference to the one called
# 'bert.encoder.layer.2.attention.self.query.weight' and count the matches.
parameter_target, counter = None, 0
for name, parameter in model.named_parameters():
    if name != 'bert.encoder.layer.2.attention.self.query.weight':
        continue
    print(name)
    parameter_target = parameter
    counter = counter + 1

# Cell output: number of parameters whose name matched (0 means none was found
# and `parameter_target` is still None).
counter

In [None]:
# Load 'bert-base-cased' configured for 121 labels.
model_base = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-cased',
    num_labels=121,
)

# Walk its named parameters, keep a reference to the one called
# 'bert.encoder.layer.2.attention.self.query.weight' and count the matches.
parameter_target_base, counter = None, 0
for name, parameter in model_base.named_parameters():
    if name != 'bert.encoder.layer.2.attention.self.query.weight':
        continue
    print(name)
    parameter_target_base = parameter
    counter = counter + 1

# Cell output: number of parameters whose name matched.
counter

In [None]:
# Element-wise equality of the two selected weight tensors, both brought to the
# CPU first; the result is a boolean tensor with one entry per weight.
torch.eq(parameter_target.cpu(), parameter_target_base.cpu())


In [None]:
# Load a second, independent copy of 'bert-base-cased' configured for 121 labels.
model_base_2 = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-cased',
    num_labels=121,
)

# Walk its named parameters, keep a reference to the one called
# 'bert.encoder.layer.2.attention.self.query.weight' and count the matches.
parameter_target_base_2, counter = None, 0
for name, parameter in model_base_2.named_parameters():
    if name != 'bert.encoder.layer.2.attention.self.query.weight':
        continue
    print(name)
    parameter_target_base_2 = parameter
    counter = counter + 1

# Cell output: number of parameters whose name matched.
counter

In [None]:
# Element-wise equality of `parameter_target` and `parameter_target_base_2`,
# both brought to the CPU first; the result is a boolean tensor.
torch.eq(parameter_target.cpu(), parameter_target_base_2.cpu())


In [None]:
# Element-wise equality of `parameter_target` and `parameter_target_base_2`,
# both brought to the CPU first; the result is a boolean tensor.
# NOTE(review): this expression is identical to the one in the preceding cell;
# possibly `parameter_target_base` vs `parameter_target_base_2` was intended — confirm.
torch.eq(parameter_target.cpu(), parameter_target_base_2.cpu())


In [None]:
# Collect the size of every parameter tensor of a 'bert-base-cased' classifier
# so that the next cell can total them with sum(li_shapes).
# NOTE(review): this cell uses num_labels=12 while the other cells in this
# notebook use 121 — confirm which value is intended.
model_base = AutoModelForSequenceClassification.from_pretrained('bert-base-cased', num_labels=12)

# One entry per parameter tensor: its total number of elements.
# numel() is the product of all dimensions, so it is correct for 1-D biases and
# for non-square matrices. The previous code multiplied shape[0] by shape[0],
# which squared the first dimension instead of counting elements.
li_shapes = [parameter.numel() for _, parameter in model_base.named_parameters()]

# Cell output: how many parameter tensors were measured.
len(li_shapes)

In [None]:
# Cell output: grand total of the values collected in `li_shapes`.
sum(size for size in li_shapes)
