In [None]:
!pip install sentence-transformers transformers datasets

In [None]:
import numpy as np
import pandas as pd
import math
import itertools
import random
import torch
import os
import gzip
import json
from tqdm import tqdm
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_squared_error
from sentence_transformers import SentenceTransformer, util, losses, models
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from transformers import AutoModelForMaskedLM, DataCollatorForWholeWordMask, DataCollatorForLanguageModeling, pipeline
from transformers import AdamW, get_linear_schedule_with_warmup, TrainerCallback
from sklearn.model_selection import StratifiedKFold
import shutil
from datasets import load_metric
import gc
gc.enable()
from sklearn.svm import SVR, LinearSVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso, BayesianRidge, Perceptron, SGDRegressor

In [None]:
# Load the excerpt dataset from a hard-coded absolute path; the file is decoded as cp1252 (Windows-1252).
df=pd.read_csv("/content/common_lit_data.csv",encoding="cp1252")

In [None]:
df.head()

Unnamed: 0,Title,Excerpt,BT Easiness,Kaggle split
0,Patty's Suitors,When the young people returned to the ballroom...,-0.340259,Train
1,Two Little Women on a Holiday,"All through dinner time, Mrs. Fayre was somewh...",-0.315372,Train
2,Patty Blossom,"As Roger had predicted, the snow departed as q...",-0.580118,Train
3,THE WATER-BABIES\nA Fairy Tale for a Land-Baby,Mr. Grimes was to come up next morning to Sir ...,-1.785965,Test
4,HOW THE ARGONAUTS WERE DRIVEN INTO THE UNKNOWN...,And outside before the palace a great garden w...,-1.054013,Train


In [None]:
df.columns

Index(['Title', 'Excerpt', 'BT Easiness', 'Kaggle split'], dtype='object')

In [None]:
def prepare_dataset(df_org,n_quantiles,SEED=10):
    """Bucket excerpts into equal-frequency difficulty classes.

    The "BT Easiness" score is split into ``n_quantiles`` quantile bins.
    Labels are assigned in reverse bin order, so the lowest-scoring bin gets
    the highest label: with 3 bins, label 2 is the lowest-score third
    ("Advanced"), 1 the middle ("Intermediate") and 0 the highest-score third
    ("Elementary").

    Args:
        df_org: DataFrame with at least the columns "Excerpt" and
            "BT Easiness". It is not modified.
        n_quantiles: Number of quantile bins (and therefore of labels).
        SEED: Unused; kept so existing callers that pass it keep working.

    Returns:
        A new DataFrame with columns ``text``, ``target_score`` and ``label``,
        sorted by bin and then by score, with a fresh 0..n-1 index.
    """
    df = df_org.copy()

    # qcut is only used to obtain the quantile edges.
    _, bins = pd.qcut(df["BT Easiness"], n_quantiles, retbins=True)
    # Bins are half-open on the right ([lo, hi)), so the top edge is opened up
    # to keep the maximum score inside the last bin. `np.inf` replaces the
    # `np.Inf` alias, which was removed in NumPy 2.0.
    bins[-1] = np.inf
    n_bins = len(bins) - 1

    df['target_bins'] = pd.cut(df["BT Easiness"], bins, labels=np.arange(0, n_bins), right=False)
    df = df[['Excerpt', 'BT Easiness', 'target_bins']].sort_values(['target_bins', 'BT Easiness'])

    # Reverse the bin order so that lower scores map to higher labels.
    # For three bins this is exactly 0 -> 2, 1 -> 1, 2 -> 0.
    df['target_bins'] = df['target_bins'].map({i: n_bins - 1 - i for i in range(n_bins)})

    df = df.rename(columns={'Excerpt': 'text', 'target_bins': 'label', "BT Easiness": "target_score"})
    df = df.reset_index(drop=True)

    cols_to_select = ["text", "target_score", "label"]
    return df[cols_to_select]

In [None]:
# Three quantile bins -> three class labels (2 = lowest BT Easiness scores, 0 = highest).
X=prepare_dataset(df,3)

In [None]:
X.head()

Unnamed: 0,text,target_score,label
0,"The commutator is peculiar, consisting of only...",-3.676268,2
1,The Dunwich horror itself came between Lammas ...,-3.66836,2
2,"The iron cylinder weighs 23 kilogrammes; but, ...",-3.642892,2
3,As to surface-slope its measurement—from nearl...,-3.639936,2
4,"The tree is dioecious, bearing male catkins on...",-3.636834,2


In [None]:
X.shape

(4724, 3)

In [None]:
X['label'].value_counts()

2    1575
0    1575
1    1574
Name: label, dtype: int64

In [None]:
    
def get_folds(X,n_splits,SEED):
    """Assign each row a stratified cross-validation fold id.

    Rows are split with a shuffled ``StratifiedKFold`` stratified on
    ``X.label``; the id of the fold in which a row is part of the validation
    split is stored in a new int8 column ``fold``.

    Args:
        X: DataFrame with ``text`` and ``label`` columns. It is modified in
            place (the ``fold`` column is added or overwritten).
        n_splits: Number of folds.
        SEED: ``random_state`` used for shuffling.

    Returns:
        The same DataFrame object ``X``, now carrying the ``fold`` column.
    """
    skf = StratifiedKFold(n_splits=n_splits, random_state=SEED, shuffle=True)

    # skf.split yields *positional* indices, so fold ids are collected in a
    # positional array instead of being written through label-based `.loc`,
    # which would misassign rows whenever X does not have a 0..n-1 index.
    fold_ids = np.full(len(X), -1, dtype='int8')
    for fold, (_, idx_val) in enumerate(skf.split(X.text, y=X.label)):
        fold_ids[idx_val] = fold

    X['fold'] = fold_ids
    return X

In [None]:
# 5 stratified folds, shuffled with random_state=12 (a separate value from the global SEED used for training).
X=get_folds(X,5,12)

In [None]:
X.head()

Unnamed: 0,text,target_score,label,fold
0,"The commutator is peculiar, consisting of only...",-3.676268,2,1
1,The Dunwich horror itself came between Lammas ...,-3.66836,2,3
2,"The iron cylinder weighs 23 kilogrammes; but, ...",-3.642892,2,1
3,As to surface-slope its measurement—from nearl...,-3.639936,2,4
4,"The tree is dioecious, bearing male catkins on...",-3.636834,2,1


In [None]:
def seed_everything(seed=1234):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also stores the seed in the ``PYTHONHASHSEED`` environment variable and
    turns on cuDNN's deterministic mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True

# Global seed: applied to the RNGs here and passed to TrainingArguments(seed=...) in train_model.
SEED = 28
seed_everything(seed=SEED)
# Maximum tokenized length; predict_fast and train_model truncate longer inputs to this many tokens.
MAX_LENGTH = 350

In [None]:
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items."""
    starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in starts)

In [None]:
def predict_fast(model_name=None, data=None, init_model=None, tokenizer=None, num_labels=3, 
                 is_multilabel=False, output_logits=False, use_softmax=False):
  """Run batched inference with a sequence-classification model.

  Either ``model_name`` (tokenizer, config and model are loaded from it) or
  both ``init_model`` and ``tokenizer`` must be given. The model is moved to
  the GPU when one is available and switched to eval mode; it is left in eval
  mode on return, so callers that keep training must call ``model.train()``.

  Args:
    model_name: Checkpoint name/path to load; takes precedence over
      ``init_model`` and ``tokenizer`` when truthy.
    data: Sequence of input texts (must support ``len`` and slicing).
    init_model: Already-instantiated model, used when ``model_name`` is falsy.
    tokenizer: Tokenizer matching ``init_model``.
    num_labels: Number of labels for the config loaded from ``model_name``.
    is_multilabel: With ``output_logits`` False, reduce per-class scores to
      class ids via argmax.
    output_logits: When True, skips the ``is_multilabel`` argmax reduction.
    use_softmax: Return per-class probabilities instead of argmax class ids.

  Returns:
    A list with one entry per input text: an int class id, or (with
    ``use_softmax`` and no multilabel reduction) a list of class probabilities.
  """
  batch_size = 32
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  if model_name:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    config = AutoConfig.from_pretrained(model_name, num_labels=num_labels)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config)
  else:
    model = init_model
  model.to(device)
  model.eval()

  y_pred = []
  n_batches = math.ceil(len(data) / batch_size)
  for batch in tqdm(chunks(data, batch_size), total=n_batches):
    encoded = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=MAX_LENGTH)
    inputs = {
        'input_ids': encoded['input_ids'].to(device),
        'attention_mask': encoded['attention_mask'].to(device)
    }
    with torch.no_grad():
      outputs = model(**inputs)

    # First output element holds the logits when no labels are passed.
    # Assumed shape (batch, num_labels) -- TODO confirm for non-HF models.
    batch_logits = outputs[0].detach().cpu()
    # No .squeeze() here: it would collapse a final batch of size 1 to a
    # scalar (or a flat probability list) and corrupt/crash y_pred.extend.
    if use_softmax:
      preds = nn.functional.softmax(batch_logits, dim=-1).numpy()
    else:
      preds = batch_logits.argmax(-1).numpy()

    # Only reduce when a class axis is still present; after the argmax above
    # the predictions are already class ids.
    if is_multilabel and not output_logits and preds.ndim > 1:
      preds = np.argmax(preds, axis=-1)

    y_pred.extend(preds.tolist())

  del model
  gc.collect()
  return y_pred

In [None]:
def train_model(
        model_dir,
        out_dir,
        data,
        data_labels,
        test_data=None,
        test_labels=None,
        do_eval=False,
        do_epoch_eval=False,
        do_save_best=False,
        hyperparams=None,
        cfg=None
        ):
    """Fine-tune a sequence-classification model with the Hugging Face ``Trainer``.

    Args:
        model_dir: Checkpoint name or path for tokenizer, config and weights.
        out_dir: Trainer output directory. The final model is saved here when
            ``do_save_best`` is False; otherwise the best checkpoint is saved
            under ``<out_dir>/best``.
        data: List of training texts.
        data_labels: Labels aligned with ``data`` (indexable by position).
        test_data: Validation texts; required when ``do_save_best`` is True.
        test_labels: Validation labels aligned with ``test_data``.
        do_eval: Accepted for compatibility; not used.
        do_epoch_eval: Accepted for compatibility; not used.
        do_save_best: When True, the validation accuracy is computed at every
            logging event and the best-scoring model so far is saved together
            with a ``hyperparams.txt`` JSON summary.
        hyperparams: Dict overriding the defaults
            ``{'bs': 16, 'lr': 1e-4, 'ep': 5, 'bias': False, 'init': None,
            'weight_decay': 0.0}``. Optional keys ``hidden_dropout`` and
            ``attention_probs_dropout`` override the model config's dropout
            probabilities. ``init`` is accepted but unused. The caller's dict
            is not modified.
        cfg: Dict overriding the defaults ``{'num_labels': 1,
            'logging_steps': 500, 'is_multilabel': False, 'keep_layers': None}``.
            ``keep_layers`` is an optional collection of encoder layer indices
            to retain.

    Returns:
        The best validation accuracy when ``do_save_best`` is True (``None``
        if no evaluation ran); otherwise ``None``.

    Raises:
        ValueError: If ``do_save_best`` is True without validation data.
    """
    # Merge into fresh dicts: the caller's dicts (and any shared defaults) are
    # never mutated, and partially specified dicts still get every required key.
    hyperparams = {'bs': 16, 'lr': 1e-4, 'ep': 5, 'bias': False, 'init': None,
                   'weight_decay': 0.0, **(hyperparams or {})}
    cfg = {'num_labels': 1, 'logging_steps': 500, 'is_multilabel': False,
           'keep_layers': None, **(cfg or {})}

    if do_save_best and (test_data is None or test_labels is None):
        raise ValueError('do_save_best=True requires both test_data and test_labels')

    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    train_encodings = tokenizer(data, truncation=True, padding=True, max_length=MAX_LENGTH)

    class LitDataset(torch.utils.data.Dataset):
        """Pairs tokenizer encodings with their labels for the Trainer."""

        def __init__(self, encodings, labels):
            self.encodings = encodings
            self.labels = labels

        def __getitem__(self, idx):
            item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
            item['labels'] = torch.tensor(self.labels[idx])
            return item

        def __len__(self):
            return len(self.labels)

    train_dataset = LitDataset(train_encodings, data_labels)

    # Scheduler length: batches per epoch (last partial batch included) times
    # epochs. NOTE(review): assumes a single device and no gradient
    # accumulation -- confirm if the Trainer setup changes.
    steps_per_epoch = math.ceil(len(train_dataset) / hyperparams['bs'])
    training_steps = steps_per_epoch * hyperparams['ep']
    warmup_steps = math.ceil(training_steps * 0.06)

    training_args = TrainingArguments(
        output_dir=out_dir,                              # output directory
        num_train_epochs=hyperparams['ep'],              # total number of training epochs
        per_device_train_batch_size=hyperparams['bs'],   # batch size per device during training
        per_device_eval_batch_size=1,                    # batch size per device for evaluation
        logging_dir='/tmp/logs',                         # directory for storing logs
        logging_steps=cfg['logging_steps'],
        seed=SEED,
        weight_decay=hyperparams['weight_decay'],        # recorded only; decay is applied by the optimizer below
        learning_rate=hyperparams['lr'],
        save_strategy='no'
    )

    # Dropout overrides must be part of the config the model is built from;
    # otherwise they have no effect on the instantiated layers.
    dropout_overrides = {
        config_key: hyperparams[hp_key]
        for hp_key, config_key in (('hidden_dropout', 'hidden_dropout_prob'),
                                   ('attention_probs_dropout', 'attention_probs_dropout_prob'))
        if hyperparams.get(hp_key) is not None
    }
    config = AutoConfig.from_pretrained(model_dir, num_labels=cfg['num_labels'], **dropout_overrides)
    model = AutoModelForSequenceClassification.from_pretrained(model_dir, config=config)
    model.num_labels = cfg['num_labels']

    if cfg['keep_layers']:
        # Keep only the requested encoder layers (in their original order).
        new_layers = torch.nn.ModuleList([layer_module for i, layer_module in enumerate(model.base_model.encoder.layer)
                                          if i in cfg['keep_layers']])
        model.base_model.encoder.layer = new_layers
        model.config.num_hidden_layers = len(cfg['keep_layers'])

    # A custom optimizer bypasses TrainingArguments.weight_decay, so the decay
    # is applied here; bias and LayerNorm weights are excluded from decay.
    no_decay = ('bias', 'LayerNorm.weight')
    decay_params, no_decay_params = [], []
    for param_name, param in model.named_parameters():
        if any(marker in param_name for marker in no_decay):
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    optimizer = AdamW(
        [{'params': decay_params, 'weight_decay': hyperparams['weight_decay']},
         {'params': no_decay_params, 'weight_decay': 0.0}],
        correct_bias=hyperparams['bias'],
        lr=hyperparams['lr'])
    scheduler = get_linear_schedule_with_warmup(optimizer=optimizer, num_training_steps=training_steps,
                                                num_warmup_steps=warmup_steps)

    scores = []  # validation accuracy at every logging event

    class EvalCallback(TrainerCallback):
        """Scores the model on the validation data at each log event and keeps the best checkpoint."""

        def on_log(self, args, state, control, **kwargs):
            if not do_save_best:
                return
            model = kwargs['model']
            y_pred = predict_fast(init_model=model, tokenizer=tokenizer, data=test_data,
                                  num_labels=cfg['num_labels'], is_multilabel=cfg['is_multilabel'])
            # predict_fast leaves the model in eval mode; resume training mode.
            model.train()
            curr_score = float(np.mean(np.asarray(y_pred) == np.asarray(test_labels)))
            print('Accuracy: ', curr_score)

            if not scores or curr_score > max(scores):
                print(f'max accuaracy is {curr_score} ')
                save_dir = os.path.join(out_dir, 'best')
                model.save_pretrained(save_dir)
                tokenizer.save_pretrained(save_dir)
                # Written from a copy so the hyperparameter dict itself stays untouched.
                summary = dict(hyperparams, score=curr_score, step=state.global_step,
                               trainset_size=len(data_labels))
                with open(os.path.join(save_dir, 'hyperparams.txt'), 'w') as f:
                    f.write(json.dumps(summary))
            scores.append(curr_score)

    trainer = Trainer(
        model=model,                         # the instantiated 🤗 Transformers model to be trained
        args=training_args,                  # training arguments, defined above
        train_dataset=train_dataset,         # training dataset
        optimizers=(optimizer, scheduler),   # custom optimizer and LR schedule
        callbacks=[EvalCallback]             # validation scoring / best-checkpoint saving
    )

    trainer.train()

    if not do_save_best:
        model.save_pretrained(out_dir)
        tokenizer.save_pretrained(out_dir)
    print('Training done')

    if do_save_best:
        del model
        gc.collect()
        return max(scores) if scores else None

In [None]:
# Fold 1 is held out for validation; every other fold is used for training.
train_tx = X.loc[X["fold"] != 1, "text"].tolist()
val_tx = X.loc[X["fold"] == 1, "text"].tolist()

In [None]:
train_tx[1]

'As to surface-slope its measurement—from nearly 600 trials—was found to be such a delicate operation that the result would be of doubtful utility. This would affect the application of all formulas into which it entered. The water surface was ascertained, on the average of its oscillations, to be sensibly level across, not convex, as supposed by some writers. There were 565 sets of vertical velocity measurements combined into forty-six series. The forty-six average curves were all very flat and convex downstream—except near an irregular bank—and were approximately parabolas with horizontal axes; the data determined the parameters only very roughly; the maximum velocity line was usually below the service, and sank in a rectangular channel, from the center outward down to about mid-depth near the banks. Its depression seemed not to depend on the depth, slope, velocity, or wind; probably the air itself, being a continuous source of surface retardation, would permanently depress the maximu

In [None]:
# Class labels aligned with the train / validation texts (fold 1 held out).
train_sc = X.loc[X["fold"] != 1, "label"].values
val_sc = X.loc[X["fold"] == 1, "label"].values

In [None]:
train_sc[1]

2

In [None]:
# Fine-tune DeBERTa-v3 as a 3-class classifier, scoring on the held-out fold (val_tx / val_sc).
BASE_PATH = '/content'
out_dir = os.path.join(BASE_PATH, 'output_model/')

model_name = 'microsoft/deberta-v3-base'
# Alternative checkpoint:
#model_name = 'microsoft/deberta-large'
hyperparams = {
  'bs': 3,  # per-device training batch size
  'lr': 9e-6,  # learning rate
  'weight_decay': 0.1,  # forwarded to TrainingArguments by train_model
  'ep': 4,  # number of training epochs
  'bias': True,  # passed to AdamW as correct_bias
  'init': None,  # not used by train_model
  'hidden_dropout': 0.1,  # read by train_model when building the model config
  'attention_probs_dropout': 0.1  # read by train_model when building the model config
}
cfg = {
  'num_labels': 3,  # number of output classes
  'is_multilabel': False,
  'logging_steps': 20,  # train_model's callback scores the validation data at every log event
  'keep_layers': None,  # None keeps every encoder layer
  'soft_labels': None  # not read by train_model
}

# Local checkpoint path; not referenced by the train_model call below.
DEBERTA_PRETRAINED = os.path.join(BASE_PATH, 'models/deberta-v3-base')


# do_save_best=True: the best-scoring checkpoint is written to <out_dir>/best.
train_model(
    model_dir=model_name,
    out_dir=out_dir,
    data=train_tx,
    data_labels=train_sc,
    test_data=val_tx,
    test_labels=val_sc,
    do_save_best=True,
    hyperparams=hyperparams,
    cfg=cfg
)

loading configuration file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e6f9db57345f0f60c9f837fa97bcb27b1ed31e99feb33d732d7d8c80cb8f8459.de97182a9f32a68819030ba8f3f6ff2ba47276be3864425925523202f54cc79c
Model config DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_

Step,Training Loss
20,1.1245
40,1.0857
60,1.1018
80,1.0947
100,1.1069
120,1.1004
140,1.118
160,1.0973
180,1.0894
200,1.0885


30it [00:18,  1.62it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.3333333333333333
max accuaracy is 0.3333333333333333 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:18,  1.58it/s]


Accuracy:  0.3333333333333333


30it [00:19,  1.57it/s]


Accuracy:  0.3333333333333333


30it [00:19,  1.54it/s]


Accuracy:  0.3333333333333333


30it [00:19,  1.51it/s]


Accuracy:  0.3333333333333333


30it [00:19,  1.51it/s]


Accuracy:  0.3333333333333333


30it [00:20,  1.49it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.3343915343915344
max accuaracy is 0.3343915343915344 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.49it/s]


Accuracy:  0.3333333333333333


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.362962962962963
max accuaracy is 0.362962962962963 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.48it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.382010582010582
max accuaracy is 0.382010582010582 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.4804232804232804
max accuaracy is 0.4804232804232804 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.5058201058201058
max accuaracy is 0.5058201058201058 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.5957671957671957
max accuaracy is 0.5957671957671957 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.5851851851851851


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6074074074074074
max accuaracy is 0.6074074074074074 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.48it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6137566137566137
max accuaracy is 0.6137566137566137 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.5830687830687831


30it [00:20,  1.47it/s]


Accuracy:  0.5873015873015873


30it [00:20,  1.48it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6571428571428571
max accuaracy is 0.6571428571428571 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.6412698412698413


30it [00:20,  1.46it/s]


Accuracy:  0.6211640211640211


30it [00:20,  1.48it/s]


Accuracy:  0.6423280423280423


30it [00:20,  1.48it/s]


Accuracy:  0.6444444444444445


30it [00:20,  1.47it/s]


Accuracy:  0.564021164021164


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6603174603174603
max accuaracy is 0.6603174603174603 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.6433862433862434


30it [00:20,  1.47it/s]


Accuracy:  0.5830687830687831


30it [00:20,  1.48it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6708994708994709
max accuaracy is 0.6708994708994709 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6772486772486772
max accuaracy is 0.6772486772486772 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.46it/s]


Accuracy:  0.6412698412698413


30it [00:20,  1.47it/s]


Accuracy:  0.6158730158730159


30it [00:20,  1.48it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6857142857142857
max accuaracy is 0.6857142857142857 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.6243386243386243


30it [00:20,  1.46it/s]


Accuracy:  0.6783068783068783


30it [00:20,  1.48it/s]


Accuracy:  0.6433862433862434


30it [00:20,  1.48it/s]


Accuracy:  0.6730158730158731


30it [00:20,  1.47it/s]


Accuracy:  0.6708994708994709


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.6973544973544974
max accuaracy is 0.6973544973544974 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.6518518518518519


30it [00:20,  1.47it/s]


Accuracy:  0.5968253968253968


30it [00:20,  1.47it/s]


Accuracy:  0.6444444444444445


30it [00:20,  1.47it/s]


Accuracy:  0.6634920634920635


30it [00:20,  1.47it/s]


Accuracy:  0.6656084656084656


30it [00:20,  1.47it/s]


Accuracy:  0.6793650793650794


30it [00:20,  1.47it/s]


Accuracy:  0.6708994708994709


30it [00:20,  1.47it/s]


Accuracy:  0.6846560846560846


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.7005291005291006
max accuaracy is 0.7005291005291006 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.6465608465608466


30it [00:20,  1.46it/s]


Accuracy:  0.6656084656084656


30it [00:20,  1.48it/s]


Accuracy:  0.6666666666666666


30it [00:20,  1.47it/s]


Accuracy:  0.6433862433862434


30it [00:20,  1.47it/s]


Accuracy:  0.6888888888888889


30it [00:20,  1.46it/s]


Accuracy:  0.6264550264550265


30it [00:20,  1.47it/s]


Accuracy:  0.638095238095238


30it [00:20,  1.47it/s]


Accuracy:  0.7005291005291006


30it [00:20,  1.47it/s]


Accuracy:  0.6105820105820106


30it [00:20,  1.47it/s]


Accuracy:  0.6455026455026455


30it [00:20,  1.47it/s]


Accuracy:  0.6317460317460317


30it [00:20,  1.47it/s]


Accuracy:  0.6497354497354497


30it [00:20,  1.47it/s]


Accuracy:  0.5714285714285714


30it [00:20,  1.47it/s]


Accuracy:  0.6486772486772486


30it [00:20,  1.47it/s]


Accuracy:  0.6846560846560846


30it [00:20,  1.47it/s]


Accuracy:  0.6857142857142857


30it [00:20,  1.47it/s]


Accuracy:  0.6867724867724868


30it [00:20,  1.47it/s]


Accuracy:  0.6772486772486772


30it [00:20,  1.47it/s]


Accuracy:  0.692063492063492


30it [00:20,  1.47it/s]


Accuracy:  0.691005291005291


30it [00:20,  1.46it/s]


Accuracy:  0.6804232804232804


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.7206349206349206
max accuaracy is 0.7206349206349206 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.692063492063492


30it [00:20,  1.47it/s]


Accuracy:  0.674074074074074


30it [00:20,  1.48it/s]


Accuracy:  0.6116402116402117


30it [00:20,  1.48it/s]


Accuracy:  0.692063492063492


30it [00:20,  1.47it/s]


Accuracy:  0.6571428571428571


30it [00:20,  1.46it/s]


Accuracy:  0.6275132275132275


30it [00:20,  1.47it/s]


Accuracy:  0.674074074074074


30it [00:20,  1.48it/s]


Accuracy:  0.7037037037037037


30it [00:20,  1.48it/s]


Accuracy:  0.7153439153439154


30it [00:20,  1.47it/s]


Accuracy:  0.671957671957672


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Accuracy:  0.6772486772486772


30it [00:20,  1.47it/s]


Accuracy:  0.6455026455026455


30it [00:20,  1.47it/s]


Accuracy:  0.674074074074074


30it [00:20,  1.47it/s]


Accuracy:  0.7174603174603175


30it [00:20,  1.47it/s]


Accuracy:  0.635978835978836


30it [00:20,  1.47it/s]


Accuracy:  0.6888888888888889


30it [00:20,  1.47it/s]


Accuracy:  0.6465608465608466


30it [00:20,  1.47it/s]


Accuracy:  0.7164021164021164


30it [00:20,  1.47it/s]


Accuracy:  0.6624338624338625


30it [00:20,  1.47it/s]


Accuracy:  0.6730158730158731


30it [00:20,  1.47it/s]


Accuracy:  0.7047619047619048


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.725925925925926
max accuaracy is 0.725925925925926 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.5682539682539682


30it [00:20,  1.46it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.7291005291005291
max accuaracy is 0.7291005291005291 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.7248677248677249


30it [00:20,  1.46it/s]


Accuracy:  0.6783068783068783


30it [00:20,  1.48it/s]


Accuracy:  0.6761904761904762


30it [00:20,  1.48it/s]


Accuracy:  0.691005291005291


30it [00:20,  1.47it/s]


Accuracy:  0.6984126984126984


30it [00:20,  1.47it/s]


Accuracy:  0.725925925925926


30it [00:20,  1.47it/s]


Accuracy:  0.71005291005291


30it [00:20,  1.47it/s]


Accuracy:  0.725925925925926


30it [00:20,  1.47it/s]


Accuracy:  0.7164021164021164


30it [00:20,  1.47it/s]


Accuracy:  0.7164021164021164


30it [00:20,  1.47it/s]


Accuracy:  0.7111111111111111


30it [00:20,  1.47it/s]


Accuracy:  0.6973544973544974


30it [00:20,  1.47it/s]


Accuracy:  0.6603174603174603


30it [00:20,  1.47it/s]


Accuracy:  0.728042328042328


30it [00:20,  1.47it/s]


Accuracy:  0.635978835978836


30it [00:20,  1.47it/s]


Accuracy:  0.7185185185185186


30it [00:20,  1.47it/s]


Accuracy:  0.6211640211640211


30it [00:20,  1.47it/s]


Accuracy:  0.7195767195767195


30it [00:20,  1.47it/s]


Accuracy:  0.6582010582010582


30it [00:20,  1.47it/s]


Accuracy:  0.6984126984126984


30it [00:20,  1.47it/s]


Accuracy:  0.692063492063492


30it [00:20,  1.47it/s]


Accuracy:  0.6846560846560846


30it [00:20,  1.47it/s]


Accuracy:  0.725925925925926


30it [00:20,  1.47it/s]


Accuracy:  0.6793650793650794


30it [00:20,  1.47it/s]


Accuracy:  0.7111111111111111


30it [00:20,  1.47it/s]


Accuracy:  0.7174603174603175


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.7407407407407407
max accuaracy is 0.7407407407407407 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.6952380952380952


30it [00:20,  1.47it/s]


Accuracy:  0.7386243386243386


30it [00:20,  1.48it/s]


Accuracy:  0.6455026455026455


30it [00:20,  1.47it/s]


Accuracy:  0.6814814814814815


30it [00:20,  1.46it/s]


Accuracy:  0.71005291005291


30it [00:20,  1.47it/s]


Accuracy:  0.6444444444444445


30it [00:20,  1.47it/s]


Accuracy:  0.6888888888888889


30it [00:20,  1.47it/s]


Accuracy:  0.6846560846560846


30it [00:20,  1.47it/s]


Accuracy:  0.7238095238095238


30it [00:20,  1.47it/s]


Accuracy:  0.6571428571428571


30it [00:20,  1.47it/s]


Accuracy:  0.6074074074074074


30it [00:20,  1.47it/s]


Accuracy:  0.6793650793650794


30it [00:20,  1.47it/s]


Accuracy:  0.6328042328042328


30it [00:20,  1.47it/s]


Accuracy:  0.6507936507936508


30it [00:20,  1.47it/s]


Accuracy:  0.7142857142857143


30it [00:20,  1.47it/s]


Accuracy:  0.7174603174603175


30it [00:20,  1.47it/s]


Accuracy:  0.7037037037037037


30it [00:20,  1.47it/s]


Accuracy:  0.6571428571428571


30it [00:20,  1.47it/s]


Accuracy:  0.6814814814814815


30it [00:20,  1.47it/s]


Accuracy:  0.6201058201058202


30it [00:20,  1.47it/s]


Accuracy:  0.7322751322751323


30it [00:20,  1.47it/s]


Accuracy:  0.6529100529100529


30it [00:20,  1.48it/s]


Accuracy:  0.6666666666666666


30it [00:20,  1.47it/s]


Accuracy:  0.6867724867724868


30it [00:20,  1.47it/s]


Accuracy:  0.6370370370370371


30it [00:20,  1.47it/s]


Accuracy:  0.6433862433862434


30it [00:20,  1.47it/s]


Accuracy:  0.6592592592592592


30it [00:20,  1.47it/s]


Accuracy:  0.6867724867724868


30it [00:20,  1.47it/s]


Accuracy:  0.6835978835978836


30it [00:20,  1.47it/s]


Accuracy:  0.7216931216931217


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Accuracy:  0.6793650793650794


30it [00:20,  1.47it/s]


Accuracy:  0.6603174603174603


30it [00:20,  1.47it/s]


Accuracy:  0.6529100529100529


30it [00:20,  1.47it/s]


Accuracy:  0.6962962962962963


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Accuracy:  0.671957671957672


30it [00:20,  1.47it/s]


Accuracy:  0.6835978835978836


30it [00:20,  1.47it/s]


Accuracy:  0.674074074074074


30it [00:20,  1.48it/s]


Accuracy:  0.7037037037037037


30it [00:20,  1.47it/s]


Accuracy:  0.6634920634920635


30it [00:20,  1.47it/s]


Accuracy:  0.6984126984126984


30it [00:20,  1.47it/s]


Accuracy:  0.7185185185185186


30it [00:20,  1.47it/s]


Accuracy:  0.6751322751322751


30it [00:20,  1.48it/s]


Accuracy:  0.6666666666666666


30it [00:20,  1.47it/s]


Accuracy:  0.691005291005291


30it [00:20,  1.47it/s]


Accuracy:  0.7248677248677249


30it [00:20,  1.47it/s]


Accuracy:  0.7185185185185186


30it [00:20,  1.47it/s]


Accuracy:  0.6317460317460317


30it [00:20,  1.47it/s]
Configuration saved in /content/output_model/best/config.json


Accuracy:  0.7439153439153439
max accuaracy is 0.7439153439153439 


Model weights saved in /content/output_model/best/pytorch_model.bin
tokenizer config file saved in /content/output_model/best/tokenizer_config.json
Special tokens file saved in /content/output_model/best/special_tokens_map.json
30it [00:20,  1.47it/s]


Accuracy:  0.6751322751322751


30it [00:20,  1.47it/s]


Accuracy:  0.728042328042328


30it [00:20,  1.48it/s]


Accuracy:  0.6994708994708995


30it [00:20,  1.47it/s]


Accuracy:  0.6846560846560846


30it [00:20,  1.47it/s]


Accuracy:  0.6708994708994709


30it [00:20,  1.47it/s]


Accuracy:  0.6878306878306878


30it [00:20,  1.47it/s]


Accuracy:  0.6603174603174603


30it [00:20,  1.47it/s]


Accuracy:  0.6867724867724868


30it [00:20,  1.47it/s]


Accuracy:  0.6962962962962963


30it [00:20,  1.48it/s]


Accuracy:  0.6253968253968254


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Accuracy:  0.6634920634920635


30it [00:20,  1.47it/s]


Accuracy:  0.6835978835978836


30it [00:20,  1.47it/s]


Accuracy:  0.6201058201058202


30it [00:20,  1.47it/s]


Accuracy:  0.6285714285714286


30it [00:20,  1.47it/s]


Accuracy:  0.6645502645502646


30it [00:20,  1.47it/s]


Accuracy:  0.6603174603174603


30it [00:20,  1.47it/s]


Accuracy:  0.6666666666666666


30it [00:20,  1.47it/s]


Accuracy:  0.6973544973544974


30it [00:20,  1.47it/s]


Accuracy:  0.6666666666666666


30it [00:20,  1.47it/s]


Accuracy:  0.6846560846560846


30it [00:20,  1.47it/s]


Accuracy:  0.7005291005291006


30it [00:20,  1.47it/s]


Accuracy:  0.6814814814814815


30it [00:20,  1.46it/s]


Accuracy:  0.6772486772486772


30it [00:20,  1.47it/s]


Accuracy:  0.638095238095238


30it [00:20,  1.47it/s]


Accuracy:  0.6476190476190476


30it [00:20,  1.47it/s]


Accuracy:  0.6624338624338625


30it [00:20,  1.47it/s]


Accuracy:  0.674074074074074


30it [00:20,  1.47it/s]


Accuracy:  0.6730158730158731


30it [00:20,  1.47it/s]


Accuracy:  0.6835978835978836


30it [00:20,  1.47it/s]


Accuracy:  0.6825396825396826


30it [00:20,  1.46it/s]


Accuracy:  0.6571428571428571


30it [00:20,  1.47it/s]


Accuracy:  0.6804232804232804


30it [00:20,  1.47it/s]


Accuracy:  0.671957671957672


30it [00:20,  1.47it/s]


Accuracy:  0.671957671957672


30it [00:20,  1.46it/s]


Accuracy:  0.6708994708994709


30it [00:20,  1.47it/s]


Accuracy:  0.6804232804232804


30it [00:20,  1.47it/s]


Accuracy:  0.6634920634920635


30it [00:20,  1.47it/s]


Accuracy:  0.6476190476190476


30it [00:20,  1.47it/s]


Accuracy:  0.6846560846560846


30it [00:20,  1.47it/s]


Accuracy:  0.6962962962962963


30it [00:20,  1.47it/s]


Accuracy:  0.6687830687830688


30it [00:20,  1.47it/s]


Accuracy:  0.674074074074074


30it [00:20,  1.47it/s]


Accuracy:  0.6634920634920635


30it [00:20,  1.47it/s]


Accuracy:  0.6656084656084656


30it [00:20,  1.47it/s]


Accuracy:  0.6761904761904762


30it [00:20,  1.47it/s]


Accuracy:  0.6772486772486772


30it [00:20,  1.47it/s]


Accuracy:  0.674074074074074


30it [00:20,  1.47it/s]


Accuracy:  0.6603174603174603


30it [00:20,  1.47it/s]


Accuracy:  0.6253968253968254


30it [00:20,  1.47it/s]


Accuracy:  0.6486772486772486


30it [00:20,  1.47it/s]


Accuracy:  0.692063492063492


30it [00:20,  1.47it/s]


Accuracy:  0.7058201058201058


30it [00:20,  1.47it/s]


Accuracy:  0.6708994708994709


30it [00:20,  1.47it/s]


Accuracy:  0.6634920634920635


30it [00:20,  1.47it/s]


Accuracy:  0.6656084656084656


30it [00:20,  1.47it/s]


Accuracy:  0.6645502645502646


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Accuracy:  0.6613756613756614


30it [00:20,  1.47it/s]


Accuracy:  0.6571428571428571


30it [00:20,  1.47it/s]


Accuracy:  0.671957671957672


30it [00:20,  1.47it/s]


Accuracy:  0.6931216931216931


30it [00:20,  1.47it/s]


Accuracy:  0.6730158730158731


30it [00:20,  1.48it/s]


Accuracy:  0.6582010582010582


30it [00:20,  1.48it/s]


Accuracy:  0.6465608465608466


30it [00:20,  1.48it/s]


Accuracy:  0.6592592592592592


30it [00:20,  1.47it/s]


Accuracy:  0.6698412698412698


30it [00:20,  1.47it/s]


Accuracy:  0.6867724867724868


30it [00:20,  1.47it/s]


Accuracy:  0.6867724867724868


30it [00:20,  1.47it/s]


Accuracy:  0.6708994708994709


30it [00:20,  1.48it/s]


Accuracy:  0.6634920634920635


30it [00:20,  1.47it/s]


Accuracy:  0.6708994708994709


30it [00:20,  1.47it/s]


Accuracy:  0.6624338624338625


30it [00:20,  1.47it/s]


Accuracy:  0.6592592592592592


30it [00:20,  1.47it/s]


Accuracy:  0.6582010582010582


30it [00:20,  1.47it/s]


Accuracy:  0.6582010582010582


30it [00:20,  1.47it/s]


Accuracy:  0.6613756613756614


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Accuracy:  0.6698412698412698


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Training completed. Do not forget to share your model on huggingface.co/models =)




Accuracy:  0.6677248677248677


30it [00:20,  1.47it/s]


Accuracy:  0.6677248677248677
Training done


0.7439153439153439

In [None]:
# Recursively archive the output_model directory into output_model.zip.
!zip -r output_model.zip output_model

  adding: output_model/ (stored 0%)
  adding: output_model/best/ (stored 0%)
  adding: output_model/best/tokenizer.json (deflated 77%)
  adding: output_model/best/hyperparams.txt (deflated 33%)
  adding: output_model/best/spm.model (deflated 50%)
  adding: output_model/best/tokenizer_config.json (deflated 45%)
  adding: output_model/best/pytorch_model.bin (deflated 19%)
  adding: output_model/best/special_tokens_map.json (deflated 54%)
  adding: output_model/best/config.json (deflated 54%)
  adding: output_model/best/added_tokens.json (stored 0%)


In [None]:
#https://github.com/mathislucka/kaggle_clrp_1st_place_solution/blob/main/notebooks/04_clrp_training.ipynb

In [None]:
from torch.quantization import quantize_dynamic

# Reload the tokenizer and the fine-tuned classifier from the best checkpoint.
model_ckpt = "/content/output_model/best"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)

# Dynamic quantization: every nn.Linear module is replaced by its
# dynamically quantized counterpart using the qint8 dtype.
model_quantized = quantize_dynamic(
    model,
    qconfig_spec={nn.Linear},
    dtype=torch.qint8,
)

loading file /content/output_model/best/spm.model
loading file /content/output_model/best/tokenizer.json
loading file /content/output_model/best/added_tokens.json
loading file /content/output_model/best/special_tokens_map.json
loading file /content/output_model/best/tokenizer_config.json
loading configuration file /content/output_model/best/config.json
Model config DebertaV2Config {
  "_name_or_path": "/content/output_model/best",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention

In [None]:
# Directory that receives the quantized artifacts.
save_dir = os.path.join('/content', 'quantized')
# torch.save does not create missing parent directories, so create it up front.
os.makedirs(save_dir, exist_ok=True)
# Only the raw state_dict is stored (save_pretrained is deliberately not used).
# To restore it, rebuild the model from its config, apply quantize_dynamic with
# the same settings, and then call load_state_dict; from_pretrained cannot read it.
torch.save(model_quantized.state_dict(), os.path.join(save_dir, "quantized_model.bin"))
# Store config.json next to the weights so the architecture can be rebuilt
# from this directory alone.
model_quantized.config.save_pretrained(save_dir)

In [None]:
# Sample passage used to smoke-test the classifiers below.
text = (
    "Temporality is obviously an organised structure, and these three "
    "so-called elements of time: past, present, future, must not be "
    "envisaged as a collection of 'data' to be added together...but as "
    "the structured moments of an original synthesis. Otherwise we shall "
    "immediately meet with this paradox: the past is no longer, the future "
    "is not yet, as for the instantaneous present, everyone knows that it "
    "is not at all: it is the limit of infinite division, like the "
    "dimensionless point."
)

In [None]:
# Load the tokenizer stored with the best checkpoint. This is the same path
# that the quantization cell above assigns to model_ckpt.
tokenizer = AutoTokenizer.from_pretrained('/content/output_model/best')

loading file /content/output_model/best/spm.model
loading file /content/output_model/best/tokenizer.json
loading file /content/output_model/best/added_tokens.json
loading file /content/output_model/best/special_tokens_map.json
loading file /content/output_model/best/tokenizer_config.json


In [None]:
# Encode `text` as a batch of one; return_tensors="pt" requests PyTorch tensors.
inputs=tokenizer([text], return_tensors="pt")

In [None]:
# Forward pass of the quantized model on CPU for the single encoded sample.
# NOTE(review): the error quoted in the following cell reports that
# 'quantized::linear_dynamic' is unavailable on the CUDA backend, which is
# presumably why the device selection below is commented out.
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_quantized=model_quantized.to("cpu")
inputs=inputs.to("cpu")
# Last expression of the cell: the SequenceClassifierOutput is displayed.
model_quantized(**inputs)
#predictions = outputs.logits.argmax(-1)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.6772,  2.1413, -1.1826]]), hidden_states=None, attentions=None)

In [None]:
#Running the quantized model on the CUDA backend raised the error below,
#so quantized inference in this notebook is done on CPU:
#NotImplementedError: Could not run 'quantized::linear_dynamic' with arguments from the 'CUDA' backend. 
#This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom 
#build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, 
#please visit https://fburl.com/ptmfixes

In [None]:
# Predicted class index: argmax over the last logits axis, converted to
# NumPy, then the first (and only) batch entry.
model_quantized(**inputs).logits.argmax(-1).detach().numpy()[0]

1

In [None]:
# Recursively archive the quantized directory into quantized_model.zip.
!zip -r quantized_model.zip quantized

  adding: quantized/ (stored 0%)
  adding: quantized/quantized_model.bin (deflated 33%)
  adding: quantized/config.json (deflated 54%)


In [None]:
model_ckpt1 = "/content/output_model/best"  # float checkpoint; also holds the tokenizer files
model_ckpt2 = "/content/quantized"          # config.json + quantized_model.bin
tokenizer2 = AutoTokenizer.from_pretrained(model_ckpt1)

# AutoModelForSequenceClassification.from_pretrained(model_ckpt2) fails with
# "no file named pytorch_model.bin, tf_model.h5, model.ckpt.index or
# flax_model.msgpack found in directory /content/quantized": the directory
# only contains a raw quantized state_dict. Rebuild the architecture from the
# config, quantize it the same way as model_quantized, then load the weights.
model_q = quantize_dynamic(
    AutoModelForSequenceClassification.from_config(AutoConfig.from_pretrained(model_ckpt2)),
    {nn.Linear},
    dtype=torch.qint8,
)
model_q.load_state_dict(
    torch.load(os.path.join(model_ckpt2, "quantized_model.bin"), map_location="cpu")
)
# from_config (unlike from_pretrained) leaves the model in training mode.
model_q.eval()

loading file /content/output_model/best/spm.model
loading file /content/output_model/best/tokenizer.json
loading file /content/output_model/best/added_tokens.json
loading file /content/output_model/best/special_tokens_map.json
loading file /content/output_model/best/tokenizer_config.json
loading configuration file /content/quantized/config.json
Model config DebertaV2Config {
  "_name_or_path": "/content/quantized",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "

In [None]:
model_ckpt="/content/drive/MyDrive/PreviouslyStored/text_complexity_best_model"
tokenizer2 = AutoTokenizer.from_pretrained(model_ckpt)

# Look at the stored weights before choosing how to build the model.
state_dict_q = torch.load(os.path.join(model_ckpt, "pytorch_model.bin"), map_location="cpu")
if any("_packed_params" in key for key in state_dict_q):
    # Quantized checkpoint. from_pretrained would build a float model, report
    # "Some weights ... were not used" for every quantized Linear entry
    # (scale / zero_point / _packed_params) and leave those layers randomly
    # initialised. Rebuild the dynamically quantized architecture instead and
    # load the state_dict into it.
    model_q = quantize_dynamic(
        AutoModelForSequenceClassification.from_config(AutoConfig.from_pretrained(model_ckpt)),
        {nn.Linear},
        dtype=torch.qint8,
    )
    model_q.load_state_dict(state_dict_q)
    # from_config (unlike from_pretrained) leaves the model in training mode.
    model_q.eval()
else:
    # Regular float checkpoint: the standard loader is correct.
    model_q = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
del state_dict_q

Some weights of the model checkpoint at /content/drive/MyDrive/PreviouslyStored/text_complexity_best_model were not used when initializing DebertaV2ForSequenceClassification: ['deberta.encoder.layer.7.attention.output.dense.scale', 'deberta.encoder.layer.9.output.dense._packed_params._packed_params', 'deberta.encoder.layer.2.attention.output.dense.zero_point', 'deberta.encoder.layer.0.attention.output.dense.zero_point', 'deberta.encoder.layer.6.output.dense.scale', 'deberta.encoder.layer.11.intermediate.dense._packed_params._packed_params', 'deberta.encoder.layer.6.attention.output.dense._packed_params.dtype', 'deberta.encoder.layer.7.attention.self.value_proj._packed_params.dtype', 'deberta.encoder.layer.11.output.dense._packed_params._packed_params', 'deberta.encoder.layer.7.intermediate.dense._packed_params.dtype', 'deberta.encoder.layer.9.attention.output.dense.scale', 'deberta.encoder.layer.5.attention.self.query_proj.scale', 'deberta.encoder.layer.2.attention.self.value_proj._pac

In [None]:
# Load tokenizer and model from the same Drive checkpoint path used for
# tokenizer2/model_q, rebinding the `tokenizer` and `model` names that were
# set earlier from /content/output_model/best.
model_ckpt="/content/drive/MyDrive/PreviouslyStored/text_complexity_best_model"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)

In [None]:
# Second sample passage; the trailing space is part of the original string.
text2 = (
    "Some of these people were scientists. "
    "They tried to find out which areas have more chances of having an earthquake. "
    "People from our village talked to them many times. "
)

In [None]:
# Encode `text` with tokenizer2 as a batch of one.
# NOTE(review): the next cell rebinds `inputs` to the encoding of text2, so
# when cells run top to bottom this value is overwritten before use.
inputs=tokenizer2([text], return_tensors="pt")

In [None]:
# Encode `text2` with tokenizer2 as a batch of one (rebinds `inputs`).
inputs=tokenizer2([text2], return_tensors="pt")

In [None]:
# Encode `text` with `tokenizer` as a batch of one, kept separately as inputs2.
inputs2=tokenizer([text], return_tensors="pt")

In [None]:
# Move model_q and the current `inputs` encoding to CPU and run a forward
# pass; the cell displays the returned SequenceClassifierOutput.
model_q=model_q.to("cpu")
inputs=inputs.to("cpu")
model_q(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[0.1366, 0.0132, 0.1016]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
# Score the inputs2 encoding with `model` on CPU.
model=model.to("cpu")
# Previously `inputs2=inputs.to("cpu")`, which silently replaced inputs2 with
# the other encoding, so this cell scored the same input as the cell that
# calls model(**inputs).
inputs2=inputs2.to("cpu")
model(**inputs2)

SequenceClassifierOutput(loss=None, logits=tensor([[-3.0663, -1.4858,  4.9078]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
# Score the current `inputs` encoding with `model` on CPU; the cell displays
# the returned SequenceClassifierOutput.
model=model.to("cpu")
inputs=inputs.to("cpu")
model(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[ 4.1139, -1.7466, -3.4888]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
#right after training
# Same forward pass as the earlier model_q cell: move model_q and `inputs`
# to CPU and display the SequenceClassifierOutput.
model_q=model_q.to("cpu")
inputs=inputs.to("cpu")
model_q(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.0607, -0.0294, -0.0325]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
# Predicted class index from model_q: argmax over the last logits axis,
# converted to NumPy, then the first (and only) batch entry.
model_q(**inputs).logits.argmax(-1).detach().numpy()[0]

1

In [None]:

# Drive directory that receives the quantized artifacts.
save_dir = os.path.join('/content/drive/MyDrive', 'quantized_complexity_model')
# torch.save does not create missing parent directories, so create it up front.
os.makedirs(save_dir, exist_ok=True)
# NOTE: although the file is named pytorch_model.bin, it holds the state_dict
# of the dynamically quantized model. Loading it with from_pretrained drops
# the quantized Linear entries; restore it by rebuilding the model from its
# config, applying quantize_dynamic, and calling load_state_dict.
torch.save(model_quantized.state_dict(), os.path.join(save_dir, "pytorch_model.bin"))
# Store config.json next to the weights so the architecture can be rebuilt
# from this directory alone.
model_quantized.config.save_pretrained(save_dir)