In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import sys
sys.path.append('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/src')

In [None]:
import classifier
import tester
import pandas as pd
import numpy as np
import csv
import sklearn
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

## Prepare the data

In [None]:
# prepare devdata
# The file is tab-separated with no header row. Split every line on tabs directly:
# parsing it as comma-separated CSV and re-joining the pieces on ',' silently drops
# double quotes from any comma-delimited piece that happens to start with one.
# The encoding is stated explicitly so it matches how the evaluation helpers read
# the same file.
with open('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv', encoding='utf-8') as f:
    # One string per line, trailing newline removed.
    devdata_list = [line.rstrip('\r\n') for line in f]

devdata_df = pd.DataFrame(
    [line.split('\t') for line in devdata_list],
    columns=['polarity', 'aspect_category', 'target_term', 'character_offsets', 'sentence'],
)
devdata_df.head(2)

Unnamed: 0,polarity,aspect_category,target_term,character_offsets,sentence
0,positive,LOCATION#GENERAL,neighborhood,54:66,"great food, great wine list, great service in ..."
1,negative,RESTAURANT#GENERAL,place,15:20,I thought this place was totally overrated.


In [None]:
# prepare traindata
# The file is tab-separated with no header row. Split every line on tabs directly:
# parsing it as comma-separated CSV and re-joining the pieces on ',' silently drops
# double quotes from any comma-delimited piece that happens to start with one.
# The encoding is stated explicitly so it matches how the evaluation helpers read
# the same file.
with open('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv', encoding='utf-8') as f:
    # One string per line, trailing newline removed.
    traindata_list = [line.rstrip('\r\n') for line in f]

traindata_df = pd.DataFrame(
    [line.split('\t') for line in traindata_list],
    columns=['polarity', 'aspect_category', 'target_term', 'character_offsets', 'sentence'],
)
traindata_df.head(2)

Unnamed: 0,polarity,aspect_category,target_term,character_offsets,sentence
0,positive,AMBIENCE#GENERAL,seating,18:25,short and sweet – seating is great:it's romant...
1,positive,AMBIENCE#GENERAL,trattoria,25:34,This quaint and romantic trattoria is at the t...


## Some useful variables

In [None]:
# Distinct aspect categories seen in the training data. Sorted so the list order is
# the same on every kernel restart (the iteration order of a set of strings is not).
aspect_categories = sorted(traindata_df.aspect_category.unique())

In [None]:
len(aspect_categories)

12

In [None]:
devdata_df.polarity.value_counts(), traindata_df.polarity.value_counts()

(positive    264
 negative     98
 neutral      14
 Name: polarity, dtype: int64, positive    1055
 negative     390
 neutral       58
 Name: polarity, dtype: int64)

## Solve Package Version Problem

In [None]:
import sys
sys.version

'3.7.12 (default, Jan 15 2022, 18:48:18) \n[GCC 7.5.0]'

In [None]:
!pip install transformers --quiet
!pip install stanza --quiet

In [None]:
# change your version according to the .__version__ result
!pip uninstall nltk -y --quiet
!pip uninstall gensim -y --quiet
!pip install nltk==3.6.0 --quiet
!pip install gensim==4.1.2 --quiet

In [None]:
import nltk       # >= 3.6.0
import sklearn    # >=0.24.0
import pandas as pd     # >= 1.3.0
import gensim     # >= 4.1.2
import stanza     # == 1.3.0
import torch      # >= 1.10.0
import transformers # version 4.16.x.

# Report the installed library versions as one tuple (displayed as the cell output).
# pandas is imported under the alias `pd` in this cell, so its version has to be
# read through that alias; the bare name `pandas` is not defined on a fresh kernel.
(
    'nltk version:' + nltk.__version__,
    'sklearn version:' + sklearn.__version__,
    'pandas version:' + pd.__version__,
    'gensim version:' + gensim.__version__,
    'stanza version:' + stanza.__version__,
    'torch version:' + torch.__version__,
    'transformers version:' + transformers.__version__,
)

('nltk version:3.6',
 'sklearn version:1.0.2',
 'pandas version:1.3.5',
 'gensim version:4.1.2',
 'stanza version:1.3.0',
 'torch version:1.10.0+cu111',
 'transformers version:4.17.0')

## Complete Classifier class

In [None]:
import transformers
from transformers import RobertaTokenizer, TFRobertaModel
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [None]:
class Classifier:
    """Aspect-based sentiment classifier: RoBERTa embeddings fed to a Keras dense head.

    The aspect category, the aspect term and the review text of every example are
    tokenized separately and passed through a pre-trained RoBERTa model. The three
    resulting token-embedding sequences are concatenated along the token axis and
    given to a feed-forward network that outputs one of three sentiment classes.
    Only the feed-forward head is fitted in `train`; the RoBERTa model is used
    through `predict` only.
    """

    def __init__(self):
        """Load the pre-trained tokenizer/encoder and build the untrained head."""
        # Pre-trained RoBERTa tokenizer and TensorFlow encoder from Hugging Face.
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-large-mnli')
        self.bert_model = TFRobertaModel.from_pretrained('roberta-large-mnli')

        # Token budget for each embedded field, and the dataframe column each
        # field is read from. Both dicts share the same keys.
        self.max_token_dict = {'asp_cat_emb':16, 'asp_term_emb':24, 'review_emb':50}
        self.src_column_dict = {'asp_cat_emb':'aspect_category', 'asp_term_emb':'aspect_term', 'review_emb':'review'}

        # Maps sentiment strings to integer class ids and back.
        self.encoder = LabelEncoder()

        # Feed-forward head; compiled and fitted in `train`.
        self.model = self.create_model()


    def train(self, trainfile, devfile=None):
        """Fit the classification head on the examples stored in `trainfile`.

        Args:
            trainfile: path to a tab-separated file without header row
                (see `loadfile` for the column layout).
            devfile: accepted for interface compatibility; not used here.
        """
        # Imported locally so the class does not depend on another notebook cell
        # having imported `time` before `train` is called.
        import time

        # loading
        print('loading the train file...')
        df = self.loadfile(trainfile)
        X = self.get_embeddings(df)  # independent variables (model inputs)
        # LabelEncoder expects a 1-D array of labels; passing a (n, 1) column
        # only works through an implicit flattening that raises a warning.
        Y = self.encoder.fit_transform(df['sentiment'].values)

        # optimizer and scheduler
        # (a ReduceLROnPlateau callback was tried earlier and is left disabled)
        print('optimizing and scheduling...')
        optim = tf.keras.optimizers.Adam(learning_rate=0.001)

        # loss
        print('calculating loss...')
        self.model.compile(optimizer=optim, loss= 'sparse_categorical_crossentropy', metrics = ['accuracy'])

        # training
        print('training & fit...')
        temp_start = time.time()
        self.model.fit(X, Y, epochs =50, verbose = 0)

        temp_end = time.time()
        print('It takes ', str(round((temp_end - temp_start), 3)), ' seconds to train')


    def predict(self, datafile):
        """Predicts class labels for the input instances in file 'datafile'
        Returns the list of predicted labels
        """
        # loading the test file
        df = self.loadfile(datafile)
        X = self.get_embeddings(df)

        # Class probabilities, one row per example.
        pred = self.model.predict(X)

        # Most probable class id, decoded back to the original label strings.
        pred = np.argmax(pred, axis= 1)
        pred = self.encoder.inverse_transform(pred)

        return list(pred)


    def loadfile(self, csv_file):
        '''
        Load a tab-separated data file (no header row) as a pandas DataFrame.

        The five columns are named, in file order:
        'sentiment', 'aspect_category', 'aspect_term', 'slice', 'review'.
        '''
        columns = ['sentiment', 'aspect_category', 'aspect_term', 'slice', 'review']
        df = pd.read_csv(csv_file, sep='\t', names = columns, header = None)
        return df


    def get_embeddings(self, df, to_print=False):
        '''
        Compute RoBERTa embeddings for every row of `df`.

        Each field listed in `self.max_token_dict` is tokenized on its own,
        padded/truncated to that field's token budget, and passed through the
        RoBERTa model. The per-field outputs are concatenated along axis 1, so
        the result has 16 + 24 + 50 = 90 positions on that axis with the
        default budgets.

        Args:
            df: DataFrame with the columns named in `self.src_column_dict`.
            to_print: when True, print the tokenizer arrays of every field.

        Returns:
            numpy array with one entry per row of `df`.
        '''
        emb_list = []
        for col, max_length in self.max_token_dict.items():
            # str() also covers missing values (NaN) in a column.
            texts = [str(value) for value in df[self.src_column_dict[col]].values]
            # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
            # NumPy tensors are requested because the arrays are handed to a
            # TensorFlow model; this avoids a needless round trip through PyTorch.
            encoded = self.tokenizer(texts,
                                     max_length = max_length,
                                     padding = 'max_length',
                                     truncation = True,
                                     return_tensors = "np")

            # Model inputs in the order the tokenizer returns them.
            inputs = [np.array(v) for _, v in encoded.items()]
            if to_print:
                print(inputs)

            out = self.bert_model.predict(inputs)
            # First model output: the per-token embeddings.
            emb_list.append(out[0])

        return np.concatenate(emb_list, axis =1)


    def create_model(self):
        '''
        Final Classifier NN model.
        Takes embeddings as input and predicts the class encoded label.

        Dense layers are first applied to the token embeddings, then the
        result is flattened and passed through a stack of dense + dropout
        layers ending in a 3-way softmax.
        '''
        layers = tf.keras.layers
        model = tf.keras.Sequential([
            # Applied to the last (embedding) axis of every token position.
            layers.Dense(2048),
            layers.Dense(512),
            layers.Dense(256),
            layers.Dense(128),

            layers.Flatten(),

            layers.Dense(4000),
            layers.Dropout(0.2),

            layers.Dense(1250),
            layers.Dropout(0.2),

            layers.Dense(512),
            layers.Dropout(0.2),

            layers.Dense(256, activation= tf.nn.leaky_relu),
            layers.Dropout(0.2),

            layers.Dense(64),
            layers.Dropout(0.2),

            layers.Dense(16, activation= tf.nn.leaky_relu),
            layers.Dropout(0.1),

            # One output unit per sentiment class.
            layers.Dense(units=3, activation='softmax'),
        ])

        return model

In [None]:
import sys
sys.argv

['/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py',
 '-f',
 '/root/.local/share/jupyter/runtime/kernel-c7a1d6ac-2cd5-4365-a390-a4f07f15b2e8.json']

In [None]:
# test.py
import random as rn
import os
import time, sys
import numpy as np

def set_reproducible():
    """Put the Python and NumPy random number generators into a fixed state.

    Seeds the standard-library `random` module and NumPy's global generator
    with fixed values and sets the PYTHONHASHSEED environment variable to '0'.
    No deep-learning framework is seeded here.
    """
    import os
    import random as rn

    # Core Python random numbers start from a well-defined state.
    rn.seed(12345)
    # NumPy-generated random numbers start from a well-defined state.
    np.random.seed(17)
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # presumably only affects processes started afterwards - confirm.
    os.environ['PYTHONHASHSEED'] = '0'

def load_label_output(filename):
    """Return the first tab-separated field of every non-blank line of `filename`.

    Lines are stripped of surrounding whitespace first; lines that are empty
    after stripping are skipped. The file is read as UTF-8.
    """
    labels = []
    with open(filename, 'r', encoding='UTF-8') as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            labels.append(stripped.split("\t")[0])
    return labels

def eval_list(glabels, slabels):
    """Return the accuracy, in percent, of system labels against gold labels.

    Args:
        glabels: list of gold labels.
        slabels: list of system (predicted) labels, compared position by position.

    Returns:
        Percentage (0-100) of matching positions. When the lists differ in
        length a warning is printed and only the common prefix is compared.
        Returns 0.0 when there is nothing to compare, instead of dividing by zero.
    """
    if (len(glabels) != len(slabels)):
        print("\nWARNING: label count in system output (%d) is different from gold label count (%d)\n" % (
        len(slabels), len(glabels)))
    n = min(len(slabels), len(glabels))
    if n == 0:
        # Empty gold or system output: no position can be scored.
        return 0.0
    # zip stops at the shorter list, i.e. after exactly n pairs.
    correct_count = sum(1 for gold, system in zip(glabels, slabels) if gold == system)
    acc = correct_count / n
    return acc*100

def train_and_eval(classifier, trainfile, devfile, testfile, run_id):
    """Train `classifier` once and score it on the dev set (and test set, if given).

    Args:
        classifier: object exposing `train(trainfile, devfile)` and `predict(path)`.
        trainfile: path of the training data file.
        devfile: path of the development data file.
        testfile: path of the test data file, or None to skip test evaluation.
        run_id: identifier of the run, used only in the progress messages.

    Returns:
        Tuple `(devacc, testacc)` of accuracies in percent; `testacc` is -1
        when `testfile` is None.
    """
    def _accuracy_on(path):
        # Predict the labels of `path` and compare them with its gold labels.
        predicted = classifier.predict(path)
        gold = load_label_output(path)
        return eval_list(gold, predicted)

    print(f"\nRUN: {run_id}")
    print("  %s.1. Training the classifier..." % str(run_id))
    classifier.train(trainfile, devfile)
    print()

    print("  %s.2. Eval on the dev set..." % str(run_id), end="")
    devacc = _accuracy_on(devfile)
    print(" Acc.: %.2f" % devacc)

    # -1 marks "no test evaluation performed".
    testacc = -1
    if testfile is not None:
        print("  %s.3. Eval on the test set..." % str(run_id), end="")
        testacc = _accuracy_on(testfile)
        print(" Acc.: %.2f" % testacc)
    print()
    return (devacc, testacc)

In [None]:
# Train and evaluate a freshly built Classifier `n_runs` times, then report the
# per-run accuracies with their mean and standard deviation and the wall-clock time.
set_reproducible()
n_runs = 5
datadir = "../data/"  # not used below in this cell; the absolute Drive paths are used instead
trainfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv'
devfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv'
testfile = None  # no test set: train_and_eval then returns -1 as test accuracy

# Runs
start_time = time.perf_counter()
devaccs = []
testaccs = []
for i in range(1, n_runs+1):
    # A new Classifier per run; its constructor reloads the pre-trained RoBERTa model.
    # NOTE: this rebinds the name `classifier`, which an earlier cell bound to the
    # imported `classifier` module. After the loop it refers to the last trained instance.
    classifier =  Classifier()
    devacc, testacc = train_and_eval(classifier, trainfile, devfile, testfile, i)
    devaccs.append(np.round(devacc, 2))
    testaccs.append(np.round(testacc, 2))
print('\nCompleted %d runs.' % n_runs)
total_exec_time = (time.perf_counter() - start_time)
print("Dev accs:", devaccs)
print("Test accs:", testaccs)
print()
print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
# The per-run time is formatted with %d, i.e. shown as a whole number of seconds.
print("\nExec time: %.2f s. ( %d per run )" % (total_exec_time, total_exec_time / n_runs))

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 1
  1.1. Training the classifier...
loading the train file...
optimizing and scheduling...
calculating loss...
training & fit...
It takes  43.988  seconds to train

  1.2. Eval on the dev set... Acc.: 77.66



Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 2
  2.1. Training the classifier...
loading the train file...
optimizing and scheduling...
calculating loss...
training & fit...
It takes  44.222  seconds to train

  2.2. Eval on the dev set... Acc.: 85.37



Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 3
  3.1. Training the classifier...
loading the train file...
optimizing and scheduling...
calculating loss...
training & fit...
It takes  43.872  seconds to train

  3.2. Eval on the dev set... Acc.: 85.11



Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 4
  4.1. Training the classifier...
loading the train file...
optimizing and scheduling...
calculating loss...
training & fit...
It takes  43.934  seconds to train

  4.2. Eval on the dev set... Acc.: 69.15



Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 5
  5.1. Training the classifier...
loading the train file...
optimizing and scheduling...
calculating loss...
training & fit...
It takes  43.962  seconds to train

  5.2. Eval on the dev set... Acc.: 84.84


Completed 5 runs.
Dev accs: [77.66, 85.37, 85.11, 69.15, 84.84]
Test accs: [-1, -1, -1, -1, -1]

Mean Dev Acc.: 80.43 (6.34)
Mean Test Acc.: -1.00 (0.00)

Exec time: 498.59 s. ( 99 per run )


In [None]:
devaccs

[86.97, 87.23, 88.03, 86.44, 88.03]

In [None]:
testaccs

[-1, -1, -1, -1, -1]

In [None]:
# # Save the sentiment of every sentence
# for i in devaccs:
#     with open('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/saved_lists/devaccs.txt', 'a') as f:
#         f.write(i)
#         f.write('\n')

# for i in testaccs:
#     with open('/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/saved_lists/testaccs.txt', 'a') as f:
#         f.write(i)
#         f.write('\n')

# f.close()

In [None]:
# # Reload the lists
# devaccs = []
# testaccs = []

# with open('test1.txt', 'r') as f1:
#     list1 = f1.readlines()
#     print(list1)

# for i in range(0, len(list1)):
#     list1[i] = list1[i].rstrip('\n')

## The following cells are scratch work

## Inspecting the predicted sentiment of individual sentences

In [None]:
# Predict the dev-set labels once, outside the multi-run loop.
# NOTE: `classifier` is not created in this cell. It must already be bound to a
# trained Classifier instance left over from an earlier cell.
set_reproducible()
n_runs = 5  # not used in this cell
datadir = "../data/"  # not used in this cell
trainfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv'
devfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv'
testfile = None

# System (predicted) labels for every example of the dev file.
slabels = classifier.predict(devfile)

In [None]:
len(slabels)

376

In [None]:
slabels[:10]

['positive',
 'negative',
 'positive',
 'negative',
 'positive',
 'negative',
 'negative',
 'positive',
 'positive',
 'negative']

In [None]:
glabels = load_label_output(devfile)
glabels[:10]

['positive',
 'negative',
 'positive',
 'negative',
 'neutral',
 'negative',
 'negative',
 'positive',
 'positive',
 'negative']

In [None]:
classifier.loadfile(devfile)

Unnamed: 0,sentiment,aspect_category,aspect_term,slice,review
0,positive,LOCATION#GENERAL,neighborhood,54:66,"great food, great wine list, great service in ..."
1,negative,RESTAURANT#GENERAL,place,15:20,I thought this place was totally overrated.
2,positive,FOOD#QUALITY,Fish,0:4,Fish is so very fresh.
3,negative,SERVICE#GENERAL,manager,19:26,"I showed it to the manager, and he smilingly a..."
4,neutral,DRINKS#QUALITY,margaritas,63:73,"The food we ordered was excellent, although I ..."
...,...,...,...,...,...
371,positive,RESTAURANT#GENERAL,PLACE,58:63,I CAN EAT HERE EVERY DAY OF THE WEEK REALLY LO...
372,neutral,RESTAURANT#MISCELLANEOUS,Bark,52:56,Though it's been crowded most times I've gone ...
373,positive,FOOD#QUALITY,food,4:8,The food is excellent!
374,negative,FOOD#QUALITY,chow fun and chow see,3:24,My chow fun and chow see was really bland and ...


In [None]:
classifier.get_embeddings(classifier.loadfile(devfile))[:2]

array([[[-0.057926  ,  0.34858805, -0.5094472 , ...,  0.49975532,
         -0.38258582,  0.77565813],
        [-0.027873  ,  0.48880327, -0.7928846 , ...,  0.2693749 ,
         -0.47329918,  0.6508599 ],
        [-0.07967386,  0.5496016 , -0.8315101 , ...,  0.32427025,
         -0.5050349 ,  0.65480244],
        ...,
        [-0.8333128 ,  0.38822085, -0.52785254, ...,  0.31770483,
         -0.292842  ,  1.8338207 ],
        [-0.8333128 ,  0.38822085, -0.52785254, ...,  0.31770483,
         -0.292842  ,  1.8338207 ],
        [-0.8333128 ,  0.38822085, -0.52785254, ...,  0.31770483,
         -0.292842  ,  1.8338207 ]],

       [[-0.09319579,  0.39213178, -0.5307334 , ...,  0.4637822 ,
         -0.40383434,  0.82813793],
        [-0.01807237,  0.57009214, -1.0802944 , ...,  0.1709255 ,
         -0.5365272 ,  0.73843217],
        [-0.08067247,  0.54335475, -1.1716623 , ...,  0.02692217,
         -0.6566367 ,  0.82870704],
        ...,
        [-0.97984093,  0.52103263, -0.6923128 , ...,  

In [None]:
classifier.get_embeddings(classifier.loadfile(devfile)).shape

(376, 90, 1024)

In [None]:
print(sklearn.metrics.classification_report(glabels, slabels))

              precision    recall  f1-score   support

    negative       0.81      0.63      0.71        98
     neutral       0.00      0.00      0.00        14
    positive       0.86      0.97      0.91       264

    accuracy                           0.85       376
   macro avg       0.55      0.54      0.54       376
weighted avg       0.81      0.85      0.83       376



In [None]:
classifier.get_embeddings(classifier.loadfile(devfile), to_print=True)

In [None]:
# # Go through the sentences one by one
# def predict(datafile):
#     """Predicts class labels for the input instances in file 'datafile'
#     Returns the list of predicted labels
#     """
    
#     # loading the test file
#     columns = ['sentiment','aspect_category','aspect_term','slice','review']
#     df2 = pd.read_csv(datafile,sep='\t',names = columns, header = None)
    
#     tokenizer = RobertaTokenizer.from_pretrained('roberta-large-mnli')
#     bert_model = TFRobertaModel.from_pretrained('roberta-large-mnli')

#     # parameters for getting embedding
#     max_token_dict = {'asp_cat_emb':16,'asp_term_emb':24,'review_emb':50}
#     src_column_dict = {'asp_cat_emb':'aspect_category','asp_term_emb':'aspect_term','review_emb':'review'}

#     encoder = LabelEncoder()
    
#     # get embeddings
#     emb_list = []
#     for col, MAX_LENGTH in max_token_dict.items():
#         str_inp = df2[src_column_dict[col]].values
#         inputs = tokenizer([str(i) for i in str_inp],
#                       max_length = MAX_LENGTH,
#                       pad_to_max_length = True, return_tensors="pt",truncation=True)
    
#         inputs = [np.array(v) for _,v in inputs.items()]
#         out = bert_model.predict(inputs)
#         emb_list.append(out[0])
  
#     X2 = np.concatenate(emb_list,axis =1)

#     # Predictions
#     model= tf.keras.Sequential()
#     model.add(tf.keras.layers.Dense(2048))
#     model.add(tf.keras.layers.Dense(512))
#     model.add(tf.keras.layers.Dense(256))
#     model.add(tf.keras.layers.Dense(128))
#     model.add(tf.keras.layers.Flatten())
#     model.add(tf.keras.layers.Dense(4000))
#     model.add(tf.keras.layers.Dropout(0.2))
#     model.add(tf.keras.layers.Dense(1250))
#     model.add(tf.keras.layers.Dropout(0.2))
#     model.add(tf.keras.layers.Dense(512))
#     model.add(tf.keras.layers.Dropout(0.2))
#     model.add(tf.keras.layers.Dense(256,activation= tf.nn.leaky_relu))
#     model.add(tf.keras.layers.Dropout(0.2))
#     model.add(tf.keras.layers.Dense(64)) #tf.nn.leaky_relu
#     model.add(tf.keras.layers.Dropout(0.2))
#     model.add(tf.keras.layers.Dense(16,activation= tf.nn.leaky_relu))
#     model.add(tf.keras.layers.Dropout(0.1))
#     model.add(tf.keras.layers.Dense(units=3, activation='softmax'))

#     pred = model.predict(X2)
    
#     # Encoding to original class labels
#     pred = np.argmax(pred, axis= 1)
#     y2_pred = encoder.inverse_transform(pred)
    
#     return list(y2_pred)

In [None]:
# The stand-alone `predict` function is only present as commented-out code, so
# calling it by its bare name raises NameError. Use the trained classifier's
# method instead and show only the first few predictions.
classifier.predict(devfile)[:10]

## Test file

In [None]:
import time, sys
import numpy as np


def set_reproducible():
    """Put the Python and NumPy random number generators into a fixed state.

    Seeds the standard-library `random` module and NumPy's global generator
    with fixed values and sets the PYTHONHASHSEED environment variable to '0'.
    No deep-learning framework is seeded here.
    """
    import os
    import random as rn

    # Core Python random numbers start from a well-defined state.
    rn.seed(12345)
    # NumPy-generated random numbers start from a well-defined state.
    np.random.seed(17)
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # presumably only affects processes started afterwards - confirm.
    os.environ['PYTHONHASHSEED'] = '0'



def load_label_output(filename):
    """Return the first tab-separated field of every non-blank line of `filename`.

    Lines are stripped of surrounding whitespace first; lines that are empty
    after stripping are skipped. The file is read as UTF-8.
    """
    labels = []
    with open(filename, 'r', encoding='UTF-8') as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            labels.append(stripped.split("\t")[0])
    return labels



def eval_list(glabels, slabels):
    """Return the accuracy, in percent, of system labels against gold labels.

    Args:
        glabels: list of gold labels.
        slabels: list of system (predicted) labels, compared position by position.

    Returns:
        Percentage (0-100) of matching positions. When the lists differ in
        length a warning is printed and only the common prefix is compared.
        Returns 0.0 when there is nothing to compare, instead of dividing by zero.
    """
    if (len(glabels) != len(slabels)):
        print("\nWARNING: label count in system output (%d) is different from gold label count (%d)\n" % (
        len(slabels), len(glabels)))
    n = min(len(slabels), len(glabels))
    if n == 0:
        # Empty gold or system output: no position can be scored.
        return 0.0
    # zip stops at the shorter list, i.e. after exactly n pairs.
    correct_count = sum(1 for gold, system in zip(glabels, slabels) if gold == system)
    acc = correct_count / n
    return acc*100



def train_and_eval(classifier, trainfile, devfile, testfile, run_id):
    """Train `classifier` once and score it on the dev set (and test set, if given).

    Args:
        classifier: object exposing `train(trainfile, devfile)` and `predict(path)`.
        trainfile: path of the training data file.
        devfile: path of the development data file.
        testfile: path of the test data file, or None to skip test evaluation.
        run_id: identifier of the run, used only in the progress messages.

    Returns:
        Tuple `(devacc, testacc)` of accuracies in percent; `testacc` is -1
        when `testfile` is None.
    """
    def _accuracy_on(path):
        # Predict the labels of `path` and compare them with its gold labels.
        predicted = classifier.predict(path)
        gold = load_label_output(path)
        return eval_list(gold, predicted)

    print(f"\nRUN: {run_id}")
    print("  %s.1. Training the classifier..." % str(run_id))
    classifier.train(trainfile, devfile)
    print()

    print("  %s.2. Eval on the dev set..." % str(run_id), end="")
    devacc = _accuracy_on(devfile)
    print(" Acc.: %.2f" % devacc)

    # -1 marks "no test evaluation performed".
    testacc = -1
    if testfile is not None:
        print("  %s.3. Eval on the test set..." % str(run_id), end="")
        testacc = _accuracy_on(testfile)
        print(" Acc.: %.2f" % testacc)
    print()
    return (devacc, testacc)


# if __name__ == "__main__":
#     set_reproducible()
#     # n_runs = 5
#     n_runs = 2
#     # if len(sys.argv) > 1:
#     #     print(sys.argv)
#     #     n_runs = int(sys.argv[1]) # commented out here; I did not understand what this is for
#     datadir = "../data/"
#     # trainfile =  datadir + "traindata.csv"
#     # devfile =  datadir + "devdata.csv"
#     trainfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv'
#     devfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv'
#     testfile = None
#     # testfile = datadir + "testdata.csv"

#     # Runs
#     start_time = time.perf_counter()
#     devaccs = []
#     testaccs = []
#     for i in range(1, n_runs+1):
#         classifier =  Classifier()
#         devacc, testacc = train_and_eval(classifier, trainfile, devfile, testfile, i)
#         devaccs.append(np.round(devacc,2))
#         testaccs.append(np.round(testacc,2))
#     print('\nCompleted %d runs.' % n_runs)
#     total_exec_time = (time.perf_counter() - start_time)
#     print("Dev accs:", devaccs)
#     print("Test accs:", testaccs)
#     print()
#     print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
#     print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
#     print("\nExec time: %.2f s. ( %d per run )" % (total_exec_time, total_exec_time / n_runs))

In [None]:
sys.argv

['/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py',
 '-f',
 '/root/.local/share/jupyter/runtime/kernel-a6f219ac-c91f-46e8-b0f1-fe08335fb596.json']

## PyTorch Version

In [None]:
torch.device("cuda")

device(type='cuda')

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import nltk       # >= 3.6.0
import sklearn    # >=0.24.0
import pandas as pd     # >= 1.3.0
import gensim     # >= 4.1.2
import stanza     # == 1.3.0
import torch      # >= 1.10.0
import transformers # version 4.16.x.

import pandas as pd
import numpy as np
import csv
import sklearn
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append('..')

class Classifier:
    """Aspect-based sentiment classifier.

    Token embeddings are extracted once from a pretrained RoBERTa model (which is
    not fine-tuned: only the feed-forward head is handed to the optimizer) and a
    PyTorch feed-forward head is trained on top of them to predict the polarity
    label found in the first column of the data files.
    """

    def __init__(self):
        """Load the pretrained tokenizer/encoder and build the trainable head."""
        # Pretrained models used only to turn text into embeddings.
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-large-mnli')
        self.bert_model = TFRobertaModel.from_pretrained('roberta-large-mnli')

        # Per-field token budget, and the dataframe column each field is read from.
        self.max_token_dict = {'asp_cat_emb':16,'asp_term_emb':24,'review_emb':50}
        self.src_column_dict = {'asp_cat_emb':'aspect_category','asp_term_emb':'aspect_term','review_emb':'review'}

        # Maps string labels <-> integer class ids.
        self.encoder = LabelEncoder()

        # Trainable head; use the GPU when PyTorch can see one, otherwise the CPU,
        # so the class also works on a runtime without CUDA.
        self.model = self.create_model()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        # Loss recorded after every optimisation step (appended to across train() calls).
        self.each_train_loss = []

    def train(self, trainfile, devfile=None, epochs=100, learning_rate=0.0001):
        """Train the head on the examples stored in ``trainfile``.

        WARNING: do not use the dev data as training examples; it may only be used
        to tune hyper-parameters. ``devfile`` is accepted for interface
        compatibility and is not read here.

        Args:
            trainfile: path to the tab-separated training file.
            devfile: unused.
            epochs: number of optimisation steps; every step uses the whole
                training set as a single batch.
            learning_rate: Adam learning rate.
        """
        print('loading the train file...')
        df = self.loadfile(trainfile)

        print('getting embeddings...')
        X = self.get_embeddings(df)
        Y = df['sentiment'].values

        print('encoder fit transform...')
        # LabelEncoder works on a 1-D label vector; a column vector only goes
        # through via an implicit (warning-raising) ravel.
        Y = self.encoder.fit_transform(np.asarray(Y).ravel())
        X = torch.tensor(X).to(self.device)
        # CrossEntropyLoss needs integer (long) class indices.
        Y = torch.tensor(Y, dtype=torch.long).to(self.device)

        # Training mode enables the dropout layers.
        self.model.train()
        self.model.to(self.device)

        print('optimizing and scheduling...')
        optimizer = torch.optim.Adam(self.model.parameters(), learning_rate)

        print('calculating loss...')
        # Expects raw logits, which is why the head has no final softmax.
        criterion = nn.CrossEntropyLoss()

        print('epochs starts')
        temp_start = time.time()
        for _ in tqdm(range(epochs)):
            optimizer.zero_grad()

            # Full-batch forward pass.
            outputs = self.model(X)
            loss = criterion(outputs, Y)

            loss.backward()
            optimizer.step()

            self.each_train_loss.append(loss.item())

        print('training & fit...')
        temp_end = time.time()
        print('It takes ', str(round((temp_end - temp_start), 3)), ' seconds to train')


    def predict(self, datafile):
        """Predict class labels for the instances in ``datafile``.

        Returns:
            The list of predicted labels, decoded back to the original strings.
        """
        df2 = self.loadfile(datafile)
        X2 = torch.tensor(self.get_embeddings(df2)).to(self.device)

        self.model.to(self.device)
        # Inference must run in eval mode (dropout disabled), otherwise the
        # predictions are randomly perturbed; no_grad avoids building the
        # autograd graph. The previous mode is restored afterwards.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                pred = self.model(X2)
        finally:
            self.model.train(was_training)
        pred = pred.cpu().numpy()

        # Highest logit -> class id -> original label.
        pred = np.argmax(pred, axis=1)
        y2_pred = self.encoder.inverse_transform(pred)

        return list(y2_pred)


    def loadfile(self, data_csv):
        """Load a tab-separated, header-less data file as a pandas DataFrame."""
        columns = ['sentiment','aspect_category','aspect_term','slice','review']
        df = pd.read_csv(data_csv, sep='\t', names=columns, header=None)
        return df


    def get_embeddings(self, df):
        """Embed the aspect category, aspect term and review columns with RoBERTa.

        Each column is tokenised to its fixed length from ``max_token_dict`` and
        the per-column outputs are concatenated along axis 1.
        """
        emb_list = []
        for col, MAX_LENGTH in self.max_token_dict.items():
            str_inp = df[self.src_column_dict[col]].values
            # padding='max_length' is the current spelling of the deprecated
            # pad_to_max_length=True.
            inputs = self.tokenizer([str(i) for i in str_inp],
                          max_length = MAX_LENGTH,
                          padding = 'max_length', return_tensors="pt", truncation=True)

            # The encoder is a TensorFlow model, so hand it plain numpy arrays.
            inputs = [np.array(v) for _, v in inputs.items()]
            out = self.bert_model.predict(inputs)
            # presumably out[0] is the per-token hidden state — TODO confirm
            emb_list.append(out[0])

        X = np.concatenate(emb_list, axis=1)
        return X


    def create_model(self):
        """Build the feed-forward head.

        Takes the concatenated embeddings as input and returns three raw class
        scores (logits); there is deliberately no softmax because
        ``nn.CrossEntropyLoss`` is applied to the output during training.
        """
        model = nn.Sequential(
                      nn.Linear(1024, 2048),
                      nn.Linear(2048, 512),
                      nn.Linear(512, 256),
                      nn.Linear(256, 128),
                      nn.Flatten(),

                      # 11520 = (16 + 24 + 50) token positions x 128 features
                      nn.Linear(11520, 4000),
                      nn.Dropout(p=0.2),

                      nn.Linear(4000, 1250),
                      nn.Dropout(p=0.2),

                      nn.Linear(1250, 512),
                      nn.Dropout(p=0.2),

                      nn.Linear(512, 256),
                      nn.LeakyReLU(0.2),
                      nn.Dropout(p=0.2),

                      nn.Linear(256, 64),
                      nn.Dropout(p=0.2),

                      nn.Linear(64, 16),
                      nn.LeakyReLU(0.2),
                      nn.Dropout(p=0.1),

                      nn.Linear(16, 3)
                      )

        return model

In [None]:
# Train and evaluate a fresh Classifier n_runs times, then report accuracy statistics.
# Every name assigned here is a notebook global; later cells reuse `classifier`
# (the instance from the last run) and the leftover loop variable `i`.
set_reproducible()
n_runs = 5
datadir = "../data/"  # not used in this cell: the absolute Drive paths below are used instead
trainfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv'
devfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv'
testfile = None  # no test set; presumably train_and_eval then returns a placeholder test accuracy — TODO confirm

# Runs
start_time = time.perf_counter()
devaccs = []
testaccs = []
for i in range(1, n_runs+1):
    # A new instance per run, so each run reloads the pretrained models and starts from a new head.
    classifier =  Classifier()
    devacc, testacc = train_and_eval(classifier, trainfile, devfile, testfile, i)
    devaccs.append(np.round(devacc, 2))
    testaccs.append(np.round(testacc, 2))
    # Prints the full per-step loss history of this run (one value per epoch).
    print('the loss is: ', classifier.each_train_loss)
    
print('\nCompleted %d runs.' % n_runs)
total_exec_time = (time.perf_counter() - start_time)
print("Dev accs:", devaccs)
print("Test accs:", testaccs)
print()
# Mean with the standard deviation in parentheses.
print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
print("\nExec time: %.2f s. ( %d per run )" % (total_exec_time, total_exec_time / n_runs))

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 1
  1.1. Training the classifier...
loading the train file...
getting embeddings...
encoder fit transform...
optimizing and scheduling...
calculating loss...
epochs starts


100%|██████████| 100/100 [00:36<00:00,  2.75it/s]


training & fit...
It takes  36.395  seconds to train

  1.2. Eval on the dev set... Acc.: 87.50

the loss is:  [1.0977405309677124, 1.0519875288009644, 0.9388105869293213, 0.7984851598739624, 0.8515506982803345, 0.7915442585945129, 0.7654146552085876, 0.7980470657348633, 0.7912758588790894, 0.774084210395813, 0.7609551548957825, 0.7490226030349731, 0.7518850564956665, 0.7504044771194458, 0.7485488057136536, 0.743499755859375, 0.7406907081604004, 0.7402287125587463, 0.7441688179969788, 0.7359353303909302, 0.7316527366638184, 0.743293821811676, 0.731830358505249, 0.7158421874046326, 0.7278779745101929, 0.7219838500022888, 0.719635009765625, 0.7212733030319214, 0.7039272785186768, 0.7131145596504211, 0.7037259936332703, 0.6996718049049377, 0.6917752027511597, 0.6843138933181763, 0.6835312247276306, 0.6727147102355957, 0.6700255274772644, 0.6674694418907166, 0.6549434065818787, 0.65208899974823, 0.6336787939071655, 0.6238657832145691, 0.6098898649215698, 0.5923947691917419, 0.5725078582763

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 2
  2.1. Training the classifier...
loading the train file...
getting embeddings...
encoder fit transform...
optimizing and scheduling...
calculating loss...
epochs starts


100%|██████████| 100/100 [00:36<00:00,  2.75it/s]


training & fit...
It takes  36.404  seconds to train

  2.2. Eval on the dev set... Acc.: 87.77

the loss is:  [1.165979027748108, 1.1259963512420654, 0.9876649975776672, 0.7947739958763123, 0.9254895448684692, 0.8119766116142273, 0.7501807808876038, 0.7834502458572388, 0.7844245433807373, 0.7624731063842773, 0.7381739020347595, 0.7511001229286194, 0.7567413449287415, 0.7400014400482178, 0.73997563123703, 0.7473169565200806, 0.749054491519928, 0.7465540170669556, 0.7381739020347595, 0.7353038787841797, 0.7502076625823975, 0.7382429242134094, 0.7361026406288147, 0.7216767072677612, 0.7209374904632568, 0.7185457348823547, 0.7181941270828247, 0.7119982838630676, 0.7120673060417175, 0.7083032131195068, 0.7027655839920044, 0.6978038549423218, 0.689099907875061, 0.6965693235397339, 0.6869969964027405, 0.6850834488868713, 0.6727761626243591, 0.6739047765731812, 0.6732456088066101, 0.6688107848167419, 0.6519079804420471, 0.6555505394935608, 0.6521246433258057, 0.6345229744911194, 0.62169581651

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 3
  3.1. Training the classifier...
loading the train file...
getting embeddings...
encoder fit transform...
optimizing and scheduling...
calculating loss...
epochs starts


100%|██████████| 100/100 [00:36<00:00,  2.75it/s]


training & fit...
It takes  36.425  seconds to train

  3.2. Eval on the dev set... Acc.: 87.77

the loss is:  [1.0500218868255615, 1.0131685733795166, 0.8738751411437988, 0.8101620674133301, 0.8130550980567932, 0.7600733637809753, 0.7730361223220825, 0.7745593786239624, 0.7531313300132751, 0.7486962080001831, 0.7470096349716187, 0.744663655757904, 0.7283483147621155, 0.7265585064888, 0.7442070841789246, 0.735657811164856, 0.733401358127594, 0.7242487668991089, 0.7435766458511353, 0.7217280864715576, 0.7242707014083862, 0.7282087206840515, 0.7226179838180542, 0.7115640044212341, 0.7173522710800171, 0.7149165868759155, 0.7164503335952759, 0.7079725861549377, 0.7018657922744751, 0.6979240775108337, 0.6908993124961853, 0.6884250044822693, 0.6809750199317932, 0.6697709560394287, 0.6709455847740173, 0.6568313837051392, 0.6585338711738586, 0.6434084177017212, 0.6389695405960083, 0.6223509311676025, 0.6074385643005371, 0.5935637950897217, 0.5698172450065613, 0.5684476494789124, 0.581055641174

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 4
  4.1. Training the classifier...
loading the train file...
getting embeddings...
encoder fit transform...
optimizing and scheduling...
calculating loss...
epochs starts


100%|██████████| 100/100 [00:36<00:00,  2.75it/s]


training & fit...
It takes  36.407  seconds to train

  4.2. Eval on the dev set... Acc.: 85.11

the loss is:  [0.9886118769645691, 0.9581215381622314, 0.8769978880882263, 0.7554482221603394, 0.8362687230110168, 0.7735928893089294, 0.7857083678245544, 0.7697206735610962, 0.7596205472946167, 0.7498047351837158, 0.7397313117980957, 0.7339783310890198, 0.7344040870666504, 0.7315775156021118, 0.7339692115783691, 0.7218517065048218, 0.7286475300788879, 0.7222419381141663, 0.7197319269180298, 0.7191426157951355, 0.7120681405067444, 0.7073688507080078, 0.7186498641967773, 0.7095806002616882, 0.7064765095710754, 0.6986330151557922, 0.6923379898071289, 0.6861690282821655, 0.6801499724388123, 0.6749484539031982, 0.6799889802932739, 0.6696586608886719, 0.6725613474845886, 0.6710067987442017, 0.6680944561958313, 0.6666259169578552, 0.6488478183746338, 0.64266037940979, 0.638488233089447, 0.6281055212020874, 0.6246422529220581, 0.6049442291259766, 0.5928634405136108, 0.5764477849006653, 0.558670580

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.



RUN: 5
  5.1. Training the classifier...
loading the train file...
getting embeddings...
encoder fit transform...
optimizing and scheduling...
calculating loss...
epochs starts


100%|██████████| 100/100 [00:36<00:00,  2.75it/s]


training & fit...
It takes  36.403  seconds to train

  5.2. Eval on the dev set... Acc.: 87.77

the loss is:  [0.9853392839431763, 0.9614309072494507, 0.8883952498435974, 0.7760313749313354, 0.7787042856216431, 0.8353460431098938, 0.7632879614830017, 0.7464967370033264, 0.743514358997345, 0.751507580280304, 0.7617197036743164, 0.749642014503479, 0.742085337638855, 0.7378603219985962, 0.7254273891448975, 0.7347118854522705, 0.7271013855934143, 0.7285395860671997, 0.71587735414505, 0.7228161692619324, 0.7119287848472595, 0.7108550071716309, 0.7062638401985168, 0.6995516419410706, 0.7005006074905396, 0.6873953938484192, 0.6926607489585876, 0.6890223622322083, 0.7015345692634583, 0.6876016855239868, 0.6786012649536133, 0.6802892684936523, 0.6683964729309082, 0.6601879000663757, 0.6525802612304688, 0.6427298188209534, 0.6410078406333923, 0.6341890096664429, 0.6286835074424744, 0.6130013465881348, 0.6015526056289673, 0.5746905207633972, 0.5674437880516052, 0.5862661600112915, 0.561068058013

In [None]:
# Re-predict the dev set with whatever instance the global `classifier` currently holds
# (after the run loop, that is the model trained in the last run).
set_reproducible()
n_runs = 5
datadir = "../data/"  # not used in this cell
trainfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv'
devfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv'
testfile = None

# slabels: labels predicted by the system.
slabels = classifier.predict(devfile)
# glabels: presumably the reference labels read from the same file (helper defined outside this section) — TODO confirm
glabels = load_label_output(devfile)

In [None]:
# Per-class precision / recall / F1, with glabels as the reference and slabels as the predictions.
print(sklearn.metrics.classification_report(glabels, slabels))

              precision    recall  f1-score   support

    negative       0.77      0.80      0.78        98
     neutral       0.00      0.00      0.00        14
    positive       0.91      0.94      0.92       264

    accuracy                           0.87       376
   macro avg       0.56      0.58      0.57       376
weighted avg       0.84      0.87      0.85       376



## Scratch work (exploratory cells below)

In [None]:
# Scratch configuration: same seed helper, run count and data paths as the main experiment cell.
set_reproducible()
n_runs = 5

datadir = "../data/"  # not used in this cell
trainfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/traindata.csv'
devfile =  '/content/drive/MyDrive/DSBA M2/2 NLP/exercise_2/data/devdata.csv'
testfile = None

In [None]:
# Trial run first: build a single classifier instance to check that everything loads.
classifier2 =  Classifier()

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


In [None]:
# One train/eval pass for the trial instance.
# NOTE: `i` is not set in this cell; it is whatever value an earlier loop left behind
# (the recorded output shows "RUN: 5").
devacc, testacc = train_and_eval(classifier2, trainfile, devfile, testfile, i)


RUN: 5
  5.1. Training the classifier...
loading the train file...
optimizing and scheduling...
calculating loss...
epochs starts


100%|██████████| 2/2 [00:44<00:00, 22.02s/it]


training & fit...
It takes  44.053  seconds to train

  5.2. Eval on the dev set... Acc.: 70.21



In [None]:
# Append the trial run's rounded accuracies to the existing result lists
# (devaccs / testaccs must already have been created by an earlier cell).
devaccs.append(np.round(devacc,2))
testaccs.append(np.round(testacc,2))

In [None]:
def train(trainfile, model, devfile=None):
    """Train ``model`` on the examples stored in ``trainfile`` (scratch, module-level version).

    WARNING: do not use the dev data as training examples; it may only be used to
    tune hyper-parameters. ``devfile`` is accepted for interface compatibility
    and is not read here.

    Relies on the notebook globals ``loadfile``, ``get_embeddings`` and ``encoder``.

    Args:
        trainfile: path to the tab-separated training file.
        model: the torch module to optimise in place.
        devfile: unused.

    Returns:
        The list of loss values, one per optimisation step (previously these
        were collected and then thrown away).
    """
    print('loading the train file...')
    df = loadfile(trainfile)
    X = get_embeddings(df)
    Y = df['sentiment'].values
    # LabelEncoder works on a 1-D label vector; a column vector only goes
    # through via an implicit (warning-raising) ravel.
    Y = encoder.fit_transform(np.asarray(Y).ravel())
    X = torch.tensor(X)
    # CrossEntropyLoss needs integer (long) class indices.
    Y = torch.tensor(Y, dtype=torch.long)

    # Make sure dropout is active while training, whatever mode the model was left in.
    model.train()

    print('optimizing and scheduling...')
    optimizer = torch.optim.Adam(model.parameters(), 0.001)

    print('calculating loss...')
    criterion = nn.CrossEntropyLoss()

    losses = []
    epochs = 1  # single step as a quick smoke test (the original note said to try 5 first)

    print('epochs starts')
    temp_start = time.time()
    for _ in tqdm(range(epochs)):
        optimizer.zero_grad()

        # Full-batch forward pass: the whole training set is one batch.
        outputs = model(X)
        loss = criterion(outputs, Y)

        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    print('training & fit...')
    temp_end = time.time()
    print('It takes ', str(round((temp_end - temp_start), 3)), ' seconds to train')

    return losses


def predict(datafile, model):
    """Predict class labels for the instances in ``datafile`` (scratch, module-level version).

    Relies on the notebook globals ``loadfile``, ``get_embeddings`` and ``encoder``.

    Args:
        datafile: path to the tab-separated data file.
        model: the trained torch module.

    Returns:
        The list of predicted labels, decoded back to the original strings.
    """
    df2 = loadfile(datafile)
    X2 = torch.tensor(get_embeddings(df2))

    # Inference must run in eval mode (dropout disabled), otherwise the
    # predictions are randomly perturbed; no_grad avoids building the autograd
    # graph. The model's previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            pred = model(X2)
    finally:
        model.train(was_training)
    pred = pred.cpu().numpy()

    # Highest score -> class id -> original label.
    pred = np.argmax(pred, axis=1)
    y2_pred = encoder.inverse_transform(pred)

    return list(y2_pred)


def loadfile(data_csv):
    """Read a tab-separated, header-less data file into a pandas DataFrame.

    The five columns are named, in order: sentiment, aspect_category,
    aspect_term, slice, review.
    """
    field_names = ('sentiment', 'aspect_category', 'aspect_term', 'slice', 'review')
    # read_table is read_csv with a tab as the default separator.
    return pd.read_table(data_csv, header=None, names=list(field_names))


def get_embeddings(df):
    """Embed the aspect category, aspect term and review columns with RoBERTa.

    Scratch, module-level version: relies on the notebook globals ``tokenizer``,
    ``bert_model``, ``max_token_dict`` and ``src_column_dict``. Each column is
    tokenised to its fixed length from ``max_token_dict`` and the per-column
    outputs are concatenated along axis 1.
    """
    emb_list = []
    for col, MAX_LENGTH in max_token_dict.items():
        str_inp = df[src_column_dict[col]].values
        # padding='max_length' is the current spelling of the deprecated
        # pad_to_max_length=True.
        inputs = tokenizer([str(i) for i in str_inp],
                      max_length = MAX_LENGTH,
                      padding = 'max_length', return_tensors="pt", truncation=True)

        # The encoder is a TensorFlow model, so hand it plain numpy arrays.
        inputs = [np.array(v) for _, v in inputs.items()]
        out = bert_model.predict(inputs)
        # presumably out[0] is the per-token hidden state — TODO confirm
        emb_list.append(out[0])

    X = np.concatenate(emb_list, axis=1)
    return X


def create_model():
    """Build the feed-forward classification head (scratch, module-level version).

    Takes the concatenated embeddings as input and returns three raw class
    scores (logits). There is deliberately no final softmax: the training code
    uses ``nn.CrossEntropyLoss``, which applies log-softmax itself, so a softmax
    layer here would squash the scores twice and flatten the gradients.
    Predictions are unaffected because argmax over logits and over
    probabilities picks the same class.
    """
    model = nn.Sequential(
                  nn.Linear(1024, 2048),
                  nn.Linear(2048, 512),
                  nn.Linear(512, 256),
                  nn.Linear(256, 128),
                  nn.Flatten(),

                  # 11520 = (16 + 24 + 50) token positions x 128 features
                  nn.Linear(11520, 4000),
                  nn.Dropout(p=0.2),

                  nn.Linear(4000, 1250),
                  nn.Dropout(p=0.2),

                  nn.Linear(1250, 512),
                  nn.Dropout(p=0.2),

                  nn.Linear(512, 256),
                  nn.LeakyReLU(0.2),
                  nn.Dropout(p=0.2),

                  nn.Linear(256, 64),
                  nn.Dropout(p=0.2),

                  nn.Linear(64, 16),
                  nn.LeakyReLU(0.2),
                  nn.Dropout(p=0.1),

                  nn.Linear(16, 3)
                  )

    return model

In [None]:
# Module-level equivalents of the attributes Classifier.__init__ sets up, so that the
# scratch functions train / predict / get_embeddings can be run outside the class.
# Pretrained tokenizer and encoder from HuggingFace.
tokenizer = RobertaTokenizer.from_pretrained('roberta-large-mnli')
bert_model = TFRobertaModel.from_pretrained('roberta-large-mnli')

# Per-field token budget, and the dataframe column each field is read from.
max_token_dict = {'asp_cat_emb':16,'asp_term_emb':24,'review_emb':50}
src_column_dict = {'asp_cat_emb':'aspect_category','asp_term_emb':'aspect_term','review_emb':'review'}

# Maps string labels <-> integer class ids.
encoder = LabelEncoder()

# Trainable head (left on the default device; nothing here moves it to a GPU).
model = create_model()

# Intended to hold training losses.
# NOTE(review): the scratch train() appends to a local list of the same name, so this
# global appears to stay empty — confirm.
each_train_loss = []

Some layers from the model checkpoint at roberta-large-mnli were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-large-mnli.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


In [None]:
# Scratch replay of the training step of train_and_eval: the method call
# classifier.train(trainfile, devfile) is replaced by the module-level train().
train(trainfile, model=model, devfile=devfile)

loading the train file...
optimizing and scheduling...
calculating loss...
epochs starts


100%|██████████| 1/1 [00:22<00:00, 22.27s/it]

training & fit...
It takes  22.276  seconds to train





In [None]:
# Predict the dev set with the scratch model; slabels holds the predicted labels.
slabels = predict(devfile, model=model)

In [None]:
# Distribution of predicted labels; the recorded output shows every one of the 376 dev
# examples predicted as 'positive', i.e. the scratch model collapsed to a single class.
from collections import Counter
Counter(slabels)

Counter({'positive': 376})

In [None]:
# Scratch replay of the evaluation part of train_and_eval, run at cell level.
glabels = load_label_output(devfile)
devacc = eval_list(glabels, slabels)
print(" Acc.: %.2f" % devacc)
# Placeholder test accuracy, kept when no test file is configured.
testacc = -1
if testfile is not None:
    # Evaluation on the test data
    # NOTE(review): `run_id` looks like a parameter of train_and_eval and is not assigned
    # in the visible cells, and this branch predicts with `classifier` rather than the
    # scratch predict()/model — it is only reached when testfile is set; verify before use.
    print("  %s.3. Eval on the test set..." % str(run_id), end="")
    slabels = classifier.predict(testfile)
    glabels = load_label_output(testfile)
    testacc = eval_list(glabels, slabels)
    print(" Acc.: %.2f" % testacc)
# The original function ends by printing a blank line and returning (devacc, testacc);
# at cell level the two values simply stay in the notebook globals.

 Acc.: 70.21


In [None]:
# Runs: same loop as the main experiment cell, without the per-run loss printout.
# Resets the result lists and the timer, then trains a fresh Classifier per run.
start_time = time.perf_counter()
devaccs = []
testaccs = []
for i in range(1, n_runs+1):
    classifier =  Classifier()
    devacc, testacc = train_and_eval(classifier, trainfile, devfile, testfile, i)
    devaccs.append(np.round(devacc,2))
    testaccs.append(np.round(testacc,2))


In [None]:
# Summary of the runs. The elapsed time is measured from `start_time` to the moment
# this cell executes, so it also includes any idle time between the two cells.
print('\nCompleted %d runs.' % n_runs)
total_exec_time = (time.perf_counter() - start_time)
print("Dev accs:", devaccs)
print("Test accs:", testaccs)
print()
# Mean with the standard deviation in parentheses.
print("Mean Dev Acc.: %.2f (%.2f)" % (np.mean(devaccs), np.std(devaccs)))
print("Mean Test Acc.: %.2f (%.2f)" % (np.mean(testaccs), np.std(testaccs)))
print("\nExec time: %.2f s. ( %d per run )" % (total_exec_time, total_exec_time / n_runs))