In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import tqdm


import numpy as np
import pandas as pd

from sklearn.metrics import f1_score

import re

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import nltk
from nltk.tokenize import word_tokenize

import matplotlib.pyplot as plt
#nltk.download('punkt','./data')


from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                         do_lower_case = True)

In [2]:
def swap_columns(df, col1, col2):
    col_list = list(df.columns)
    x, y = col_list.index(col1), col_list.index(col2)
    col_list[y], col_list[x] = col_list[x], col_list[y]
    df = df[col_list]
    return df

In [3]:
train_path = "./twitter_data/train.csv"
test_path = "./twitter_data/test.csv"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_df = pd.read_csv(train_path,index_col=False)
test_df = pd.read_csv(test_path,index_col=False)
train_df
len(train_df)

5970

In [4]:
def preprocess(df, mode='train'):
    if mode=='test':
        df=df.rename(columns={"Category": "Tweet"})
        
    
    #df = df.drop(columns=["index"])
    df = df.dropna()
    df = df[df['Tweet'] != "Not Available"]
    df=df.rename(columns={"Tweet": "text"})
    df=df.rename(columns={"Category": "category"})
    df.insert(loc=3, column='label', value=-1)
    df.insert(loc=4, column='data_type', value=0)
    #df = swap_columns(df, 'category', 'text')
    
    return df

In [5]:
#test_df_p=preprocess(test_df,'test') # has no labels!
dfx=preprocess(train_df,'train')
dfx.reset_index(inplace=True,drop=True)
dfx

Unnamed: 0,Id,category,text,label,data_type
0,635930169241374720,neutral,IOS 9 App Transport Security. Mm need to check...,-1,0
1,635950258682523648,neutral,"Mar if you have an iOS device, you should down...",-1,0
2,636030803433009153,negative,@jimmie_vanagon my phone does not run on lates...,-1,0
3,636100906224848896,positive,Not sure how to start your publication on iOS?...,-1,0
4,636176272947744772,neutral,"Two Dollar Tuesday is here with Forklift 2, Qu...",-1,0
...,...,...,...,...,...
5417,638445576212754433,positive,"Ok ed let's do this, Zlatan, greizmann and Lap...",-1,0
5418,638531837313306624,neutral,Goal level: Zlatan 90k by Friday? = Posting e...,-1,0
5419,639016598477651968,neutral,@YouAreMyArsenal Wouldn't surprise me if we en...,-1,0
5420,640276909633486849,neutral,Rib injury for Zlatan against Russia is a big ...,-1,0


In [6]:
dfx['data_type'] = ['not_set'] * dfx.shape[0]

In [7]:
for i in range(1,len(dfx)):
    if i not in dfx.index:
        continue
    #print(i)
    if dfx['category'][i]=='positive':
        dfx['label'][i]=2
    if dfx['category'][i]=='neutral':
        dfx['label'][i]=1
    if dfx['category'][i]=='negative':
        dfx['label'][i]=0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfx['label'][i]=1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfx['label'][i]=0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfx['label'][i]=2


In [9]:
dfx.tail(100)

Unnamed: 0,Id,category,text,label,data_type
5322,641622362882985984,neutral,WORLD HAS ABANDONED THE SYSTEM. This is now c...,1,not_set
5323,641624522307690496,positive,@shredz_cG ok just add and message me on xbox ...,2,not_set
5324,641626528300699648,positive,@terruntz @DIRECTVService you can access Sunda...,2,not_set
5325,641627938186743808,positive,i work till 3pm . yay lol cant wait till thurs...,2,not_set
5326,641631234200875009,neutral,I tried the Xbox One preview....couldn't figur...,1,not_set
...,...,...,...,...,...
5417,638445576212754433,positive,"Ok ed let's do this, Zlatan, greizmann and Lap...",2,not_set
5418,638531837313306624,neutral,Goal level: Zlatan 90k by Friday? = Posting e...,1,not_set
5419,639016598477651968,neutral,@YouAreMyArsenal Wouldn't surprise me if we en...,1,not_set
5420,640276909633486849,neutral,Rib injury for Zlatan against Russia is a big ...,1,not_set


In [12]:
X_train, X_val, y_train, y_val = train_test_split(dfx.index.values, 
                                                   dfx.label.values,
                                                   test_size = 0.15,
                                                   random_state = 17,
                                                   stratify = dfx.label.values)

In [13]:
dfx.loc[X_train, 'data_type'] = 'train'
dfx.loc[X_val, 'data_type'] = 'val'

#groupby count
dfx.groupby(['category', 'label', 'data_type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Id,text
category,label,data_type,Unnamed: 3_level_1,Unnamed: 4_level_1
Tweet,-1,train,1,1
negative,0,train,738,738
negative,0,val,131,131
neutral,-1,train,1,1
neutral,1,train,1659,1659
neutral,1,val,293,293
positive,2,train,2209,2209
positive,2,val,390,390


In [14]:
encoded_data_train = tokenizer.batch_encode_plus(dfx[dfx.data_type == 'train'].text.values,
                                                add_special_tokes = True,
                                                return_attention_mask = True,
                                                pad_to_max_length = True,
                                                max_length = 256,
                                                return_tensors = 'pt')
encoded_data_val = tokenizer.batch_encode_plus(dfx[dfx.data_type == 'val'].text.values,
                                                add_special_tokes = True,
                                                return_attention_mask = True,
                                                pad_to_max_length = True,
                                                max_length = 256,
                                                return_tensors = 'pt')

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword arguments {'add_special_tokes': True} not recognized.
Keyword 

In [16]:
encoded_data_train


{'input_ids': tensor([[  101, 16380,  1023,  ...,     0,     0,     0],
        [  101,  9388,  2065,  ...,     0,     0,     0],
        [  101,  1030, 24671,  ...,     0,     0,     0],
        ...,
        [  101,  3125,  2504,  ...,     0,     0,     0],
        [  101,  1030,  2017,  ...,     0,     0,     0],
        [  101,  2053,  9541,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

In [19]:
#train set
input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(dfx[dfx.data_type == 'train'].label.values)

#validation set
input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val = torch.tensor(dfx[dfx.data_type == 'val'].label.values)

In [20]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                      num_labels = 3,
                                                      output_attentions = False,
                                                      output_hidden_states = False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [21]:
from torch.utils.data import TensorDataset

#train set
dataset_train = TensorDataset(input_ids_train, 
                              attention_masks_train,
                              labels_train)

#validation set
dataset_val = TensorDataset(input_ids_val, 
                             attention_masks_val, 
                             labels_val)

In [22]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

batch_size = 32

#train set
dataloader_train = DataLoader(dataset_train,
                              sampler = RandomSampler(dataset_train),
                              batch_size = batch_size)

#validation set
dataloader_val = DataLoader(dataset_val,
                              sampler = RandomSampler(dataset_val),
                              batch_size = 32) #since we don't have to do backpropagation

In [218]:
from transformers import AdamW, get_linear_schedule_with_warmup

optimizer = AdamW(model.parameters(),
                 lr = 1e-5,
                 eps = 1e-8) #2e-5 > 5e-5
                 
epochs = 10

scheduler = get_linear_schedule_with_warmup(optimizer,
                                           num_warmup_steps = 0,
                                           num_training_steps = len(dataloader_train)*epochs)



In [23]:
def evaluate(dataloader_val):

    #evaluation mode 
    model.eval()
    
    #tracking variables
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch in tqdm.tqdm(dataloader_val):
        
        #load into GPU
        batch = tuple(b.to(device) for b in batch)
        
        #define inputs
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2]}

        #compute logits
        with torch.no_grad():        
            outputs = model(**inputs)
        
        #compute loss
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        #compute accuracy
        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
    
    #compute average loss
    loss_val_avg = loss_val_total/len(dataloader_val) 
    
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
            
    return loss_val_avg, predictions, true_vals

In [24]:
def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis = 1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds, average = 'weighted')

In [25]:
#accuracy score
def accuracy_per_class(preds, labels):
    label_dict_inverse = {v: k for k, v in label_dict.items()}
    
    #make prediction
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    
    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy:{len(y_preds[y_preds==label])}/{len(y_true)}\n')

In [26]:
import random

seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [27]:
for epoch in tqdm.tqdm(range(1, epochs+1)):
    
    model.train()
    
    loss_train_total = 0
    
    progress_bar = tqdm.tqdm(dataloader_train, 
                        desc = 'Epoch {:1d}'.format(epoch), 
                        leave = False, 
                        disable = False)
    
    for batch in progress_bar:
        
        model.zero_grad() #set gradient to 0
    
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {'input_ids': batch[0], 
                  'attention_mask': batch[1], 
                  'labels': batch[2]}
        
        print("before FP")
        outputs = model(**inputs) #unpack the dict straight into inputs
        print("after FP")
        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()
        print("after BP")
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        
        optimizer.step()
        scheduler.step()
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item() / len(batch))})
        
    torch.save(model.state_dict(), f'Models/ BERT_ft_epoch{epoch}.model')
    
    tqdm.tqdm.write('\n Epoch {epoch}')
    
    loss_train_ave = loss_train_total / len(dataloader)
    tqdm.tqdm.write('Training loss: {loss_train_avg}')
    
    val_loss, predictions, true_vals = evaluate(dataloader_val)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.tqdm.write(f'Validation loss: {val_loss}')
    tqdm.tqdm.write(f'F1 Score (weighted): {val_f1}')

NameError: name 'epochs' is not defined