In [1]:
import numpy as np
import pandas as pd
import src.helpers as hlp
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from collections import defaultdict

from torch import nn, optim
from keras_preprocessing.sequence import pad_sequences
from torch.utils.data import Dataset, DataLoader

import transformers
from transformers import XLNetTokenizer, XLNetModel, AdamW, XLNetForSequenceClassification
import torch
import contractions


ModuleNotFoundError: No module named 'transformers'

In [5]:
t_pos = pd.read_table('/kaggle/input/twitter-full/train_pos_full.txt', header=None, names=['tweet'], dtype=str,on_bad_lines='skip')
t_pos['label'] = 1
t_neg = pd.read_table('/kaggle/input/twitter-full/train_neg_full.txt', header=None, names=['tweet'], dtype=str,on_bad_lines='skip')
t_neg['label'] = 0
df = pd.concat((t_pos,t_neg))

In [6]:
#df['tweet'] = df['tweet'].apply(lambda x: hlp.remove_stopwords(x))
df['tweet'] = df['tweet'].apply(lambda x: hlp.remove_punct(x))
df['tweet'] = df['tweet'].apply(lambda x: hlp.add_space(x))
df['tweet'] = df['tweet'].apply(lambda x: hlp.remove_white_space(x))
#df['tweet'] = df['tweet'].apply(lambda x: hlp.remove_words_digits(x))
df['tweet'] = df['tweet'].apply(lambda x: hlp.to_lower(x))
df['tweet'] = df['tweet'].apply(lambda x: hlp.unslang(x))
df['tweet'] = df['tweet'].apply(lambda x: contractions.fix(x))
#df['tweet'] = df['tweet'].apply(lambda x: hlp.remove_specific_words(x))
#df['tweet'] = df['tweet'].apply(lambda x: hlp.remove_single_char(x))
df['tweet'] = df['tweet'].apply(lambda x: hlp.lemmatize(x))
df = df[df['tweet'] != '']
df = df.drop_duplicates()
df.reset_index(inplace=True)

In [7]:
MAX_LEN = 256

In [8]:
class CustomDataset(Dataset):

    def __init__(self, tweets, labels, tokenizer, max_len):
        self.tweets = tweets
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.tweets)
    
    def __getitem__(self, index):
        tweet = str(self.tweets[index])
        label = self.labels[index]

        encoding = self.tokenizer.encode_plus(
        tweet,
        add_special_tokens=True,
        max_length=self.max_len,
        return_token_type_ids=False,
        pad_to_max_length=False,
        return_attention_mask=True,
        return_tensors='pt',
        )

        input_ids = pad_sequences(encoding['input_ids'], maxlen=MAX_LEN, dtype=torch.Tensor ,truncating="post",padding="post")
        input_ids = input_ids.astype(dtype = 'int64')
        input_ids = torch.tensor(input_ids) 

        attention_mask = pad_sequences(encoding['attention_mask'], maxlen=MAX_LEN, dtype=torch.Tensor ,truncating="post",padding="post")
        attention_mask = attention_mask.astype(dtype = 'int64')
        attention_mask = torch.tensor(attention_mask)       

        return {
        'review_text': tweet,
        'input_ids': input_ids,
        'attention_mask': attention_mask.flatten(),
        'targets': torch.tensor(label, dtype=torch.long)
        }

In [9]:
X_train, X_test, y_train, y_test = train_test_split (df['tweet'], df['label'] , test_size=0.2)

In [10]:
xlnet_tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

Downloading:   0%|          | 0.00/779k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/760 [00:00<?, ?B/s]

In [11]:
tweet_train_dataset = CustomDataset(tweets=X_train.to_numpy(), labels=y_train.to_numpy(), tokenizer=xlnet_tokenizer, max_len=MAX_LEN)
tweet_test_dataset = CustomDataset(tweets=X_test.to_numpy(), labels=y_test.to_numpy(), tokenizer=xlnet_tokenizer, max_len=MAX_LEN)


In [12]:
BATCH_SIZE = 32

In [13]:
tweet_train_loader = DataLoader(tweet_train_dataset, batch_size=BATCH_SIZE)
tweet_test_loader = DataLoader(tweet_test_dataset, batch_size=BATCH_SIZE)


In [14]:
EPOCHS = 3

model = XLNetForSequenceClassification.from_pretrained('xlnet-base-cased', num_labels = 2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


Downloading:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.weight', 'logits_proj.bias', 'sequence_summary.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)

In [16]:
from sklearn import metrics
def train_epoch(model, data_loader, optimizer, device, n_examples):
    model = model.train()
    losses = []
    acc = 0
    counter = 0
  
    for d in data_loader:
        input_ids_shape = d["input_ids"].shape
        input_ids = d["input_ids"].reshape(input_ids_shape[0],input_ids_shape[2]).to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)
        
        outputs = model(input_ids=input_ids, token_type_ids=None, attention_mask=attention_mask, labels = targets)
        loss = outputs[0]
        logits = outputs[1]

        _, prediction = torch.max(outputs[1], dim=1)
        targets = targets.cpu().detach().numpy()
        prediction = prediction.cpu().detach().numpy()
        accuracy = metrics.accuracy_score(targets, prediction)

        acc += accuracy
        losses.append(loss.item())
        
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()
        if counter % 1000 == 0:
            save_str = '/kaggle/working/xlnet_model_'+str(counter)+'.bin'
            torch.save(model.state_dict(), save_str)
        if counter%100 == 0:
                print("{}th iteration over {} = {}%".format(counter, len(data_loader), 100.0*counter/len(data_loader)))
                print("running mean_acc : ", acc/counter if counter != 0 else acc)
                print("running mean_loss : ", np.mean(losses))
        counter = counter + 1
        
        

    return acc / counter, np.mean(losses)

In [17]:
def eval_model(model, data_loader, device, n_examples):
    model = model.eval()
    losses = []
    acc = 0
    counter = 0
  
    with torch.no_grad():
        for d in data_loader:
            input_ids_shape = d["input_ids"].shape
            input_ids = d["input_ids"].reshape(input_ids_shape[0],input_ids_shape[2]).to(device)
            attention_mask = d["attention_mask"].to(device)
            targets = d["targets"].to(device)
            
            outputs = model(input_ids=input_ids, token_type_ids=None, attention_mask=attention_mask, labels = targets)

            loss = outputs[0]
            logits = outputs[1]

            _, prediction = torch.max(outputs[1], dim=1)
            targets = targets.cpu().detach().numpy()
            prediction = prediction.cpu().detach().numpy()
            accuracy = metrics.accuracy_score(targets, prediction)

            acc += accuracy
            losses.append(loss.item())
            if counter%100 == 0:
                print("{}th iteration over {} = {}%".format(counter, len(data_loader), 100.0*counter/len(data_loader)))
                print("running mean_acc : ", acc/counter if counter != 0 else acc)
                print("running mean_loss : ", np.mean(losses))
            counter += 1
            

    return acc / counter, np.mean(losses)

In [18]:
import logging 
logging.basicConfig(level = logging.ERROR)
history = defaultdict(list)
best_accuracy = 0

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print('-' * 10)

    train_acc, train_loss = train_epoch(
        model,
        tweet_train_loader,     
        optimizer, 
        device, 
        len(X_train)
    )

    print(f'Train loss {train_loss} Train accuracy {train_acc}')

    val_acc, val_loss = eval_model(
        model,
        tweet_test_loader, 
        device, 
        len(X_test)
    )

    print(f'Val loss {val_loss} Val accuracy {val_acc}')
    print()

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_acc'].append(val_acc)
    history['val_loss'].append(val_loss)

    if val_acc > best_accuracy:
        torch.save(model.state_dict(), '/kaggle/working/xlnet_best_model.bin')
        best_accuracy = val_acc

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch 1/3
----------


0th iteration over 55193 = 0.0%
running mean_acc :  0.46875
running mean_loss :  0.7471943497657776


100th iteration over 55193 = 0.1811823963183737%
running mean_acc :  0.619375
running mean_loss :  0.638375850302158


200th iteration over 55193 = 0.3623647926367474%
running mean_acc :  0.68953125
running mean_loss :  0.5699611077112938


300th iteration over 55193 = 0.5435471889551211%
running mean_acc :  0.7217708333333334
running mean_loss :  0.530358553675322


400th iteration over 55193 = 0.7247295852734948%
running mean_acc :  0.74
running mean_loss :  0.508541116244775


500th iteration over 55193 = 0.9059119815918686%
running mean_acc :  0.7515625
running mean_loss :  0.49362174907844225


600th iteration over 55193 = 1.0870943779102422%
running mean_acc :  0.75921875
running mean_loss :  0.48311764019202075


700th iteration over 55193 = 1.2682767742286158%
running mean_acc :  0.7655803571428571
running mean_loss :  0.47365577678367515


800th iteration over 55193 = 1.4494591705469897%
running mean_acc :  0.7713671875
running mean_loss :  0.4655222584320216


900th iteration over 55193 = 1.6306415668653633%
running mean_acc :  0.7769097222222222
running mean_loss :  0.4566446653547615


1000th iteration over 55193 = 1.8118239631837372%
running mean_acc :  0.780875
running mean_loss :  0.45107135856961394


1100th iteration over 55193 = 1.9930063595021108%
running mean_acc :  0.7841761363636364
running mean_loss :  0.44651316210532815


1200th iteration over 55193 = 2.1741887558204844%
running mean_acc :  0.7860677083333333
running mean_loss :  0.44296388851663254


1300th iteration over 55193 = 2.3553711521388583%
running mean_acc :  0.7881009615384615
running mean_loss :  0.4402327844608206


1400th iteration over 55193 = 2.5365535484572317%
running mean_acc :  0.78984375
running mean_loss :  0.43778424591442927


1500th iteration over 55193 = 2.7177359447756055%
running mean_acc :  0.7921458333333333
running mean_loss :  0.43421804445295


1600th iteration over 55193 = 2.8989183410939794%
running mean_acc :  0.794140625
running mean_loss :  0.4301996943561231


1700th iteration over 55193 = 3.080100737412353%
running mean_acc :  0.795
running mean_loss :  0.42895475365462404


1800th iteration over 55193 = 3.2612831337307266%
running mean_acc :  0.7968055555555555
running mean_loss :  0.4259242844757009


1900th iteration over 55193 = 3.4424655300491005%
running mean_acc :  0.7982072368421053
running mean_loss :  0.4233147289284526


2000th iteration over 55193 = 3.6236479263674743%
running mean_acc :  0.79978125
running mean_loss :  0.42125358995886103


2100th iteration over 55193 = 3.8048303226858478%
running mean_acc :  0.8010416666666667
running mean_loss :  0.41942068652162545


2200th iteration over 55193 = 3.9860127190042216%
running mean_acc :  0.8025142045454545
running mean_loss :  0.41755838859411654


2300th iteration over 55193 = 4.1671951153225955%
running mean_acc :  0.8036820652173913
running mean_loss :  0.4156302169761881


2400th iteration over 55193 = 4.348377511640969%
running mean_acc :  0.8045963541666666
running mean_loss :  0.4148089951435659


2500th iteration over 55193 = 4.529559907959342%
running mean_acc :  0.8051
running mean_loss :  0.4135997541984621


2600th iteration over 55193 = 4.710742304277717%
running mean_acc :  0.8060697115384615
running mean_loss :  0.41202859847488793


2700th iteration over 55193 = 4.89192470059609%
running mean_acc :  0.8069560185185185
running mean_loss :  0.4107181016701939


2800th iteration over 55193 = 5.073107096914463%
running mean_acc :  0.8077790178571429
running mean_loss :  0.409741498442132


2900th iteration over 55193 = 5.254289493232838%
running mean_acc :  0.8083943965517242
running mean_loss :  0.40918636348731596


3000th iteration over 55193 = 5.435471889551211%
running mean_acc :  0.8089583333333333
running mean_loss :  0.40831339727219484


3100th iteration over 55193 = 5.6166542858695845%
running mean_acc :  0.8096975806451613
running mean_loss :  0.4070071203166229


3200th iteration over 55193 = 5.797836682187959%
running mean_acc :  0.81064453125
running mean_loss :  0.405379421634251


3300th iteration over 55193 = 5.979019078506332%
running mean_acc :  0.8116477272727273
running mean_loss :  0.4038490427376393


3400th iteration over 55193 = 6.160201474824706%
running mean_acc :  0.8124356617647058
running mean_loss :  0.4029259335833485


3500th iteration over 55193 = 6.34138387114308%
running mean_acc :  0.8130803571428571
running mean_loss :  0.40213026790338324


3600th iteration over 55193 = 6.522566267461453%
running mean_acc :  0.8136111111111111
running mean_loss :  0.4012454929111137


3700th iteration over 55193 = 6.703748663779828%
running mean_acc :  0.8141807432432432
running mean_loss :  0.400053723621887


3800th iteration over 55193 = 6.884931060098201%
running mean_acc :  0.8147121710526316
running mean_loss :  0.39897355780111304


3900th iteration over 55193 = 7.066113456416574%
running mean_acc :  0.8151842948717949
running mean_loss :  0.39809697518644194


4000th iteration over 55193 = 7.247295852734949%
running mean_acc :  0.8155859375
running mean_loss :  0.3973853713522044


4100th iteration over 55193 = 7.428478249053322%
running mean_acc :  0.8162195121951219
running mean_loss :  0.3965108724348692


4200th iteration over 55193 = 7.6096606453716955%
running mean_acc :  0.8168377976190476
running mean_loss :  0.3955818380441333


4300th iteration over 55193 = 7.79084304169007%
running mean_acc :  0.817405523255814
running mean_loss :  0.3948194791437914


4400th iteration over 55193 = 7.972025438008443%
running mean_acc :  0.81796875
running mean_loss :  0.39411777093535905


4500th iteration over 55193 = 8.153207834326818%
running mean_acc :  0.8179305555555556
running mean_loss :  0.39371369342602935


4600th iteration over 55193 = 8.334390230645191%
running mean_acc :  0.818335597826087
running mean_loss :  0.3929224139440969


4700th iteration over 55193 = 8.515572626963564%
running mean_acc :  0.818936170212766
running mean_loss :  0.3919083588546424


4800th iteration over 55193 = 8.696755023281938%
running mean_acc :  0.8193294270833333
running mean_loss :  0.3914357055908962


4900th iteration over 55193 = 8.877937419600311%
running mean_acc :  0.8196364795918367
running mean_loss :  0.39074953206733304


5000th iteration over 55193 = 9.059119815918685%
running mean_acc :  0.8199375
running mean_loss :  0.3901404729040354


5100th iteration over 55193 = 9.24030221223706%
running mean_acc :  0.8202573529411765
running mean_loss :  0.38939380950727687


5200th iteration over 55193 = 9.421484608555433%
running mean_acc :  0.8205769230769231
running mean_loss :  0.3885450403241326


5300th iteration over 55193 = 9.602667004873807%
running mean_acc :  0.820566037735849
running mean_loss :  0.3883090977667863


5400th iteration over 55193 = 9.78384940119218%
running mean_acc :  0.820943287037037
running mean_loss :  0.38775611756560185


5500th iteration over 55193 = 9.965031797510553%
running mean_acc :  0.8213238636363637
running mean_loss :  0.3870404879207872


5600th iteration over 55193 = 10.146214193828927%
running mean_acc :  0.8216964285714285
running mean_loss :  0.3863838863314805


5700th iteration over 55193 = 10.327396590147302%
running mean_acc :  0.8219243421052631
running mean_loss :  0.38599100687920557


5800th iteration over 55193 = 10.508578986465675%
running mean_acc :  0.8221497844827587
running mean_loss :  0.38559547180097653


5900th iteration over 55193 = 10.689761382784049%
running mean_acc :  0.8225529661016949
running mean_loss :  0.38496715302988344


6000th iteration over 55193 = 10.870943779102422%
running mean_acc :  0.8227135416666667
running mean_loss :  0.3847162361937927


6100th iteration over 55193 = 11.052126175420796%
running mean_acc :  0.8227612704918033
running mean_loss :  0.3845397580284952


6200th iteration over 55193 = 11.233308571739169%
running mean_acc :  0.8229082661290322
running mean_loss :  0.3841399567847847


6300th iteration over 55193 = 11.414490968057544%
running mean_acc :  0.8230605158730159
running mean_loss :  0.38375051106404473


6400th iteration over 55193 = 11.595673364375918%
running mean_acc :  0.8232421875
running mean_loss :  0.3833018121736028


6500th iteration over 55193 = 11.776855760694291%
running mean_acc :  0.8234951923076923
running mean_loss :  0.3828727662198875


6600th iteration over 55193 = 11.958038157012664%
running mean_acc :  0.8238825757575757
running mean_loss :  0.38216078731191466


6700th iteration over 55193 = 12.139220553331038%
running mean_acc :  0.8241884328358209
running mean_loss :  0.3815390388312455


6800th iteration over 55193 = 12.320402949649411%
running mean_acc :  0.8244255514705883
running mean_loss :  0.38125471483410467


6900th iteration over 55193 = 12.501585345967786%
running mean_acc :  0.8246467391304347
running mean_loss :  0.38103769744324384


7000th iteration over 55193 = 12.68276774228616%
running mean_acc :  0.8248616071428572
running mean_loss :  0.3805563756194375


7100th iteration over 55193 = 12.863950138604533%
running mean_acc :  0.8250704225352112
running mean_loss :  0.3801620320396396


7200th iteration over 55193 = 13.045132534922907%
running mean_acc :  0.8253342013888889
running mean_loss :  0.3798645873689979


7300th iteration over 55193 = 13.22631493124128%
running mean_acc :  0.8255821917808219
running mean_loss :  0.37950125234700316


7400th iteration over 55193 = 13.407497327559655%
running mean_acc :  0.8256967905405406
running mean_loss :  0.3793625887324803


7500th iteration over 55193 = 13.588679723878029%
running mean_acc :  0.8258958333333334
running mean_loss :  0.3790773833492107


7600th iteration over 55193 = 13.769862120196402%
running mean_acc :  0.8261759868421052
running mean_loss :  0.3787530777715426


7700th iteration over 55193 = 13.951044516514775%
running mean_acc :  0.8263474025974026
running mean_loss :  0.37854182513820367


7800th iteration over 55193 = 14.132226912833149%
running mean_acc :  0.826738782051282
running mean_loss :  0.3780376171654286


7900th iteration over 55193 = 14.313409309151522%
running mean_acc :  0.8271518987341772
running mean_loss :  0.37743949816677635


8000th iteration over 55193 = 14.494591705469897%
running mean_acc :  0.82726171875
running mean_loss :  0.3772196803898934


8100th iteration over 55193 = 14.67577410178827%
running mean_acc :  0.8274498456790124
running mean_loss :  0.3767781152350051


8200th iteration over 55193 = 14.856956498106644%
running mean_acc :  0.8276219512195122
running mean_loss :  0.3763943962392146


8300th iteration over 55193 = 15.038138894425018%
running mean_acc :  0.8278237951807229
running mean_loss :  0.3760194631450263


8400th iteration over 55193 = 15.219321290743391%
running mean_acc :  0.8279203869047619
running mean_loss :  0.37586407790087106


8500th iteration over 55193 = 15.400503687061764%
running mean_acc :  0.8281176470588235
running mean_loss :  0.3756506381068436


8600th iteration over 55193 = 15.58168608338014%
running mean_acc :  0.8282558139534884
running mean_loss :  0.37540212946757373


8700th iteration over 55193 = 15.762868479698513%
running mean_acc :  0.8284985632183908
running mean_loss :  0.37503112650232034


8800th iteration over 55193 = 15.944050876016886%
running mean_acc :  0.8287819602272727
running mean_loss :  0.37462318368461806


8900th iteration over 55193 = 16.12523327233526%
running mean_acc :  0.8290379213483146
running mean_loss :  0.37421239042396615


9000th iteration over 55193 = 16.306415668653635%
running mean_acc :  0.8291875
running mean_loss :  0.3739372930105508


9100th iteration over 55193 = 16.48759806497201%
running mean_acc :  0.8292548076923076
running mean_loss :  0.37372909144669


9200th iteration over 55193 = 16.668780461290382%
running mean_acc :  0.8293138586956522
running mean_loss :  0.37355730978173024


9300th iteration over 55193 = 16.849962857608755%
running mean_acc :  0.8295094086021505
running mean_loss :  0.37327828600805635


9400th iteration over 55193 = 17.03114525392713%
running mean_acc :  0.8296476063829787
running mean_loss :  0.37295652342427854


9500th iteration over 55193 = 17.212327650245502%
running mean_acc :  0.8297236842105263
running mean_loss :  0.3728142104374021


9600th iteration over 55193 = 17.393510046563875%
running mean_acc :  0.8298860677083333
running mean_loss :  0.3724864216907647


9700th iteration over 55193 = 17.57469244288225%
running mean_acc :  0.8300289948453609
running mean_loss :  0.37227123833746534


9800th iteration over 55193 = 17.755874839200622%
running mean_acc :  0.8302391581632653
running mean_loss :  0.3719628529676174


9900th iteration over 55193 = 17.937057235518996%
running mean_acc :  0.8304545454545454
running mean_loss :  0.3716269278594634


10000th iteration over 55193 = 18.11823963183737%
running mean_acc :  0.830584375
running mean_loss :  0.37136222475314784


10100th iteration over 55193 = 18.299422028155746%
running mean_acc :  0.8307394801980198
running mean_loss :  0.3710937310508449


10200th iteration over 55193 = 18.48060442447412%
running mean_acc :  0.8310079656862746
running mean_loss :  0.3706381165514435


10300th iteration over 55193 = 18.661786820792493%
running mean_acc :  0.8311529126213593
running mean_loss :  0.37037723188509747


10400th iteration over 55193 = 18.842969217110866%
running mean_acc :  0.8313852163461538
running mean_loss :  0.3699384372562826


10500th iteration over 55193 = 19.02415161342924%
running mean_acc :  0.831577380952381
running mean_loss :  0.36967037373946376


10600th iteration over 55193 = 19.205334009747613%
running mean_acc :  0.831777712264151
running mean_loss :  0.3694371431897287


10700th iteration over 55193 = 19.386516406065986%
running mean_acc :  0.8320210280373832
running mean_loss :  0.3690482467592581


10800th iteration over 55193 = 19.56769880238436%
running mean_acc :  0.8322135416666666
running mean_loss :  0.3688383135065449


10900th iteration over 55193 = 19.748881198702733%
running mean_acc :  0.8323767201834862
running mean_loss :  0.3684780834477305


11000th iteration over 55193 = 19.930063595021107%
running mean_acc :  0.8323863636363636
running mean_loss :  0.3683724893828802


11100th iteration over 55193 = 20.11124599133948%
running mean_acc :  0.8325647522522522
running mean_loss :  0.36810186229738157


11200th iteration over 55193 = 20.292428387657854%
running mean_acc :  0.8327483258928572
running mean_loss :  0.367718663112855


11300th iteration over 55193 = 20.47361078397623%
running mean_acc :  0.8329258849557523
running mean_loss :  0.3674687253516149


11400th iteration over 55193 = 20.654793180294604%
running mean_acc :  0.8329824561403508
running mean_loss :  0.36728449721886247


11500th iteration over 55193 = 20.835975576612977%
running mean_acc :  0.8331304347826087
running mean_loss :  0.3670682957187475


11600th iteration over 55193 = 21.01715797293135%
running mean_acc :  0.8332866379310345
running mean_loss :  0.36677502532419837


11700th iteration over 55193 = 21.198340369249724%
running mean_acc :  0.8334455128205128
running mean_loss :  0.36655066866654024


11800th iteration over 55193 = 21.379522765568097%
running mean_acc :  0.833583156779661
running mean_loss :  0.36635172177816727


11900th iteration over 55193 = 21.56070516188647%
running mean_acc :  0.8337211134453781
running mean_loss :  0.36605713122127015


12000th iteration over 55193 = 21.741887558204844%
running mean_acc :  0.8338541666666667
running mean_loss :  0.3657562892454712


12100th iteration over 55193 = 21.923069954523218%
running mean_acc :  0.8338972107438016
running mean_loss :  0.36570013218928976


12200th iteration over 55193 = 22.10425235084159%
running mean_acc :  0.8340855532786885
running mean_loss :  0.36537861288309526


12300th iteration over 55193 = 22.285434747159965%
running mean_acc :  0.8341996951219512
running mean_loss :  0.36520749957953885


12400th iteration over 55193 = 22.466617143478338%
running mean_acc :  0.8343220766129033
running mean_loss :  0.3649216666949842


12500th iteration over 55193 = 22.647799539796715%
running mean_acc :  0.8344325
running mean_loss :  0.3646980910151112


12600th iteration over 55193 = 22.82898193611509%
running mean_acc :  0.834469246031746
running mean_loss :  0.3646422273734904


12700th iteration over 55193 = 23.01016433243346%
running mean_acc :  0.8345767716535433
running mean_loss :  0.364505645545573


12800th iteration over 55193 = 23.191346728751835%
running mean_acc :  0.83478271484375
running mean_loss :  0.3642575886840001


12900th iteration over 55193 = 23.37252912507021%
running mean_acc :  0.8348885658914729
running mean_loss :  0.3640978643164627


13000th iteration over 55193 = 23.553711521388582%
running mean_acc :  0.8349543269230769
running mean_loss :  0.36400271393974765


13100th iteration over 55193 = 23.734893917706955%
running mean_acc :  0.835
running mean_loss :  0.36383731808360137


13200th iteration over 55193 = 23.91607631402533%
running mean_acc :  0.8350662878787879
running mean_loss :  0.36361225328633445


13300th iteration over 55193 = 24.097258710343702%
running mean_acc :  0.8351832706766917
running mean_loss :  0.3634435763254478


13400th iteration over 55193 = 24.278441106662076%
running mean_acc :  0.8352961753731343
running mean_loss :  0.36332289671067935


13500th iteration over 55193 = 24.45962350298045%
running mean_acc :  0.8354305555555556
running mean_loss :  0.3631034925636314


13600th iteration over 55193 = 24.640805899298822%
running mean_acc :  0.8355284926470589
running mean_loss :  0.36289987328600143


13700th iteration over 55193 = 24.8219882956172%
running mean_acc :  0.8356592153284672
running mean_loss :  0.3626599995176441


13800th iteration over 55193 = 25.003170691935573%
running mean_acc :  0.8357382246376811
running mean_loss :  0.36254547587227764


13900th iteration over 55193 = 25.184353088253946%
running mean_acc :  0.8358902877697841
running mean_loss :  0.3623340505248788


14000th iteration over 55193 = 25.36553548457232%
running mean_acc :  0.8359084821428572
running mean_loss :  0.3621460098578209


14100th iteration over 55193 = 25.546717880890693%
running mean_acc :  0.8360616134751773
running mean_loss :  0.3619459485284454


14200th iteration over 55193 = 25.727900277209066%
running mean_acc :  0.8361091549295775
running mean_loss :  0.36189008421939356


14300th iteration over 55193 = 25.90908267352744%
running mean_acc :  0.8361822552447552
running mean_loss :  0.361668428175975


14400th iteration over 55193 = 26.090265069845813%
running mean_acc :  0.8362847222222223
running mean_loss :  0.3613951057681987


14500th iteration over 55193 = 26.271447466164187%
running mean_acc :  0.836396551724138
running mean_loss :  0.36125466441785076


14600th iteration over 55193 = 26.45262986248256%
running mean_acc :  0.836382705479452
running mean_loss :  0.3612432080177434


14700th iteration over 55193 = 26.633812258800933%
running mean_acc :  0.8364732142857143
running mean_loss :  0.36108007669574543


14800th iteration over 55193 = 26.81499465511931%
running mean_acc :  0.8365561655405406
running mean_loss :  0.3609579569972791


14900th iteration over 55193 = 26.996177051437684%
running mean_acc :  0.8366988255033557
running mean_loss :  0.3607013573879188


15000th iteration over 55193 = 27.177359447756057%
running mean_acc :  0.83676875
running mean_loss :  0.36057164474877745


15100th iteration over 55193 = 27.35854184407443%
running mean_acc :  0.8368687913907285
running mean_loss :  0.3604213351303754


15200th iteration over 55193 = 27.539724240392804%
running mean_acc :  0.837008634868421
running mean_loss :  0.3602020033132747


15300th iteration over 55193 = 27.720906636711177%
running mean_acc :  0.837107843137255
running mean_loss :  0.35999724341417777


15400th iteration over 55193 = 27.90208903302955%
running mean_acc :  0.8372037337662338
running mean_loss :  0.35987387678786287


15500th iteration over 55193 = 28.083271429347924%
running mean_acc :  0.8373064516129032
running mean_loss :  0.3597422197266945


15600th iteration over 55193 = 28.264453825666298%
running mean_acc :  0.8373778044871795
running mean_loss :  0.35962921111503554


15700th iteration over 55193 = 28.44563622198467%
running mean_acc :  0.8374582006369427
running mean_loss :  0.3595367257413489


15800th iteration over 55193 = 28.626818618303044%
running mean_acc :  0.8375751582278481
running mean_loss :  0.3593946991111235


15900th iteration over 55193 = 28.808001014621418%
running mean_acc :  0.8376022012578617
running mean_loss :  0.35935741977898644


16000th iteration over 55193 = 28.989183410939795%
running mean_acc :  0.83771875
running mean_loss :  0.35919711790882525


16100th iteration over 55193 = 29.170365807258168%
running mean_acc :  0.8378299689440993
running mean_loss :  0.35902106860634864


16200th iteration over 55193 = 29.35154820357654%
running mean_acc :  0.8379398148148148
running mean_loss :  0.35886416852273706


16300th iteration over 55193 = 29.532730599894915%
running mean_acc :  0.838036809815951
running mean_loss :  0.35872002027009675


16400th iteration over 55193 = 29.71391299621329%
running mean_acc :  0.8381478658536585
running mean_loss :  0.35849240376048974


16500th iteration over 55193 = 29.895095392531662%
running mean_acc :  0.8381931818181818
running mean_loss :  0.35838946102957


16600th iteration over 55193 = 30.076277788850035%
running mean_acc :  0.8382981927710843
running mean_loss :  0.3582678530130729


16700th iteration over 55193 = 30.25746018516841%
running mean_acc :  0.8384113023952096
running mean_loss :  0.3580475865359878


16800th iteration over 55193 = 30.438642581486782%
running mean_acc :  0.8385342261904762
running mean_loss :  0.3578274710803761


16900th iteration over 55193 = 30.619824977805155%
running mean_acc :  0.8386020710059171
running mean_loss :  0.35763847808514565


17000th iteration over 55193 = 30.80100737412353%
running mean_acc :  0.8387334558823529
running mean_loss :  0.35737619518096025
