In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import random
import string
from sklearn.metrics import f1_score

from nltk.translate import nist_score
from nltk.translate import gleu_score
from nltk.translate import ribes_score
from nltk.translate import chrf_score

import similarity_score

In [2]:
def read_data(current_file: str):
    text_data = list()
    if os.path.exists(current_file):
        open_file = open(current_file, 'r', encoding="utf-8")
        f = open_file.read().split('\n')  
        list_data = [f[x:x+5] for x in range(0, len(f), 6)]
        df = pd.DataFrame(list_data, columns =['Source', 'Reference', "Candidate", "Bleu_Score", "Label" ]) 
        df["Bleu_Score"] = df["Bleu_Score"].apply(pd.to_numeric)

    else: "Error: file does not exist"
    return df

In [3]:
def calc_nist(row):
    return nist_score.corpus_nist([[row["Reference"].split(" ")]], [row["Candidate"].split(" ")])

def calc_gleu(row):
    return gleu_score.corpus_gleu([[row["Reference"].split(" ")]], [row["Candidate"].split(" ")])

def calc_ribes(row):
    return ribes_score.corpus_ribes([[row["Reference"].split(" ")]], [row["Candidate"].split(" ")])

def calc_chrf(row):
    return chrf_score.corpus_chrf([row["Reference"].split(" ")], [row["Candidate"].split(" ")])

def calc_avmax(row):
    model = similarity_score.load_model()
    score = similarity_score.score(row["Reference"],row["Candidate"],model)
    
# def calc_wer(row, print_matrix=False):
#     hyp = row["Candidate"]
#     ref = row["Reference"]
#     N = len(hyp)
#     M = len(ref)
#     L = np.zeros((N,M))
#     for i in range(0, N):
#         for j in range(0, M):
#             if min(i,j) == 0:
#                 L[i,j] = max(i,j)
#             else:
#                 deletion = L[i-1,j] + 1
#                 insertion = L[i,j-1] + 1
#                 sub = 1 if hyp[i] != ref[j] else 0
#                 substitution = L[i-1,j-1] + sub
#                 L[i,j] = min(deletion, min(insertion, substitution))
#                 # print("{} - {}: del {} ins {} sub {} s {}".format(hyp[i], ref[j], deletion, insertion, substitution, sub))
#     if print_matrix:
#         print("WER matrix ({}x{}): ".format(N, M))
#         print(L)
#     return int(L[N-1, M-1])

In [4]:
def add_features(df):
    df["Ref_Length"] = df["Reference"].apply(lambda x: len(x))
    df["Can_Length"] = df["Candidate"].apply(lambda x: len(x))
    df["Src_Length"] = df["Source"].apply(lambda x: len(x))
    df["target"] = df["Label"].apply(lambda x: 1 if x == "H" else 0)
    
    df["NIST_Score"] = df.apply(lambda row: calc_nist(row), axis=1)
    df["Gleu_Score"] = df.apply(lambda row: calc_gleu(row), axis=1)
    #df["Ribes_Score"] = df.apply(lambda row: calc_ribes(row), axis=1)
    df["Chrf_Score"] = df.apply(lambda row: calc_chrf(row), axis=1)
    #df["AvMax_Score"] = df.apply(lambda row: calc_avmax(row), axis=1)
#     df["Wer_Score"] = df.apply(lambda row: calc_wer(row), axis=1)


    return df

In [5]:
df = read_data("/Users/anthonycuturrufo/Documents/School/Artzi_Eval/train.txt")
dataset = add_features(df)

In [6]:
dataset.head()

Unnamed: 0,Source,Reference,Candidate,Bleu_Score,Label,Ref_Length,Can_Length,Src_Length,target,NIST_Score,Gleu_Score,Chrf_Score
0,巴林 公主 下 嫁 美 大兵 惊 世 婚姻 五 年 宣告 破裂,"bahraini princess marries us soldier , astonis...",bahraini princess marries a u.s. soldier ; ast...,0.3125,H,75,83,31,1,1.423246,0.173913,0.539032
1,"巴林 公主 梅 丽 安 ・ 哈 里 发 下 嫁 美国 陆 战 队 大兵 强 生 , 曾 获 ...",the star-crossed marriage between bahraini pri...,u.s. television stations had once feted the ma...,0.6531,H,234,251,129,1,3.941611,0.427711,0.700914
2,"梅 丽 安 是 海湾 国家 巴林 王室 的 成员 , 强 生 冒 着 赔 上 军旅 生涯 的...",meriam is a member of the gulf country bahrain...,"meri gulf state of bahrain , the royal family ...",0.3784,M,198,183,111,0,1.663802,0.10274,0.350299
3,"不过 , 据 拉 斯 维 加 斯 评论 报 报导 , 才 过 了 五 年 , 两 人 就 对...",but the las vegas review-journal reported that...,"however , according to the las vegas , comment...",0.3646,M,220,174,106,0,1.998639,0.171233,0.361169
4,"他们 两 人 在 一九九九年 相遇 , 当时 强 生 还是 职业 军人 , 派 驻 在 巴林 .",the pair met in 1999 when career military man ...,"the two met in 1999 , when johnson was still a...",0.7778,H,80,89,48,1,3.045229,0.409091,0.782057


In [7]:
numerical_columns = ['Chrf_Score','Gleu_Score','Bleu_Score','Ref_Length','Can_Length','Src_Length']
output_label = ['target']

In [8]:
numerical_data = np.stack([dataset[col].values for col in numerical_columns], 1)
numerical_data = torch.tensor(numerical_data, dtype=torch.float)
outputs = torch.tensor(dataset[output_label].values).flatten()

In [9]:
total_records = 584
test_records = int(total_records * .15)

numerical_train_data = numerical_data[:total_records-test_records]
numerical_test_data = numerical_data[total_records-test_records:total_records]
train_outputs = outputs[:total_records-test_records]
test_outputs = outputs[total_records-test_records:total_records]

In [10]:
class Model(nn.Module):

    def __init__(self, num_numerical_cols, output_size, layers, p=0.4):
        super().__init__()
        
        self.batch_norm_num = nn.BatchNorm1d(num_numerical_cols)
        input_size = num_numerical_cols
        all_layers = []
        for i in layers:
            all_layers.append(nn.Linear(input_size, i))
            all_layers.append(nn.ReLU(inplace=True))
            all_layers.append(nn.BatchNorm1d(i))
            all_layers.append(nn.Dropout(p))
            input_size = i
        
        all_layers.append(nn.Linear(layers[-1], output_size))
        self.layers = nn.Sequential(*all_layers)
        
    def forward(self, x_numerical):     
        x_numerical = self.batch_norm_num(x_numerical)
        x = x_numerical
        x = self.layers(x)
        return x

In [11]:
model = Model(numerical_data.shape[1], 2, [100,50,25], p=0.4)

#loss_function = nn.CrossEntropyLoss(weight = torch.Tensor([1.0, 1.1]))
loss_function = nn.CrossEntropyLoss()
#loss_function = nn.NLLLoss()
#loss_function = nn.BCEWithLogitsLoss()


#optimizer = torch.optim.SGD(model.parameters(), lr=5e-4, momentum=.9)
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

epochs = 20000
aggregated_losses = []

for i in range(epochs):
    i += 1
    y_pred = model(numerical_train_data)
    single_loss = loss_function(y_pred, train_outputs)
    aggregated_losses.append(single_loss)

    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
    
    optimizer.zero_grad()
    single_loss.backward()
    optimizer.step()

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.74490833
epoch:  26 loss: 0.72360450
epoch:  51 loss: 0.67457569
epoch:  76 loss: 0.65862125
epoch: 101 loss: 0.63180447
epoch: 126 loss: 0.62117970
epoch: 151 loss: 0.58153731
epoch: 176 loss: 0.60210782
epoch: 201 loss: 0.57821691
epoch: 226 loss: 0.56562877
epoch: 251 loss: 0.56600058
epoch: 276 loss: 0.55627966
epoch: 301 loss: 0.55889189
epoch: 326 loss: 0.53891736
epoch: 351 loss: 0.54520136
epoch: 376 loss: 0.51729578
epoch: 401 loss: 0.53218031
epoch: 426 loss: 0.52084929
epoch: 451 loss: 0.52865058
epoch: 476 loss: 0.49797702
epoch: 501 loss: 0.50828910
epoch: 526 loss: 0.51344401
epoch: 551 loss: 0.49943775
epoch: 576 loss: 0.51492643
epoch: 601 loss: 0.47842064
epoch: 626 loss: 0.50445056
epoch: 651 loss: 0.49704301
epoch: 676 loss: 0.50963145
epoch: 701 loss: 0.48419082
epoch: 726 loss: 0.48984423
epoch: 751 loss: 0.50243688
epoch: 776 loss: 0.49096698
epoch: 801 loss: 0.50001919
epoch: 826 loss: 0.47625959
epoch: 851 loss: 0.49316213
epoch: 876 loss: 0.4

epoch: 7126 loss: 0.41162547
epoch: 7151 loss: 0.39522994
epoch: 7176 loss: 0.40605253
epoch: 7201 loss: 0.39341182
epoch: 7226 loss: 0.40299100
epoch: 7251 loss: 0.39272180
epoch: 7276 loss: 0.38030550
epoch: 7301 loss: 0.40726402
epoch: 7326 loss: 0.38895378
epoch: 7351 loss: 0.42493680
epoch: 7376 loss: 0.37836385
epoch: 7401 loss: 0.39661828
epoch: 7426 loss: 0.39231256
epoch: 7451 loss: 0.38331768
epoch: 7476 loss: 0.39751291
epoch: 7501 loss: 0.38861156
epoch: 7526 loss: 0.41570920
epoch: 7551 loss: 0.39749393
epoch: 7576 loss: 0.39200220
epoch: 7601 loss: 0.38624430
epoch: 7626 loss: 0.38831657
epoch: 7651 loss: 0.36804757
epoch: 7676 loss: 0.38639703
epoch: 7701 loss: 0.39095077
epoch: 7726 loss: 0.38090408
epoch: 7751 loss: 0.38171390
epoch: 7776 loss: 0.37526843
epoch: 7801 loss: 0.38740167
epoch: 7826 loss: 0.37251329
epoch: 7851 loss: 0.39687827
epoch: 7876 loss: 0.37296417
epoch: 7901 loss: 0.37441275
epoch: 7926 loss: 0.35852718
epoch: 7951 loss: 0.38085386
epoch: 7976 lo

epoch: 14051 loss: 0.30238032
epoch: 14076 loss: 0.30881089
epoch: 14101 loss: 0.30277127
epoch: 14126 loss: 0.29883534
epoch: 14151 loss: 0.31569558
epoch: 14176 loss: 0.31851259
epoch: 14201 loss: 0.32955855
epoch: 14226 loss: 0.32294106
epoch: 14251 loss: 0.31553134
epoch: 14276 loss: 0.29600471
epoch: 14301 loss: 0.34810576
epoch: 14326 loss: 0.34474409
epoch: 14351 loss: 0.29201445
epoch: 14376 loss: 0.34080014
epoch: 14401 loss: 0.32965249
epoch: 14426 loss: 0.31219721
epoch: 14451 loss: 0.32600152
epoch: 14476 loss: 0.30810276
epoch: 14501 loss: 0.30783957
epoch: 14526 loss: 0.32098442
epoch: 14551 loss: 0.32346919
epoch: 14576 loss: 0.34977940
epoch: 14601 loss: 0.38288289
epoch: 14626 loss: 0.30446425
epoch: 14651 loss: 0.32216936
epoch: 14676 loss: 0.32334775
epoch: 14701 loss: 0.30047119
epoch: 14726 loss: 0.34497711
epoch: 14751 loss: 0.34260792
epoch: 14776 loss: 0.32682589
epoch: 14801 loss: 0.31360558
epoch: 14826 loss: 0.34289911
epoch: 14851 loss: 0.32878727
epoch: 148

In [12]:
#prints out validation loss
with torch.no_grad():
    validation_y_val = model(numerical_test_data)
    loss = loss_function(validation_y_val, test_outputs)
print(f'Loss: {loss:.8f}')

Loss: 1.38791418


In [13]:
#prints out training loss
with torch.no_grad():
    train_y_val = model(numerical_train_data)
    loss = loss_function(train_y_val, train_outputs)
print(f'Loss: {loss:.8f}')

Loss: 0.29698852


In [14]:
validation_y_output = np.argmax(validation_y_val, axis=1)
train_y_output = np.argmax(train_y_val, axis=1) 

validation_accuracy = np.mean(validation_y_output.numpy() == test_outputs.numpy())
validation_f1 = f1_score(validation_y_output.numpy(), test_outputs.numpy(), average='weighted')

train_accuracy = np.mean(train_y_output.numpy() == train_outputs.numpy())
train_f1 = f1_score(train_y_output.numpy(), train_outputs.numpy(), average='weighted')

print("Validation Accuracy: ", validation_accuracy, "- F1 Score = ", validation_f1)
print("Train Accuracy: ", train_accuracy, "- F1 Score = ", train_f1)

Validation Accuracy:  0.7126436781609196 - F1 Score =  0.7121118885368947
Train Accuracy:  0.8631790744466801 - F1 Score =  0.8632361746005475


# Test Set (F1 Score Evaluation)

In [15]:
test_df = read_data("/Users/anthonycuturrufo/Documents/School/Artzi_Eval/test.txt")

In [16]:
test_dataset = add_features(test_df)

In [17]:
testset_numerical_data = np.stack([test_dataset[col].values for col in numerical_columns], 1)
testset_numerical_data = torch.tensor(testset_numerical_data, dtype=torch.float)

testset_outputs = torch.tensor(test_dataset[output_label].values).flatten()

In [18]:
#prints out testing loss
with torch.no_grad():
    y_val = model(testset_numerical_data)
    loss = loss_function(y_val, testset_outputs)
print(f'Loss: {loss:.8f}')

Loss: 0.61506683


In [19]:
y_output = np.argmax(y_val, axis=1)
accuracy = np.mean(y_output.numpy() == testset_outputs.numpy())
f1 = f1_score(y_output.numpy(), testset_outputs.numpy(), average='weighted')
print("Testing Accuracy: ",accuracy, "- F1 Score = ", f1)

Testing Accuracy:  0.7701149425287356 - F1 Score =  0.7712201591511936


In [20]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print(confusion_matrix(testset_outputs,y_output))
print(classification_report(testset_outputs,y_output))
print(accuracy_score(testset_outputs, y_output))

[[58 24]
 [16 76]]
              precision    recall  f1-score   support

           0       0.78      0.71      0.74        82
           1       0.76      0.83      0.79        92

    accuracy                           0.77       174
   macro avg       0.77      0.77      0.77       174
weighted avg       0.77      0.77      0.77       174

0.7701149425287356
