In [None]:
import io, sys
import fasttext
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from similarity.normalized_levenshtein import NormalizedLevenshtein
from similarity.cosine import Cosine
from similarity.jaccard import Jaccard
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn.parameter import Parameter
from torch.optim import Adam, SGD

In [None]:
# -*- coding: UTF-8 -*-
# Indic NLP library bootstrap. The statements below are order-sensitive:
# the library source directory has to be on sys.path before `indicnlp` is
# imported, and the resources path is set before `loader.load()` is called.
#
# Both constants are placeholders and must be edited to point at local
# checkouts before this cell can run.
# The path to the local git repo for Indic NLP library
INDIC_NLP_LIB_HOME=r"<PATH TO INDIC NLP Src>" # eg. "/../../../indic_nlp_library/src"
# The path to the local git repo for Indic NLP Resources
INDIC_NLP_RESOURCES=r"<PATH TO INDIC NLP resouces>" # eg. "../../../indic_nlp_resources"

# NOTE(review): `sys` and `io` are already imported in the first cell; the
# repeat here is harmless.
import sys
import io

# Make the library importable. `format()` applied to a str with no format
# spec returns the string unchanged, so this appends INDIC_NLP_LIB_HOME as is.
sys.path.append(format(INDIC_NLP_LIB_HOME))

import indicnlp
from indicnlp import common

#common.init()
# Tell the library where its resource files live.
common.set_resources_path(INDIC_NLP_RESOURCES)

from indicnlp import loader
#=======Load API=======#
loader.load()
#=======Load API=======#
# Short aliases used by the phonetic feature helpers further down:
#   psim - phonetic similarity functions (psim.cosine)
#   isc  - per-character phonetic feature vectors (isc.get_phonetic_feature_vector)
from indicnlp.script import  phonetic_sim as psim
from indicnlp.script import  indic_scripts as isc
from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator

# Feedforward Neural Network Definition

In [None]:
class DeepLayer(nn.Module):
    """One-hidden-layer perceptron: Linear -> ReLU -> Linear.

    The hidden layer is twice as wide as the input. Both linear layers carry
    a bias and have their weights re-initialised with Xavier-uniform.
    """

    def __init__(self, inputDimension, outputDimension ):
        """
        Args:
            inputDimension: size of the last axis of the input.
            outputDimension: size of the last axis of the output.
        """
        super(DeepLayer, self).__init__()

        self.inputDimension = inputDimension
        self.outputDimension = outputDimension

        hiddenDimension = self.inputDimension * 2

        # Sub-modules are created in a fixed order so that parameter
        # registration (and random-number consumption) stays stable.
        self.linear1 = nn.Linear(self.inputDimension, hiddenDimension, bias=True)
        self.nonLinear1 = nn.ReLU()
        self.linear2 = nn.Linear(hiddenDimension, self.outputDimension, bias=True)

        # Replace the default weight initialisation; biases are left alone.
        for projection in (self.linear1, self.linear2):
            nn.init.xavier_uniform_(projection.weight)

    def forward(self, x_in):
        """Return the un-activated output of the second linear layer."""
        hidden = self.nonLinear1(self.linear1(x_in))
        return self.linear2(hidden)
    
class FFNN(nn.Module):
    """Feed-forward classifier built on a single DeepLayer.

    `forward` returns sigmoid probabilities, while `loss` feeds the raw
    DeepLayer scores to BCEWithLogitsLoss.
    """

    def __init__(self, inputDimension, outputDimension):
        """
        Args:
            inputDimension: number of input features.
            outputDimension: number of output units.
        """
        super(FFNN, self).__init__()

        self.inputDimension = inputDimension
        self.outputDimension = outputDimension

        self.layer = DeepLayer(self.inputDimension, self.outputDimension)

        # The loss takes raw scores; the sigmoid is only used to expose probabilities.
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.sigmoid = nn.Sigmoid()

    def loss(self, x, y):
        """Score `x` and compare against the targets `y`.

        Returns:
            A tuple (loss, probabilities, y): the BCE-with-logits loss, the
            sigmoid of the raw scores, and the targets passed in unchanged.
        """
        logits = self.layer(x)
        return self.bce_loss(logits, y), self.sigmoid(logits), y

    def forward(self, x):
        """Return the sigmoid of the DeepLayer scores for `x`."""
        return self.sigmoid(self.layer(x))


In [None]:
# Shared scorer, built once with argument 2 (presumably the shingle size --
# confirm against the `similarity` package).
jaccard = Jaccard(2)
def jsim(w1, w2):
    """Return the similarity of strings w1 and w2 as reported by `jaccard`."""
    score = jaccard.similarity(w1, w2)
    return score

# Shared scorers used by wls(); each is constructed once at cell execution.
cosine = Cosine(2)
normalized_levenshtein = NormalizedLevenshtein()

def wls(w1, w2):
    """Blend two string similarities of w1 and w2 into one score.

    The result is 0.75 * normalized-Levenshtein similarity plus
    0.25 * cosine similarity, both taken from the module-level scorers.
    """
    cosinePart = 0.25 * cosine.similarity(w1, w2)
    editPart = 0.75 * normalized_levenshtein.similarity(w1, w2)
    return editPart + cosinePart

def phonetic(w1, w2, lang):
    """Cosine similarity between the mean phonetic vectors of two words.

    Args:
        w1: word whose characters are looked up with language code "hi".
        w2: word whose characters are first passed through
            UnicodeIndicTransliterator.transliterate(c, "hi", lang) and then
            looked up with language code `lang`.
        lang: language code used for w2.

    Returns:
        psim.cosine of the two per-word mean vectors (38 components each).
    """
    # Mean of the per-character feature vectors of w1.
    hinVec = sum(
        (isc.get_phonetic_feature_vector(hiChar, "hi") for hiChar in w1),
        np.zeros(38),
    ) / len(w1)

    # Same for w2, after mapping each character with the transliterator.
    # NOTE(review): the call transliterates *from* "hi" *to* lang although w2
    # is the target-language word -- confirm the direction is intended.
    tgtlangVec = sum(
        (
            isc.get_phonetic_feature_vector(
                UnicodeIndicTransliterator.transliterate(tgtChar, "hi", lang), lang
            )
            for tgtChar in w2
        ),
        np.zeros(38),
    ) / len(w2)

    return psim.cosine(hinVec, tgtlangVec)

def phoneticV(w1, w2, lang):
    """Return the mean phonetic feature vectors of two words.

    Unlike `phonetic`, no similarity is computed here: the two vectors are
    returned so they can be used directly as features.

    Args:
        w1: word whose characters are looked up with language code "hi".
        w2: word whose characters are first passed through
            UnicodeIndicTransliterator.transliterate(c, "hi", lang) and then
            looked up with language code `lang`.
        lang: language code used for w2.

    Returns:
        A tuple (hinVec, tgtlangVec) of 38-component arrays, each the
        per-character feature vectors summed and divided by the word length.
    """
    charVector = np.zeros(38)
    for c in w1:
        charVector += isc.get_phonetic_feature_vector(c, "hi")
    hinVec = charVector / len(w1)

    charVector = np.zeros(38)
    for c in w2:
        charTrans = UnicodeIndicTransliterator.transliterate(c, "hi", lang)
        charVector += isc.get_phonetic_feature_vector(charTrans, lang)
    tgtlangVec = charVector / len(w2)

    # The cosine similarity that used to be computed here was never used or
    # returned, so it has been dropped.
    return hinVec, tgtlangVec

def crossVec(w1, w2, himodel, tgtmodel, hiCrossModel, hiDict, tgtCrossModel, tgtDict, lang):
    """Look up one vector per word, preferring the cross-lingual tables.

    For each word, the row `CrossModel[Dict[word]]` is used when the word is a
    key of the corresponding dictionary; otherwise the vector comes from the
    matching model's `get_word_vector`.

    Args:
        w1, w2: the two words to look up.
        himodel, tgtmodel: fallback models exposing `get_word_vector(word)`.
        hiCrossModel, tgtCrossModel: indexable vector tables.
        hiDict, tgtDict: word -> row index mappings for the tables.
        lang: accepted for signature compatibility; not used here.

    Returns:
        A tuple (hiVec, tgtVec).
    """
    hiVec = hiCrossModel[hiDict[w1]] if w1 in hiDict else himodel.get_word_vector(w1)
    tgtVec = tgtCrossModel[tgtDict[w2]] if w2 in tgtDict else tgtmodel.get_word_vector(w2)
    return hiVec, tgtVec

## Dataset Preparation for Similarity Scores or Vectors

In [None]:
# Column layout of the ';'-separated rows consumed by prepData.
_PREP_WORD1_COL = 2
_PREP_WORD2_COL = 3
_PREP_CON1_COL = 4
_PREP_CON2_COL = 5
_PREP_GAZE_COLS = range(6, 14)   # eight gaze features per row
_PREP_LABEL_COL = 14

# Feature families that yield [word score, context score].
_PREP_SCORE_TYPES = ("JSim", "WLS", "Phonetic")
# Feature families that yield concatenated vectors, with their vector size.
_PREP_VECTOR_DIMS = {"PhoneticV": 38, "monolingual": 50, "crossVec": 50}
# Every simType value accepted by prepData.
_PREP_SIM_TYPES = frozenset([
    "JSim", "WLS", "WLSGaze", "Phonetic", "PhoneticGaze",
    "PhoneticV", "PhoneticVGaze", "monolingual", "crossSim",
    "crossVec", "crossVecGaze",
])


def _prepMeanPairScore(con1, con2, scoreFn):
    """Average scoreFn(a, b) over every (con1 word, con2 word) pair."""
    score = 0.0
    for conWord1 in con1:
        for conWord2 in con2:
            score = score + scoreFn(conWord1, conWord2)
    return score / (len(con1) * len(con2) * 1.0)


def _prepContextVectors(con1, con2, dim, pairFn):
    """Sum the pairFn(a, b) vector pairs over all context-word pairs and scale them."""
    fcVec1 = np.zeros(dim)
    fcVec2 = np.zeros(dim)
    for conWord1 in con1:
        for conWord2 in con2:
            cVec1, cVec2 = pairFn(conWord1, conWord2)
            fcVec1 += np.asarray(cVec1, dtype=float)
            fcVec2 += np.asarray(cVec2, dtype=float)
    # NOTE(review): each sum runs over len(con1) * len(con2) pairs but is
    # divided by a single context length, so the result is the mean context
    # vector scaled by the length of the *other* context. Kept as is so that
    # feature values stay unchanged; confirm whether that is intended.
    return fcVec1 / (len(con1) * 1.0), fcVec2 / (len(con2) * 1.0)


def _prepRowFeatures(simType, word1, word2, con1, con2, gaze_list,
                     himodel, tgtmodel, hiCrossModel, hiDict, tgtCrossModel, tgtDict, lang):
    """Build the feature list/array for one parsed row (simType already validated)."""
    withGaze = simType.endswith("Gaze")
    base = simType[:-len("Gaze")] if withGaze else simType

    if base == "crossSim":
        # NOTE(review): s1..s4 are never assigned in this notebook view, so
        # this only works if they exist as globals -- confirm or remove.
        return [s1, s2, s3, s4]

    con1 = con1.split()
    con2 = con2.split()

    if base in _PREP_SCORE_TYPES:
        if base == "JSim":
            scoreFn = jsim
        elif base == "WLS":
            scoreFn = wls
        else:
            scoreFn = lambda a, b: phonetic(a, b, lang)
        wSim = scoreFn(word1, word2)
        conSim = _prepMeanPairScore(con1, con2, scoreFn)
        finalList = [wSim, conSim]
        if withGaze:
            # Append the gaze values parsed from the row. The old code
            # referenced g9..g18, which no longer exist in this column layout.
            finalList.extend(gaze_list)
        return finalList

    if base == "PhoneticV":
        pairFn = lambda a, b: phoneticV(a, b, lang)
    elif base == "monolingual":
        # NOTE(review): `monolingual` is not defined in the visible part of
        # the notebook -- it must be provided elsewhere for this mode to run.
        pairFn = lambda a, b: monolingual(a, b, himodel, tgtmodel)
    else:  # "crossVec"
        pairFn = lambda a, b: crossVec(a, b, himodel, tgtmodel, hiCrossModel,
                                       hiDict, tgtCrossModel, tgtDict, lang)

    vec1, vec2 = pairFn(word1, word2)
    fcVec1, fcVec2 = _prepContextVectors(con1, con2, _PREP_VECTOR_DIMS[base], pairFn)
    parts = [vec1, vec2, fcVec1, fcVec2]
    if withGaze:
        # Sized from the parsed gaze values rather than a hard-coded shape.
        parts.append(np.array(gaze_list))
    return np.concatenate(parts)


def prepData(file, simType, himodel, tgtmodel, hiCrossModel, hiDict, tgtCrossModel, tgtDict, lang):
    """Read a ';'-separated dataset and turn every row into a feature vector.

    Row layout (0-based columns): 2 = word1, 3 = word2, 4 = context of word1,
    5 = context of word2 (whitespace-separated words), 6..13 = eight gaze
    values, 14 = integer label. Blank lines are skipped.

    Args:
        file: path of the dataset, read as UTF-8.
        simType: one of "JSim", "WLS", "WLSGaze", "Phonetic", "PhoneticGaze",
            "PhoneticV", "PhoneticVGaze", "monolingual", "crossSim",
            "crossVec", "crossVecGaze". The "...Gaze" variants append the
            eight gaze values to the corresponding base features.
        himodel, tgtmodel: fallback word-vector models (vector-based modes).
        hiCrossModel, hiDict, tgtCrossModel, tgtDict: cross-lingual vector
            tables and their word -> row dictionaries (crossVec modes).
        lang: target-language code handed to the phonetic / crossVec helpers.

    Returns:
        (X, y): X is a float32 array with one feature row per data row and y
        an int32 array of labels.

    Raises:
        ValueError: if simType is not one of the supported values, or a gaze
            or label field is not numeric.
        IndexError: if a non-blank row has fewer than 15 fields.
    """
    if simType not in _PREP_SIM_TYPES:
        raise ValueError("Unsupported simType %r; expected one of %s"
                         % (simType, ", ".join(sorted(_PREP_SIM_TYPES))))

    X = []
    y = []
    # Explicit encoding: the data holds Indic-script text and must not depend
    # on the platform's default encoding.
    with io.open(file, "r", encoding="utf-8") as infile:
        for line in infile:
            line = line.strip()
            if not line:
                continue
            fields = [field.strip() for field in line.split(";")]
            word1 = fields[_PREP_WORD1_COL]
            word2 = fields[_PREP_WORD2_COL]
            con1 = fields[_PREP_CON1_COL]
            con2 = fields[_PREP_CON2_COL]
            label = fields[_PREP_LABEL_COL]
            # Gaze values are parsed for every mode, as before, so malformed
            # rows are reported regardless of simType.
            gaze_list = [float(fields[col]) for col in _PREP_GAZE_COLS]

            finalList = _prepRowFeatures(simType, word1, word2, con1, con2, gaze_list,
                                         himodel, tgtmodel, hiCrossModel, hiDict,
                                         tgtCrossModel, tgtDict, lang)
            X.append(finalList)
            y.append(int(label))

    X = np.asarray(X, dtype='float32')
    y = np.asarray(y, dtype='int32')
    return X, y

In [None]:
import os
import codecs

def load_embeddings(file_name):
    """Load word vectors from a space-separated text file.

    Each data line is `word v1 v2 ... vN`. Lines with exactly two
    space-separated tokens are skipped (this is how the `count dimension`
    header is ignored), as are empty lines. Words are lower-cased and only
    the first occurrence of a lower-cased word is kept. Undecodable bytes are
    dropped while reading (errors='ignore').

    Args:
        file_name: path of the embeddings file, read as UTF-8.

    Returns:
        A tuple (wordEmbedding, dictionary, reverseDict, count, dimension):
        the array of kept vectors, a word -> row index dict, the list of
        words in row order, the number of rows, and the vector length.

    Raises:
        ValueError: if a kept line's vector length differs from that of the
            first kept line, or a component is not a valid float.
    """
    dictionary = dict()
    reverseDict = []

    wv = []
    dimension = 0
    with codecs.open(file_name, 'r', 'utf-8', errors='ignore') as f_in:
        for line in f_in:
            line = line.strip()
            if not line:
                continue

            tokens = line.split(' ')
            if len(tokens) == 2:
                continue

            vocabulary = tokens[0].lower()
            if vocabulary in dictionary:
                continue

            components = tokens[1:]
            if dimension == 0:
                dimension = len(components)

            if dimension != len(components):
                # Raise instead of calling exit(): terminating the interpreter
                # from a helper kills the notebook kernel and hides the cause.
                raise ValueError(
                    "Inconsistent embedding dimension in %s for word %r: expected %d, found %d"
                    % (file_name, vocabulary, dimension, len(components))
                )

            dictionary[vocabulary] = len(dictionary)
            reverseDict.append(vocabulary)
            wv.append([float(component) for component in components])

    wordEmbedding = np.array(wv)

    return wordEmbedding, dictionary, reverseDict, wordEmbedding.shape[0], dimension

In [31]:
def batch(iterable1, iterable2, n=1):
    """Yield aligned slices of two arrays in chunks of at most n rows.

    The number of rows is taken from `iterable1.shape[0]`; both inputs are
    sliced with the same bounds, and the last chunk may be shorter than n.
    """
    total = iterable1.shape[0]
    for start in range(0, total, n):
        stop = start + n
        if stop > total:
            stop = total
        window = slice(start, stop)
        yield iterable1[window], iterable2[window]

## Without Gaze

In [None]:
# Cross-validated evaluation on dataset D1 using cross-lingual word vectors
# only (no gaze features). One result row is written per
# (language, similarity, method, C) combination, averaged over the folds.
langs = ['mr']
# methods = ['FF', 'SVM', 'SVMP', 'SVMG', 'LR']
methods = ['FF']
sims = ["crossVec"]

# Run the feed-forward network on the GPU when one is present and fall back
# to the CPU otherwise, instead of failing on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with io.open("EACL_CrossVec_Results.csv", "w+") as outfile:
    outfile.write("Language Pair;Similarity;Method;C;Folds;Precision;Recall;F-Score\n")
    for lang in langs:
        himodel = fasttext.load_model("../fastText-0.2.0/models/himodel50.bin")
        tgtmodel = fasttext.load_model("../fastText-0.2.0/models/"+lang+"model50.bin")
        # NOTE(review): the Hindi vectors are read from ../data/MUSE/ while
        # the target-language vectors come from ../MUSE/ -- confirm both paths.
        hiCrossModel, hiDict, hiRevDict, NS, embDim = load_embeddings("../data/MUSE/hi"+lang+"/vectors-hi.txt")
        tgtCrossModel, tgtDict, tgtRevDict, tgtNS, tgtembDim = load_embeddings("../MUSE/hi"+lang+"/vectors-"+lang+".txt")
        for sim in sims:
            for mlmethod in methods:
                X,y = prepData("../data/D1/D1.csv", sim, himodel, tgtmodel, hiCrossModel, hiDict, tgtCrossModel, tgtDict, lang)
                # Regularisation strengths tried for the scikit-learn models.
                # The 'FF' network does not use C, so it is simply trained
                # once per value.
                vals = [1e-2, 1e-1, 1]
                for c in vals:
                    n = 5
                    # No random_state: it only has an effect together with
                    # shuffle=True, and recent scikit-learn versions reject
                    # it when shuffle is False. The folds stay the same
                    # deterministic, unshuffled ones as before.
                    skf = StratifiedKFold(n_splits=n)

                    fprecision = 0
                    frecall = 0
                    ffscore = 0
                    for train_index, test_index in skf.split(X, y):
                        X_train, X_test = X[train_index], X[test_index]
                        y_train, y_test = y[train_index], y[test_index]

                        if mlmethod == "FF":
                            ff_nn = FFNN(X_train.shape[1], 1)
                            ff_nn.to(device)
                            initial_lr = 0.4
                            optim = SGD(ff_nn.parameters(), lr=initial_lr, momentum=0.04, weight_decay=0.0, nesterov=True)

                            for epoch in range(1, 20):
                                for inputs, labels in batch(X_train, y_train, 20):
                                    input_tensor = torch.from_numpy(inputs).to(device)
                                    output_tensor = torch.FloatTensor(labels).to(device).unsqueeze(1)

                                    optim.zero_grad()
                                    loss, _, _ = ff_nn.loss(input_tensor, output_tensor)

                                    loss.backward()
                                    optim.step()

                                # Exponential learning-rate decay. Building a
                                # fresh optimizer also resets the momentum
                                # buffers each epoch; kept to preserve the
                                # original training dynamics.
                                lrate = initial_lr * np.exp(-0.1 * epoch)
                                optim = SGD(ff_nn.parameters(), lr=lrate, momentum=0.04, weight_decay=0.0, nesterov=True)

                            # Inference: no gradients needed, threshold the
                            # sigmoid outputs at 0.5.
                            ff_nn.eval()
                            y_pred = []
                            with torch.no_grad():
                                for inputs, _ in batch(X_test, y_test, 20):
                                    input_tensor = torch.from_numpy(inputs).to(device)
                                    probabilities = ff_nn(input_tensor).cpu().numpy()
                                    y_pred.extend((probabilities.ravel() >= 0.5).astype(int).tolist())
                        else:
                            if mlmethod == "SVM":
                                svclassifier = SVC(kernel='linear', C=c)
                            elif mlmethod == "SVMP":
                                svclassifier = SVC(kernel='poly', C=c)
                            elif mlmethod == "SVMG":
                                svclassifier = SVC(kernel='rbf', C=c)
                            elif mlmethod == "LR":
                                svclassifier = LogisticRegression(C=c)
                            else:
                                # Fail loudly rather than reusing a stale
                                # classifier from a previous iteration.
                                raise ValueError("Unknown method: " + mlmethod)
                            svclassifier.fit(X_train, y_train)
                            y_pred = svclassifier.predict(X_test)

                        precision = precision_score(y_test, y_pred, average='weighted')
                        recall = recall_score(y_test, y_pred, average='weighted')
                        fscore = f1_score(y_test, y_pred, average='weighted')
                        fprecision = fprecision + precision
                        frecall = frecall + recall
                        ffscore = ffscore + fscore

                    # Average the per-fold metrics.
                    fprecision = fprecision / n
                    frecall = frecall / n
                    ffscore = ffscore / n
                    outfile.write("hi-"+lang+";"+sim+";"+mlmethod+";"+str(c)+";"+str(n)+";"+str(fprecision)+";"+str(frecall)+";"+str(ffscore)+"\n")
                    # Flush after every row so partial results survive an interrupted run.
                    outfile.flush()

## With Gaze

In [None]:
# Cross-validated evaluation on dataset D2 using cross-lingual word vectors
# plus gaze features. One result row is written per
# (language, similarity, method, C) combination, averaged over the folds.
langs = ['mr']
# methods = ['FF', 'SVM', 'SVMP', 'SVMG', 'LR']
methods = ['SVM', 'LR']
sims = ["crossVecGaze"]

# Run the feed-forward network on the GPU when one is present and fall back
# to the CPU otherwise, instead of failing on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with io.open("CoNLLCrossVecGazePredictionResults.csv", "w+") as outfile:
    outfile.write("Language Pair;Similarity;Method;C;Folds;Precision;Recall;F-Score\n")
    for lang in langs:
        himodel = fasttext.load_model("../fastText-0.2.0/models/himodel50.bin")
        tgtmodel = fasttext.load_model("../fastText-0.2.0/models/"+lang+"model50.bin")
        hiCrossModel, hiDict, hiRevDict, NS, embDim = load_embeddings("../MUSE/hi"+lang+"/vectors-hi.txt")
        tgtCrossModel, tgtDict, tgtRevDict, tgtNS, tgtembDim = load_embeddings("../MUSE/hi"+lang+"/vectors-"+lang+".txt")
        for sim in sims:
            for mlmethod in methods:
                X,y = prepData("../data/D2/D2.csv", sim, himodel, tgtmodel, hiCrossModel, hiDict, tgtCrossModel, tgtDict, lang)
                # Regularisation strengths tried for the scikit-learn models.
                # The 'FF' network does not use C, so it is simply trained
                # once per value.
                vals = [1e-2, 1e-1, 1]
                for c in vals:
                    n = 5
                    # No random_state: it only has an effect together with
                    # shuffle=True, and recent scikit-learn versions reject
                    # it when shuffle is False. The folds stay the same
                    # deterministic, unshuffled ones as before.
                    skf = StratifiedKFold(n_splits=n)

                    fprecision = 0
                    frecall = 0
                    ffscore = 0
                    for train_index, test_index in skf.split(X, y):
                        X_train, X_test = X[train_index], X[test_index]
                        y_train, y_test = y[train_index], y[test_index]

                        if mlmethod == "FF":
                            ff_nn = FFNN(X_train.shape[1], 1)
                            ff_nn.to(device)
                            initial_lr = 0.4
                            optim = SGD(ff_nn.parameters(), lr=initial_lr, momentum=0.04, weight_decay=0.0, nesterov=True)

                            for epoch in range(1, 20):
                                for inputs, labels in batch(X_train, y_train, 20):
                                    input_tensor = torch.from_numpy(inputs).to(device)
                                    output_tensor = torch.FloatTensor(labels).to(device).unsqueeze(1)

                                    optim.zero_grad()
                                    loss, _, _ = ff_nn.loss(input_tensor, output_tensor)

                                    loss.backward()
                                    optim.step()

                                # Exponential learning-rate decay. Building a
                                # fresh optimizer also resets the momentum
                                # buffers each epoch; kept to preserve the
                                # original training dynamics.
                                lrate = initial_lr * np.exp(-0.1 * epoch)
                                optim = SGD(ff_nn.parameters(), lr=lrate, momentum=0.04, weight_decay=0.0, nesterov=True)

                            # Inference: no gradients needed, threshold the
                            # sigmoid outputs at 0.5.
                            ff_nn.eval()
                            y_pred = []
                            with torch.no_grad():
                                for inputs, _ in batch(X_test, y_test, 20):
                                    input_tensor = torch.from_numpy(inputs).to(device)
                                    probabilities = ff_nn(input_tensor).cpu().numpy()
                                    y_pred.extend((probabilities.ravel() >= 0.5).astype(int).tolist())
                        else:
                            if mlmethod == "SVM":
                                svclassifier = SVC(kernel='linear', C=c)
                            elif mlmethod == "SVMP":
                                svclassifier = SVC(kernel='poly', C=c)
                            elif mlmethod == "SVMG":
                                svclassifier = SVC(kernel='rbf', C=c)
                            elif mlmethod == "LR":
                                svclassifier = LogisticRegression(C=c)
                            else:
                                # Fail loudly rather than reusing a stale
                                # classifier from a previous iteration.
                                raise ValueError("Unknown method: " + mlmethod)
                            svclassifier.fit(X_train, y_train)
                            y_pred = svclassifier.predict(X_test)

                        precision = precision_score(y_test, y_pred, average='weighted')
                        recall = recall_score(y_test, y_pred, average='weighted')
                        fscore = f1_score(y_test, y_pred, average='weighted')
                        fprecision = fprecision + precision
                        frecall = frecall + recall
                        ffscore = ffscore + fscore

                    # Average the per-fold metrics.
                    fprecision = fprecision / n
                    frecall = frecall / n
                    ffscore = ffscore / n
                    outfile.write("hi-"+lang+";"+sim+";"+mlmethod+";"+str(c)+";"+str(n)+";"+str(fprecision)+";"+str(frecall)+";"+str(ffscore)+"\n")
                    # Flush after every row so partial results survive an interrupted run.
                    outfile.flush()