In [1]:
import fastai
from fastai import *
from fastai.text import * 
from fastai.text.models import *
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score
import swifter
import nltk
from nltk.corpus import stopwords
import re 
from bs4 import BeautifulSoup

from nltk.stem.snowball import SnowballStemmer

In [2]:
stops={}

In [3]:
#PREPROCEESING THE TEXT

def text_preprocessing(text, language, minWordSize):
    
    # remove html
    text_html = BeautifulSoup(text,"html.parser" ).get_text()
    
    # remove non-letters
    text_non_letters = re.sub("[^a-zA-Z']", " ", text_html) 
        
    # convert to lower-case
    text_lower = text_non_letters.lower()
    
    # remove stop words
    text_no_stop_words = ' '
    for w in text_lower.split():
        if w not in stops:  
            text_no_stop_words = text_no_stop_words + w + ' '  
    
    # do stemming
    text_stemmer = ' '
    stemmer = SnowballStemmer(language)
    for w in text_no_stop_words.split():
        text_stemmer = text_stemmer + stemmer.stem(w) + ' '
         
    # remove short words
    text_no_short_words = ' '
    for w in text_stemmer.split(): 
        if len(w) >=minWordSize:
            text_no_short_words = text_no_short_words + w + ' '
    return text_no_short_words

In [4]:
#PREPROCEESING THE TEXT

def text_preprocessing_simple(text, language, minWordSize):
    
    # remove html
    text_html = BeautifulSoup(text,"html.parser" ).get_text()
    
    # remove non-letters
    text_non_letters = re.sub("[^a-zA-Z']", " ", text_html) 
        
    # convert to lower-case
    text_lower = text_non_letters.lower()
    return text_lower

In [5]:
def change_ratings(x):
    if x in [0, 1,2]:
        return -1
    elif x == 3:
        return 0
    else:
        return 1

In [6]:
def get_old_data():
    #load and prepare old data
    df_reviews = pd.read_csv('Input_data/Old_data_reviews.csv',sep=',',quotechar='"' ) #contains the reviews from Facebook and Tripadvisor
    df_cat_labels = pd.read_csv('Input_data/Old_data_categorisation_labels.csv',sep=',',quotechar='"') # contains the classlabels of the reviews
    df_merged_data = pd.merge(df_cat_labels,df_reviews,how='left',left_on='review_id',right_on='id')

    # Drop unnecessary columns
    df_merged_data = df_merged_data.drop(['labeler_id', 'post_type', 'datetime_posted','likes', 'traveler_type',
                        'rating_food','rating_service', 'rating_environment', 'rating_value',
                         'reviewer_id','source_subject_id','id_x','review_id','id_y','source'],axis=1)

    #select english reviews
    olddata =  df_merged_data.loc[df_merged_data['language'] == 'en']
    olddata['sentiment'] = olddata['rating'].apply(change_ratings)
    reviews_old = olddata['text'].values
    sentiment_old=olddata['sentiment'].values
    return reviews_old, sentiment_old

In [7]:
def get_new_data():
    #load and prepare new data
    newdata=pd.read_csv('Input_data/New_data.csv')
    #Split category column 
    newdata['category']=newdata['Categories'].apply(lambda x:x.split(';'))

    def category_includer(data,string):    
        for i in range(len(data)):
            data.loc[i,string]=0
            column=list(data.loc[i,'category'])
            if string in column:
                data.loc[i,string]=1
        return data

    newdata=category_includer(newdata,'experience')
    newdata=category_includer(newdata,'service')
    newdata=category_includer(newdata,'consistency')
    newdata=category_includer(newdata,'value')
    newdata=category_includer(newdata,'food')
    newdata=category_includer(newdata,'convenience')

    #dropping the two columns and filter only English
    newdata=newdata.drop(['Categories','category'],axis=1)
    newdata=newdata[newdata['Language']=="eng"]
    reviews_new= newdata['Text'].values
    sentiment_new = newdata['Sentiment'].values

    return reviews_new, sentiment_new

In [8]:
def get_all_data():
    reviews_old, sentiment_old = get_old_data()
    reviews_new, sentiment_new = get_new_data()
    reviews = list(reviews_old) + list(reviews_new)
    sentiment = list(sentiment_old) + list(sentiment_new)
    return reviews, sentiment

In [9]:
def run_preprocessing(preprocess_fn, reviews_train, reviews_test, language, minWordLength):
    for i in range(len(reviews_train)):
        reviews_train[i] = preprocess_fn(reviews_train[i], language, minWordLength)
    for i in range(len(reviews_test)):
        reviews_test[i] = preprocess_fn(reviews_test[i], language, minWordLength)

In [10]:
def preprocess_data(type_data="all", preprocessing="all"):
    #Get data
    if type_data == "all":
        reviews, sentiments = get_all_data()
    elif type_data == "old":
        reviews, sentiments = get_old_data()
    else:
        reviews, sentiments = get_new_data()

    #test train split
    reviews_train, reviews_test, sentiment_train, sentiment_test = train_test_split(reviews, sentiments, test_size=0.2,random_state=0,stratify=sentiments)

    #bag of words
    language = 'english'
    minWordLength = 2 # shorter words will be removed
    
    if preprocessing =="all":
        run_preprocessing(text_preprocessing, reviews_train, reviews_test, language, minWordLength)
    else:
        run_preprocessing(text_preprocessing_simple, reviews_train, reviews_test, language, minWordLength)
    return reviews_train, reviews_test, sentiment_train, sentiment_test


In [48]:
def train_model(type_data="all", preprocessing="all"):
    reviews_train, reviews_test, sentiment_train, sentiment_test = preprocess_data(type_data, preprocessing)
    reviews_new, sentiments_new = get_new_data()
    newdata = pd.DataFrame(list(zip(reviews_new, sentiments_new)), columns=['text', 'labels'])
    # Create language databunch
    data_lm = TextLMDataBunch.from_df(train_df = pd.DataFrame({'text': reviews_train, 'labels': sentiment_train}), 
                                      valid_df = pd.DataFrame({'text': reviews_test, 'labels':sentiment_test}), 
                                      path = "",
                                     text_cols=0,
                                     label_cols=1)

    # Create classifier databunch
    data_clas = TextClasDataBunch.from_df(path = "", 
                                          train_df = pd.DataFrame({'text': reviews_train, 'labels': sentiment_train}),
                                          valid_df = pd.DataFrame({'text': reviews_test, 'labels': sentiment_test}), 
                                          vocab=data_lm.train_ds.vocab, bs=16,
                                         text_cols=0,
                                     label_cols=1)
    learn = language_model_learner(data_lm, AWD_LSTM, pretrained=True, drop_mult=0.3)
    learn.fit_one_cycle(1, 1e-2)
    learn.unfreeze()
    learn.fit_one_cycle(5, 1e-3, moms=(0.8,0.7))
    learn.save_encoder('fine_tuned_enc')
    learn = text_classifier_learner(data_clas, AWD_LSTM,drop_mult=0.2);
    learn.load_encoder('fine_tuned_enc')
    learn.fit_one_cycle(1, 2e-2, moms=(0.8,0.7))
    learn.freeze_to(-2)
    learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2), moms=(0.8,0.7))
    learn.freeze_to(-3)
    learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3), moms=(0.8,0.7))
    learn.unfreeze()
    learn.fit_one_cycle(10, slice(1e-3/(2.6**4),1e-3), moms=(0.8,0.7))

    preds, targets = learn.get_preds()
    predictions = np.argmax(preds, axis = 1)
    
    accuracy_matrix=pd.crosstab(predictions, targets)
    accuracy_matrix2=accuracy_matrix.copy()
    accuracy_matrix2=accuracy_matrix2.rename(columns={0:-1,1:0,2:1})
    accuracy_matrix2=accuracy_matrix2.rename(index={0: -1, 1:0, 2: 1})
    accuracy_matrix2 = accuracy_matrix2.T
    
    precision_minus_1=accuracy_matrix[0][0]/accuracy_matrix.sum(1)[0]
    precision_0 = accuracy_matrix[1][1]/accuracy_matrix.sum(1)[1]
    precision_1 = accuracy_matrix[2][2]/accuracy_matrix.sum(1)[2]
    precision=[precision_minus_1,precision_0,precision_1]
    recall_minus_1 = accuracy_matrix[0][0]/accuracy_matrix.sum(0)[0]
    recall_0 = accuracy_matrix[1][1]/accuracy_matrix.sum(0)[1]
    recall_1 = accuracy_matrix[2][2]/accuracy_matrix.sum(0)[2]
    recall=[recall_minus_1,recall_0,recall_1]
    weighted_precision = (precision[0]*accuracy_matrix.sum(0)[0]+ precision[1]*accuracy_matrix.sum(0)[1]+ precision[2]*accuracy_matrix.sum(0)[2])/sum(accuracy_matrix.sum(0))
    weighted_recall = (recall[0]*accuracy_matrix.sum(1)[0]+ recall[1]*accuracy_matrix.sum(1)[1]+ recall[2]*accuracy_matrix.sum(1)[2])/sum(accuracy_matrix.sum(1))
    learn.export('train_'+ type_data + '_preprocess_' + preprocessing + '_ulmfit.pkl');
    
    newdata['pred'] = newdata['text'].apply(lambda txt: str(learn.predict(txt)[0]))
    
    accuracy_matrix_test = pd.crosstab(newdata.pred,newdata.labels)
    accuracy_matrix_test=accuracy_matrix_test.rename(columns={-1:0,0:1,1:2})
    accuracy_matrix_test=accuracy_matrix_test.rename(index={-1:0,0:1,1:2})

    precision_minus_1=accuracy_matrix_test[0][0]/accuracy_matrix_test.sum(1)[0]
    precision_0 = accuracy_matrix_test[1][1]/accuracy_matrix_test.sum(1)[1]
    precision_1 = accuracy_matrix_test[2][2]/accuracy_matrix_test.sum(1)[2]
    precision=[precision_minus_1,precision_0,precision_1]
    recall_minus_1 = accuracy_matrix_test[0][0]/accuracy_matrix_test.sum(0)[0]
    recall_0 = accuracy_matrix_test[1][1]/accuracy_matrix_test.sum(0)[1]
    recall_1 = accuracy_matrix_test[2][2]/accuracy_matrix_test.sum(0)[2]
    recall=[recall_minus_1,recall_0,recall_1]
    weighted_precision_test = (precision[0]*accuracy_matrix_test.sum(0)[0]+ precision[1]*accuracy_matrix_test.sum(0)[1]+ precision[2]*accuracy_matrix_test.sum(0)[2])/sum(accuracy_matrix_test.sum(0))
    weighted_recall_test = (recall[0]*accuracy_matrix_test.sum(1)[0]+ recall[1]*accuracy_matrix_test.sum(1)[1]+ recall[2]*accuracy_matrix_test.sum(1)[2])/sum(accuracy_matrix_test.sum(1))
    
    return learn, accuracy_matrix, weighted_precision, weighted_recall, accuracy_matrix_test, weighted_precision_test, weighted_recall_test

## New Data for training, No stemming

In [49]:
learn, accuracy_matrix, weighted_precision, weighted_recall, accuracy_matrix_test, weighted_precision_test, weighted_recall_test=train_model("new", "simple")
print("New, Simple")
print(accuracy_matrix)
print("Validation Dataset")
print("Weighted Precision",weighted_precision)
print("Weighted Reccall", weighted_recall)
print("Test Dataset")
print(accuracy_matrix_test)
print("Weighted Precison Test",  weighted_precision_test)
print("Weighted Recall Test", weighted_recall_test)

  ' that document to Beautiful Soup.' % decoded_markup


epoch,train_loss,valid_loss,accuracy,time
0,5.303167,4.729417,0.176644,00:08


epoch,train_loss,valid_loss,accuracy,time
0,4.690261,4.484616,0.208807,00:09
1,4.494267,4.288888,0.227841,00:09
2,4.283785,4.206264,0.238413,00:09
3,4.093012,4.177383,0.24168,00:09
4,3.955849,4.17759,0.241985,00:09


epoch,train_loss,valid_loss,accuracy,time
0,0.441748,0.381253,0.865112,00:35


epoch,train_loss,valid_loss,accuracy,time
0,0.410132,0.339771,0.878296,00:35


epoch,train_loss,valid_loss,accuracy,time
0,0.38236,0.306028,0.896552,00:36


epoch,train_loss,valid_loss,accuracy,time
0,0.245521,0.293865,0.891481,00:40
1,0.228332,0.283576,0.901623,00:44
2,0.244146,0.290463,0.902637,00:36
3,0.1733,0.304525,0.900609,00:43
4,0.097057,0.303391,0.896552,00:42
5,0.080853,0.338199,0.902637,00:43
6,0.088923,0.345449,0.896552,00:39
7,0.061048,0.333379,0.907708,00:44
8,0.040892,0.33875,0.900609,00:41
9,0.05082,0.355306,0.900609,00:41


New, Simple
col_0    0   1    2
row_0              
0      139  13   10
1        3  16    6
2       35  31  733
Validation Dataset
Weighted Precision 0.8898585528131335
Weighted Reccall 0.9288224288661263
Test Dataset
labels    0    1     2
pred                  
-1      725   46   115
0        88  160    99
1        70   96  3531
Weighted Precison Test 0.9003327801552753
Weighted Recall Test 0.8918959655990852


## New data for training, with stemming

In [50]:
learn, accuracy_matrix, weighted_precision, weighted_recall, accuracy_matrix_test, weighted_precision_test, weighted_recall_test=train_model("new", "all")
print("New, Simple")
print(accuracy_matrix)
print("Validation Dataset")
print("Weighted Precision",weighted_precision)
print("Weighted Reccall", weighted_recall)
print("Test Dataset")
print(accuracy_matrix_test)
print("Weighted Precison Test",  weighted_precision_test)
print("Weighted Recall Test", weighted_recall_test)

  ' that document to Beautiful Soup.' % decoded_markup


epoch,train_loss,valid_loss,accuracy,time
0,5.908585,5.147351,0.151875,00:08


epoch,train_loss,valid_loss,accuracy,time
0,5.097179,4.878819,0.180357,00:08
1,4.890745,4.643445,0.202835,00:08
2,4.669193,4.532998,0.215,00:08
3,4.475345,4.494557,0.218371,00:08
4,4.336513,4.489472,0.219129,00:08


epoch,train_loss,valid_loss,accuracy,time
0,0.50121,0.397431,0.872211,00:33


epoch,train_loss,valid_loss,accuracy,time
0,0.433548,0.349709,0.87931,00:34


epoch,train_loss,valid_loss,accuracy,time
0,0.395152,0.326464,0.874239,00:34


epoch,train_loss,valid_loss,accuracy,time
0,0.276599,0.303386,0.891481,00:38
1,0.256259,0.29346,0.88641,00:42
2,0.229936,0.295933,0.896552,00:34
3,0.18477,0.319564,0.883367,00:41
4,0.121085,0.323504,0.890467,00:40
5,0.095024,0.347343,0.878296,00:41
6,0.087869,0.380421,0.882353,00:38
7,0.067997,0.376579,0.880325,00:42
8,0.047322,0.373544,0.87931,00:39
9,0.052153,0.393247,0.877282,00:39


New, Simple
col_0    0   1    2
row_0              
0      132  21   17
1        4   9    8
2       41  30  724
Validation Dataset
Weighted Precision 0.8572593873555393
Weighted Reccall 0.9111503859123793
Test Dataset
labels    0    1     2
pred                  
-1      608   60   214
0        48   98    66
1       227  144  3465
Weighted Precison Test 0.8379501441945325
Weighted Recall Test 0.857059310739877


## Old data for training, No stemming and no stop word filters

In [51]:
learn, accuracy_matrix, weighted_precision, weighted_recall, accuracy_matrix_test, weighted_precision_test, weighted_recall_test=train_model("old", "simple")
print("New, Simple")
print(accuracy_matrix)
print("Validation Dataset")
print("Weighted Precision",weighted_precision)
print("Weighted Reccall", weighted_recall)
print("Test Dataset")
print(accuracy_matrix_test)
print("Weighted Precison Test",  weighted_precision_test)
print("Weighted Recall Test", weighted_recall_test)

epoch,train_loss,valid_loss,accuracy,time
0,4.675793,4.382808,0.223878,00:37


epoch,train_loss,valid_loss,accuracy,time
0,4.208443,4.181471,0.245524,00:40
1,3.954906,3.959342,0.270059,00:40
2,3.626609,3.804894,0.289895,00:40
3,3.369307,3.727332,0.300688,00:40
4,3.208462,3.713552,0.302305,00:40


epoch,train_loss,valid_loss,accuracy,time
0,0.265474,0.217008,0.918599,02:24


epoch,train_loss,valid_loss,accuracy,time
0,0.216557,0.160888,0.933892,02:27


epoch,train_loss,valid_loss,accuracy,time
0,0.13558,0.116568,0.953133,02:36


epoch,train_loss,valid_loss,accuracy,time
0,0.07872,0.110664,0.958559,02:18
1,0.068201,0.094161,0.968426,02:25
2,0.030822,0.077103,0.973853,02:08
3,0.019416,0.088345,0.976813,02:41
4,0.016053,0.093884,0.978293,02:08
5,0.014307,0.08212,0.980266,02:42
6,0.008344,0.087175,0.980266,02:30
7,0.008263,0.096331,0.980266,02:32
8,0.007604,0.093564,0.9778,02:27
9,0.00508,0.09468,0.979773,02:10


New, Simple
col_0   0    1     2
row_0               
0      64    4     0
1       6  102     1
2       7   23  1820
Validation Dataset
Weighted Precision 0.9791102391888702
Weighted Reccall 0.9825799712458549
Test Dataset
labels    0    1     2
pred                  
-1      238   30   118
0       533  142   481
1       112  130  3146
Weighted Precison Test 0.82333413799613
Weighted Recall Test 0.7086595078205956


## Old data for training, With stemming and stop words filter

In [52]:
learn, accuracy_matrix, weighted_precision, weighted_recall, accuracy_matrix_test, weighted_precision_test, weighted_recall_test=train_model("old", "all")
print("New, Simple")
print(accuracy_matrix)
print("Validation Dataset")
print("Weighted Precision",weighted_precision)
print("Weighted Reccall", weighted_recall)
print("Test Dataset")
print(accuracy_matrix_test)
print("Weighted Precison Test",  weighted_precision_test)
print("Weighted Recall Test", weighted_recall_test)

epoch,train_loss,valid_loss,accuracy,time
0,5.021291,4.692528,0.200357,00:33


epoch,train_loss,valid_loss,accuracy,time
0,4.52693,4.475866,0.222341,00:35
1,4.262245,4.235905,0.246499,00:36
2,3.942935,4.074142,0.265344,00:36
3,3.682522,3.989883,0.277443,00:36
4,3.537864,3.975934,0.278769,00:36


epoch,train_loss,valid_loss,accuracy,time
0,0.261477,0.227974,0.918599,02:17


epoch,train_loss,valid_loss,accuracy,time
0,0.217068,0.161254,0.939319,02:20


epoch,train_loss,valid_loss,accuracy,time
0,0.147842,0.118912,0.955106,02:27


epoch,train_loss,valid_loss,accuracy,time
0,0.075863,0.104161,0.96448,02:10
1,0.079051,0.080473,0.97336,02:17
2,0.040725,0.072297,0.977306,02:01
3,0.027082,0.074924,0.97928,02:33
4,0.015839,0.073085,0.980266,02:01
5,0.017688,0.079023,0.978786,02:33
6,0.00919,0.066244,0.978786,02:22
7,0.007403,0.076229,0.981253,02:24
8,0.004785,0.074761,0.980266,02:20
9,0.005441,0.078951,0.980266,02:03


New, Simple
col_0   0    1     2
row_0               
0      62    2     0
1      10  109     5
2       5   18  1816
Validation Dataset
Weighted Precision 0.9798786549775163
Weighted Reccall 0.9818738223666391
Test Dataset
labels    0    1     2
pred                  
-1      466   56   235
0       139   41    74
1       278  205  3436
Weighted Precison Test 0.7861575773841087
Weighted Recall Test 0.8173693032749333


## All data, no stemming and no stop words filter

In [53]:
learn, accuracy_matrix, weighted_precision, weighted_recall, accuracy_matrix_test, weighted_precision_test, weighted_recall_test=train_model("all", "simple")
print("New, Simple")
print(accuracy_matrix)
print("Validation Dataset")
print("Weighted Precision",weighted_precision)
print("Weighted Reccall", weighted_recall)
print("Test Dataset")
print(accuracy_matrix_test)
print("Weighted Precison Test",  weighted_precision_test)
print("Weighted Recall Test", weighted_recall_test)

  ' that document to Beautiful Soup.' % decoded_markup


epoch,train_loss,valid_loss,accuracy,time
0,4.653063,4.424557,0.220938,00:50


epoch,train_loss,valid_loss,accuracy,time
0,4.23513,4.233929,0.243478,00:53
1,3.985912,4.040794,0.264422,00:53
2,3.706315,3.915871,0.280566,00:53
3,3.471212,3.85536,0.289029,00:53
4,3.331177,3.847532,0.291003,00:53


epoch,train_loss,valid_loss,accuracy,time
0,0.315224,0.289059,0.889147,03:16


epoch,train_loss,valid_loss,accuracy,time
0,0.320256,0.233335,0.913375,02:58


epoch,train_loss,valid_loss,accuracy,time
0,0.214178,0.19703,0.936608,03:01


epoch,train_loss,valid_loss,accuracy,time
0,0.153862,0.191787,0.935944,03:07
1,0.118514,0.250161,0.943578,03:07
2,0.097045,0.325558,0.946233,02:45
3,0.072589,0.25295,0.949884,02:49
4,0.045967,0.190514,0.950216,03:15
5,0.032175,0.197037,0.947561,03:20
6,0.025014,0.218933,0.945237,03:13
7,0.025565,0.221446,0.95088,02:54
8,0.013063,0.231569,0.94922,03:10
9,0.010472,0.222711,0.948556,03:00


New, Simple
col_0    0    1     2
row_0                
0      184   12     6
1       19  127    17
2       51   50  2547
Validation Dataset
Weighted Precision 0.9460999272731566
Weighted Reccall 0.9559116344891503
Test Dataset
labels    0    1     2
pred                  
-1      808  115   352
0        41  127   104
1        34   60  3289
Weighted Precison Test 0.8806345705579162
Weighted Recall Test 0.8625084165007728


## All data, with stemming and stop words filters

In [54]:

learn, accuracy_matrix, weighted_precision, weighted_recall, accuracy_matrix_test, weighted_precision_test, weighted_recall_test=train_model("all", "all")
print("New, Simple")
print(accuracy_matrix)
print("Validation Dataset")
print("Weighted Precision",weighted_precision)
print("Weighted Reccall", weighted_recall)
print("Test Dataset")
print(accuracy_matrix_test)
print("Weighted Precison Test",  weighted_precision_test)
print("Weighted Recall Test", weighted_recall_test)

  ' that document to Beautiful Soup.' % decoded_markup


epoch,train_loss,valid_loss,accuracy,time
0,4.943565,4.700051,0.199762,00:44


epoch,train_loss,valid_loss,accuracy,time
0,4.520302,4.496871,0.221992,00:47
1,4.263268,4.286583,0.242142,00:47
2,3.99887,4.155742,0.258056,00:47
3,3.766071,4.091449,0.267309,00:47
4,3.635593,4.083505,0.268882,00:47


epoch,train_loss,valid_loss,accuracy,time
0,0.296639,0.2979,0.893794,03:06


epoch,train_loss,valid_loss,accuracy,time
0,0.300323,0.229887,0.908397,02:49


epoch,train_loss,valid_loss,accuracy,time
0,0.217801,0.210939,0.924328,02:52


epoch,train_loss,valid_loss,accuracy,time
0,0.167806,0.198494,0.92997,02:57
1,0.133437,0.169816,0.945569,02:56
2,0.105211,0.174563,0.951211,02:36
3,0.074911,0.185035,0.947561,02:40
4,0.048558,0.194357,0.948888,03:05
5,0.037593,0.194678,0.95088,03:09
6,0.021887,0.20029,0.95088,03:03
7,0.025019,0.1909,0.95453,02:45
8,0.012538,0.207416,0.954198,03:00
9,0.01174,0.204269,0.952539,02:51


New, Simple
col_0    0    1     2
row_0                
0      191   10     8
1       17  129    12
2       46   50  2550
Validation Dataset
Weighted Precision 0.9502794218754288
Weighted Reccall 0.9593133911302317
Test Dataset
labels    0    1     2
pred                  
-1      677   78   279
0        17   22    22
1       189  202  3444
Weighted Precison Test 0.8215473575289088
Weighted Recall Test 0.8770755437485549
