In [8]:
import os
import pandas as pd
from tqdm.notebook import tqdm
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold

tfidf_vectorizer = TfidfVectorizer()

file_list = []
file_list.append(r'./200416_label0_dataset_sentence.xlsx')
file_list.append(r'./200416_label1_dataset_sentence.xlsx')
file_list.append(r'./200416_label2_dataset_sentence.xlsx')
file_list.append(r'./200416_label3_dataset_sentence.xlsx')
file_list.append(r'./200416_label4_dataset_sentence.xlsx')
file_list.append(r'./200416_label5_dataset_sentence.xlsx')
file_list.append(r'./200416_label6_dataset_sentence.xlsx')

In [9]:

def baseline_model(texts,labels,Round,sender_num):
    
    x_train = tfidf_vectorizer.fit_transform(texts).toarray()
#     x_test = tfidf_vectorizer.transform(x_test)
    svd = TruncatedSVD(n_components=400).fit(x_train)
    reduced = svd.transform(x_train)
    total = reduced.tolist()
    for item in range(len(total)):
        total[item] = np.append(total[item], sender_num[item])
    for item in range(len(total)):
        total[item] = np.append(total[item], Round[item])
    
    Model_name = ['XGBClassifier','BernoulliNB','Svm (linear)','Logistic Regression',
              'Random Forest','kNN','Decision Tree']
    function = [XGBClassifier(),BernoulliNB(),svm.SVC(kernel="linear"),LogisticRegression(),
              RandomForestClassifier(),KNeighborsClassifier(),DecisionTreeClassifier()]
    #Model_name = ['BernoulliNB']
    #function = [BernoulliNB()]
        
    
    perform_f1 = []
    perform_acc = []
    perform_recall = []
    perform_precision = []
    perform_data_f1 = []
    perform_data_acc = []
    
    all_df = pd.DataFrame({'file_name':[''],
                           'model':[''],
                           'precision':[''],
                           'recall':[''],
                           'f1-score':[''],
                           'support':[''] })


        
    for i in tqdm(range(len(function))):

        if Model_name[i] == 'XGBClassifier':
            new_total = np.array(total)
        else:
            new_total = total                


        kf = KFold(n_splits=10, shuffle=True)       
        model = function[i]
        
        predicted = []
        expected = []
        for train_index, test_index in kf.split(new_total):
            x_train = np.array(new_total)[train_index]
            y_train = np.array(labels)[train_index]
            x_test = np.array(new_total)[test_index]
            y_test = np.array(labels)[test_index]

            model.fit(x_train, y_train)
            expected.extend(y_test)
            predicted.extend(model.predict(x_test))
        
        print('Model Name:'+ Model_name[i])
        print(metrics.classification_report(expected, predicted))
        print('weighted-avg prf: {0}, {1}, {2}'.format(
        metrics.precision_score(expected, predicted, average='weighted'),
        metrics.recall_score(expected, predicted, average='weighted'),
        metrics.f1_score(expected, predicted, average='weighted'))
        )       

  
        report = metrics.classification_report(expected, predicted, output_dict=True)
        df = pd.DataFrame(report).transpose()
        df.insert(0,'model',Model_name[i])
        
        all_df = all_df.append(df)
        
       
    return all_df



In [10]:

for x in range(len(file_list)):
    dataset = pd.read_excel(file_list[x])

    tmp = []
    for i in dataset['sender']:
        if i == 'customer':
            tmp.append(0)
        else:
            tmp.append(1)
    dataset['sender_num'] = tmp
    dataset.head()

    texts = list(dataset["texts"])
    labels = list(dataset["labels"])
    Round = list(dataset["round"])
    sender_num = list(dataset["sender_num"])
    
    #baseline_model(texts,labels,Round,sender_num)
    df_report = baseline_model(texts,labels,Round,sender_num)
    df_report.to_csv('./Kfold'+ str(x) +'.csv')
    
    
    

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

Model Name:XGBClassifier
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     10668
           1       1.00      0.83      0.91      6487

    accuracy                           0.94     17155
   macro avg       0.95      0.92      0.93     17155
weighted avg       0.94      0.94      0.93     17155

weighted-avg prf: 0.9413047629035085, 0.9356455843777325, 0.9342804543457371


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:BernoulliNB
              precision    recall  f1-score   support

           0       0.85      0.80      0.83     10668
           1       0.70      0.77      0.74      6487

    accuracy                           0.79     17155
   macro avg       0.78      0.79      0.78     17155
weighted avg       0.80      0.79      0.79     17155

weighted-avg prf: 0.7965254821286073, 0.7910230253570387, 0.7926890832633282


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Svm (linear)
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     10668
           1       1.00      0.83      0.91      6487

    accuracy                           0.94     17155
   macro avg       0.95      0.91      0.93     17155
weighted avg       0.94      0.94      0.93     17155

weighted-avg prf: 0.9416535642834423, 0.9356455843777325, 0.9342391077339562


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Logistic Regression
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     10668
           1       1.00      0.83      0.91      6487

    accuracy                           0.94     17155
   macro avg       0.95      0.92      0.93     17155
weighted avg       0.94      0.94      0.93     17155

weighted-avg prf: 0.9416834282014543, 0.9358787525502769, 0.934502773056279


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Random Forest
              precision    recall  f1-score   support

           0       0.87      0.97      0.92     10668
           1       0.93      0.77      0.85      6487

    accuracy                           0.89     17155
   macro avg       0.90      0.87      0.88     17155
weighted avg       0.90      0.89      0.89     17155

weighted-avg prf: 0.8971763429135241, 0.8933255610609152, 0.8909463176694105


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:kNN
              precision    recall  f1-score   support

           0       0.92      0.95      0.93     10668
           1       0.91      0.87      0.89      6487

    accuracy                           0.92     17155
   macro avg       0.92      0.91      0.91     17155
weighted avg       0.92      0.92      0.92     17155

weighted-avg prf: 0.9171557087686592, 0.9174001748761295, 0.9170513907475575


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Decision Tree
              precision    recall  f1-score   support

           0       0.93      0.92      0.93     10668
           1       0.87      0.88      0.88      6487

    accuracy                           0.91     17155
   macro avg       0.90      0.90      0.90     17155
weighted avg       0.91      0.91      0.91     17155

weighted-avg prf: 0.9081070545527254, 0.9078985718449432, 0.9079887570004253


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,





HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

Model Name:XGBClassifier
              precision    recall  f1-score   support

           0       0.94      0.99      0.97     15670
           1       0.83      0.31      0.46      1485

    accuracy                           0.94     17155
   macro avg       0.88      0.65      0.71     17155
weighted avg       0.93      0.94      0.92     17155

weighted-avg prf: 0.9291958187059179, 0.9350626639463713, 0.9213733280625589


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:BernoulliNB
              precision    recall  f1-score   support

           0       0.93      0.96      0.95     15670
           1       0.39      0.26      0.32      1485

    accuracy                           0.90     17155
   macro avg       0.66      0.61      0.63     17155
weighted avg       0.89      0.90      0.89     17155

weighted-avg prf: 0.8856364109664862, 0.9008452346254736, 0.8919692872397518


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Svm (linear)
              precision    recall  f1-score   support

           0       0.94      1.00      0.96     15670
           1       0.86      0.27      0.41      1485

    accuracy                           0.93     17155
   macro avg       0.90      0.63      0.69     17155
weighted avg       0.93      0.93      0.92     17155

weighted-avg prf: 0.9284249435792589, 0.9330224424366074, 0.9166862526354362


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Logistic Regression
              precision    recall  f1-score   support

           0       0.94      0.99      0.96     15670
           1       0.84      0.28      0.42      1485

    accuracy                           0.93     17155
   macro avg       0.89      0.64      0.69     17155
weighted avg       0.93      0.93      0.92     17155

weighted-avg prf: 0.9275820573180077, 0.9331390265228796, 0.9175203379732029


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Random Forest
              precision    recall  f1-score   support

           0       0.93      0.99      0.96     15670
           1       0.78      0.21      0.33      1485

    accuracy                           0.93     17155
   macro avg       0.85      0.60      0.65     17155
weighted avg       0.92      0.93      0.91     17155

weighted-avg prf: 0.9169863523576643, 0.926610317691635, 0.9069523450978153


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:kNN
              precision    recall  f1-score   support

           0       0.93      0.98      0.96     15670
           1       0.60      0.28      0.38      1485

    accuracy                           0.92     17155
   macro avg       0.77      0.63      0.67     17155
weighted avg       0.91      0.92      0.91     17155

weighted-avg prf: 0.9056843881455773, 0.921364033809385, 0.9077766720858403


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Decision Tree
              precision    recall  f1-score   support

           0       0.94      0.94      0.94     15670
           1       0.37      0.41      0.39      1485

    accuracy                           0.89     17155
   macro avg       0.66      0.67      0.66     17155
weighted avg       0.89      0.89      0.89     17155

weighted-avg prf: 0.8941569003133893, 0.8897114543864763, 0.8918552462835261


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,





HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

Model Name:XGBClassifier
              precision    recall  f1-score   support

           0       0.90      1.00      0.95     11705
           1       0.99      0.76      0.86      5450

    accuracy                           0.92     17155
   macro avg       0.95      0.88      0.90     17155
weighted avg       0.93      0.92      0.92     17155

weighted-avg prf: 0.9283595426754342, 0.9211308656368405, 0.9178685831347927


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:BernoulliNB
              precision    recall  f1-score   support

           0       0.91      0.85      0.88     11705
           1       0.72      0.82      0.77      5450

    accuracy                           0.84     17155
   macro avg       0.82      0.84      0.82     17155
weighted avg       0.85      0.84      0.84     17155

weighted-avg prf: 0.8501553318301497, 0.8426114835324978, 0.8449290402810672


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Svm (linear)
              precision    recall  f1-score   support

           0       0.91      0.98      0.94     11705
           1       0.95      0.79      0.86      5450

    accuracy                           0.92     17155
   macro avg       0.93      0.88      0.90     17155
weighted avg       0.92      0.92      0.92     17155

weighted-avg prf: 0.9208052219083016, 0.9185660157388517, 0.91630939225663


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Logistic Regression
              precision    recall  f1-score   support

           0       0.92      0.96      0.94     11705
           1       0.90      0.82      0.86      5450

    accuracy                           0.91     17155
   macro avg       0.91      0.89      0.90     17155
weighted avg       0.91      0.91      0.91     17155

weighted-avg prf: 0.913336283123994, 0.9138443602448266, 0.9126638270621755


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Random Forest
              precision    recall  f1-score   support

           0       0.90      0.98      0.94     11705
           1       0.94      0.76      0.84      5450

    accuracy                           0.91     17155
   macro avg       0.92      0.87      0.89     17155
weighted avg       0.91      0.91      0.91     17155

weighted-avg prf: 0.9121593395944306, 0.9097639172252987, 0.9070093367782277


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:kNN
              precision    recall  f1-score   support

           0       0.90      0.97      0.94     11705
           1       0.93      0.78      0.85      5450

    accuracy                           0.91     17155
   macro avg       0.92      0.88      0.89     17155
weighted avg       0.91      0.91      0.91     17155

weighted-avg prf: 0.913411376491575, 0.911979014864471, 0.90967181324349


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Decision Tree
              precision    recall  f1-score   support

           0       0.92      0.92      0.92     11705
           1       0.83      0.83      0.83      5450

    accuracy                           0.89     17155
   macro avg       0.88      0.88      0.88     17155
weighted avg       0.89      0.89      0.89     17155

weighted-avg prf: 0.8936008384849956, 0.8935587292334596, 0.8935795680686273


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,





HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

Model Name:XGBClassifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7683
           1       1.00      1.00      1.00      9472

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251792403424, 0.9998251238705916, 0.999825120640809


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:BernoulliNB
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7683
           1       1.00      1.00      1.00      9472

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251792403424, 0.9998251238705916, 0.999825120640809


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Svm (linear)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7683
           1       1.00      1.00      1.00      9472

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251792403424, 0.9998251238705916, 0.999825120640809


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Logistic Regression
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7683
           1       1.00      1.00      1.00      9472

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251792403424, 0.9998251238705916, 0.999825120640809


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Random Forest
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      7683
           1       1.00      0.99      0.99      9472

    accuracy                           0.99     17155
   macro avg       0.99      0.99      0.99     17155
weighted avg       0.99      0.99      0.99     17155

weighted-avg prf: 0.9930243359228726, 0.9929466627805305, 0.9929507888100455


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:kNN
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7683
           1       1.00      1.00      1.00      9472

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251792403424, 0.9998251238705916, 0.999825120640809


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Decision Tree
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7683
           1       1.00      1.00      1.00      9472

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9997085430685323, 0.9997085397843194, 0.9997085379920424


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,





HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

Model Name:XGBClassifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9476
           1       1.00      1.00      1.00      7679

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9999417155469736, 0.9999417079568639, 0.9999417083165473


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:BernoulliNB
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9476
           1       1.00      1.00      1.00      7679

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9999417155469736, 0.9999417079568639, 0.9999417083165473


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Svm (linear)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9476
           1       1.00      1.00      1.00      7679

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9999417155469736, 0.9999417079568639, 0.9999417083165473


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Logistic Regression
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9476
           1       1.00      1.00      1.00      7679

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9999417155469736, 0.9999417079568639, 0.9999417083165473


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Random Forest
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9476
           1       0.99      0.99      0.99      7679

    accuracy                           0.99     17155
   macro avg       0.99      0.99      0.99     17155
weighted avg       0.99      0.99      0.99     17155

weighted-avg prf: 0.9949284491673617, 0.9949285922471582, 0.994928435250481


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:kNN
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9476
           1       1.00      1.00      1.00      7679

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9999417155469736, 0.9999417079568639, 0.9999417083165473


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Decision Tree
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9476
           1       1.00      1.00      1.00      7679

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9999417155469736, 0.9999417079568639, 0.9999417083165473


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,





HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

Model Name:XGBClassifier
              precision    recall  f1-score   support

           0       0.97      0.90      0.93     11618
           1       0.82      0.94      0.88      5537

    accuracy                           0.91     17155
   macro avg       0.90      0.92      0.91     17155
weighted avg       0.92      0.91      0.92     17155

weighted-avg prf: 0.9222184782783577, 0.9148353249781405, 0.9162364302918895


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:BernoulliNB
              precision    recall  f1-score   support

           0       0.97      0.85      0.91     11618
           1       0.75      0.94      0.84      5537

    accuracy                           0.88     17155
   macro avg       0.86      0.90      0.87     17155
weighted avg       0.90      0.88      0.88     17155

weighted-avg prf: 0.8980370435822964, 0.8809093558729233, 0.8837354825432115


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Svm (linear)
              precision    recall  f1-score   support

           0       0.98      0.89      0.93     11618
           1       0.80      0.95      0.87      5537

    accuracy                           0.91     17155
   macro avg       0.89      0.92      0.90     17155
weighted avg       0.92      0.91      0.91     17155

weighted-avg prf: 0.9195474194746414, 0.9090061206645292, 0.9107841688702697


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Logistic Regression
              precision    recall  f1-score   support

           0       0.97      0.89      0.93     11618
           1       0.80      0.95      0.87      5537

    accuracy                           0.91     17155
   macro avg       0.89      0.92      0.90     17155
weighted avg       0.92      0.91      0.91     17155

weighted-avg prf: 0.9193416019192651, 0.9094141649664821, 0.911133765650687


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Random Forest
              precision    recall  f1-score   support

           0       0.91      0.92      0.91     11618
           1       0.82      0.80      0.81      5537

    accuracy                           0.88     17155
   macro avg       0.86      0.86      0.86     17155
weighted avg       0.88      0.88      0.88     17155

weighted-avg prf: 0.8774318535515233, 0.8781113378023899, 0.8777112519752477


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:kNN
              precision    recall  f1-score   support

           0       0.88      0.92      0.90     11618
           1       0.82      0.73      0.78      5537

    accuracy                           0.86     17155
   macro avg       0.85      0.83      0.84     17155
weighted avg       0.86      0.86      0.86     17155

weighted-avg prf: 0.860434541536398, 0.8624307781987759, 0.8602960110761274


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Decision Tree
              precision    recall  f1-score   support

           0       0.90      0.91      0.91     11618
           1       0.81      0.80      0.80      5537

    accuracy                           0.87     17155
   macro avg       0.86      0.85      0.86     17155
weighted avg       0.87      0.87      0.87     17155

weighted-avg prf: 0.8740942878470168, 0.8746721072573593, 0.8743448656933825


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,





HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

Model Name:XGBClassifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9478
           1       1.00      1.00      1.00      7677

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251921815797, 0.9998251238705916, 0.9998251271114283


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:BernoulliNB
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9478
           1       1.00      1.00      1.00      7677

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251921815797, 0.9998251238705916, 0.9998251271114283


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Svm (linear)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9478
           1       1.00      1.00      1.00      7677

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251921815797, 0.9998251238705916, 0.9998251271114283


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Logistic Regression
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9478
           1       1.00      1.00      1.00      7677

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251921815797, 0.9998251238705916, 0.9998251271114283


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Random Forest
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      9478
           1       0.99      0.99      0.99      7677

    accuracy                           0.99     17155
   macro avg       0.99      0.99      0.99     17155
weighted avg       0.99      0.99      0.99     17155

weighted-avg prf: 0.9945787033845503, 0.9945788399883416, 0.9945786042348387


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:kNN
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9478
           1       1.00      1.00      1.00      7677

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9998251921815797, 0.9998251238705916, 0.9998251271114283


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


Model Name:Decision Tree
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9478
           1       1.00      1.00      1.00      7677

    accuracy                           1.00     17155
   macro avg       1.00      1.00      1.00     17155
weighted avg       1.00      1.00      1.00     17155

weighted-avg prf: 0.9995919575142458, 0.9995919556980473, 0.9995919531716638


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,





In [7]:
df_report

Unnamed: 0,model,f1-score,precision,recall,support
0,BernoulliNB,0.826945,0.850198,0.804931,10668.0
1,BernoulliNB,0.734603,0.705032,0.766764,6487.0
accuracy,BernoulliNB,0.790498,0.790498,0.790498,0.790498
macro avg,BernoulliNB,0.780774,0.777615,0.785847,17155.0
weighted avg,BernoulliNB,0.792027,0.795305,0.790498,17155.0
