In [1]:
import pandas as pd
import numpy as np
import pickle

from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, mutual_info_classif
from sklearn.feature_selection import RFECV

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV

from sklearn.ensemble import VotingClassifier

import os
import ipaddress
import re
import favicon
import urllib.request
from bs4 import BeautifulSoup
import socket
import requests
from googlesearch import search
import whois
from datetime import date, datetime
import time
import pathlib

In [2]:
def trainModel():
    print("Initializing model training...")
    data = pd.read_csv("phishing_data.csv", index_col=0)
    
    y = data['Result'].values
    X = data.drop('Result',axis=1).values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state = 0)

    k_folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)      #for gridsearch
    k_folds2 = StratifiedKFold(n_splits=10, shuffle=True, random_state=100)  #for top50 pipeline cv
    scaler = MinMaxScaler()
    kbest = SelectKBest()
    
    #Logistic Regression
    scaler = MinMaxScaler()
    kbest = SelectKBest()
    LR = LogisticRegression()

    pipeline = Pipeline([('scaler', scaler),
                         ('kbest', kbest),
                         ('LR', LR)])

    param_grid = {
        'kbest__k': [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],    #we start with 4 because of our earlier discovery through visualisation of heatmap + feature ranking
        'kbest__score_func': [chi2, mutual_info_classif],
        'LR__C': [0.001, 0.01, 0.1, 1, 10, 100],
        'LR__penalty': ['l2'],
        'LR__max_iter': [100, 200, 300],
        'LR__solver': ['newton-cg', 'lbfgs', 'liblinear']
    }

    print("Training Logistic Regression Model... This can take up to over an hour")
    grid_search_LR = GridSearchCV(pipeline, param_grid=param_grid, cv=k_folds, scoring='accuracy', verbose=1)
    grid_search_LR.fit(X_train, y_train)
    print("Hyperparameter tuning complete.")
    
    topFifty = grid_search_LR.cv_results_['rank_test_score'].argsort()[:50]
    params = grid_search_LR.cv_results_['params']
    highestScore = grid_search_LR.best_score_

    pipelines = []
    accuracy_ = []
    accuracy_std = []
    
    print("Cross Validating top fifty Logistic Regression pipelines...")
    for i in topFifty:
        if (highestScore - grid_search_LR.cv_results_['mean_test_score'][i] >= 1):
            break
        k = params[i]['kbest__k']    
        score_func = params[i]['kbest__score_func']    
        C = params[i]['LR__C']
        max_iter = params[i]['LR__max_iter']
        penalty = params[i]['LR__penalty']
        solver = params[i]['LR__solver']

        kbest = SelectKBest(score_func=score_func, k=k)
        LR = LogisticRegression(C=C, max_iter=max_iter, penalty=penalty, solver=solver)

        pipeline = Pipeline([('scaler', scaler),
                             ('kbest', kbest),
                             ('LR', LR)])

        accuracy = cross_val_score(pipeline, X_train, y_train, cv=k_folds2, scoring='accuracy')

        pipelines.append(pipeline)
        accuracy_.append(accuracy.mean())
        accuracy_std.append(accuracy.std())

    value = min(accuracy_std)
    index = accuracy_std.index(value)
    LR_pipeline = pipelines[index]
    LR_pipeline.fit(X_train, y_train)
    LR_accuracy = LR_pipeline.score(X_test, y_test)
    print("Finished training Logistic Regression Model. Preparing to train Naive Bayes model...\n")

    # Naive Bayes
    scaler = MinMaxScaler()
    kbest = SelectKBest()
    Bernoulli = BernoulliNB()

    pipeline = Pipeline([('scaler', scaler),
                         ('kbest', kbest),
                         ('NB', Bernoulli)])

    param_grid = {
        'kbest__k': [4,5,6,7,8,9,10,11,12,13,14],
        'kbest__score_func': [chi2, mutual_info_classif],
        'NB__alpha': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    }
    
    print("Training Naive Bayes Model... This can take up to half an hour.")
    grid_search_nb = GridSearchCV(pipeline, param_grid=param_grid, cv=k_folds, scoring='accuracy', verbose=1)
    grid_search_nb.fit(X_train, y_train)
    print("Hyperparameter tuning complete.")
    
    topFifty = grid_search_nb.cv_results_['rank_test_score'].argsort()[:50]
    params = grid_search_nb.cv_results_['params']
    highestScore = grid_search_nb.best_score_

    pipelines = []
    accuracy_ = []
    accuracy_std = []

    print("Cross Validating top fifty Naive Bayes pipelines...")
    for i in topFifty:
        if (highestScore - grid_search_nb.cv_results_['mean_test_score'][i] >= 1):
            break
        k = params[i]['kbest__k']
        score_func = params[i]['kbest__score_func']    
        alpha = params[i]['NB__alpha']

        kbest = SelectKBest(score_func=score_func, k=k)
        Bernoulli = BernoulliNB(alpha=alpha)

        pipeline = Pipeline([('scaler', scaler),
                             ('kbest', kbest),
                             ('NB', Bernoulli)])

        accuracy = cross_val_score(pipeline, X_train, y_train, cv=k_folds2, scoring='accuracy')

        pipelines.append(pipeline)
        accuracy_.append(accuracy.mean())
        accuracy_std.append(accuracy.std())

    value = min(accuracy_std)
    index = accuracy_std.index(value)
    NB_pipeline = pipelines[index]
    NB_pipeline.fit(X_train, y_train)
    NB_accuracy = NB_pipeline.score(X_test, y_test)
    print("Finished training Naive Bayes Model. Preparing to train K-nearest Neighbors model...\n")

    ## KNN
    scaler = MinMaxScaler()
    kbest = SelectKBest()
    knn = KNeighborsClassifier()
    pipeline = Pipeline([('scaler', scaler),
                         ('kbest', kbest),
                         ('KNN', knn)])

    param_grid = {
        'kbest__k': [4,5,6,7,8,9,10,11,12,13,14],
        'kbest__score_func': [chi2, mutual_info_classif],
        'KNN__n_neighbors': [10,11,12,13,14,15],
        'KNN__weights': ['uniform', 'distance']
    }

    print("Training K-nearest Neighbors Model... This can take up to half an hour.")
    grid_search_knn = GridSearchCV(pipeline, param_grid=param_grid, cv=k_folds, scoring='accuracy', verbose=1) 
    grid_search_knn.fit(X_train, y_train)
    print("Hyperparameter tuning complete.")
    
    topFifty = grid_search_knn.cv_results_['rank_test_score'].argsort()[:50]
    params = grid_search_knn.cv_results_['params']
    highestScore = grid_search_knn.best_score_

    pipelines = []
    accuracy_ = []
    accuracy_std = []

    print("Cross Validating top fifty KNN pipelines...")
    for i in topFifty:
        if (highestScore - grid_search_knn.cv_results_['mean_test_score'][i] >= 1):
            break
        k = params[i]['kbest__k']    
        score_func = params[i]['kbest__score_func']    
        n_neighbors = params[i]['KNN__n_neighbors']
        weights = params[i]['KNN__weights']

        kbest = SelectKBest(score_func=score_func, k=k)
        knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)

        pipeline = Pipeline([('scaler', scaler),
                             ('kbest', kbest),
                             ('KNN', knn)])

        accuracy = cross_val_score(pipeline, X_train, y_train, cv=k_folds2, scoring='accuracy')

        pipelines.append(pipeline)
        accuracy_.append(accuracy.mean())
        accuracy_std.append(accuracy.std())

    value = min(accuracy_std)
    index = accuracy_std.index(value)
    knn_pipeline = pipelines[index]
    knn_pipeline.fit(X_train, y_train)
    knn_accuracy = knn_pipeline.score(X_test, y_test)
    print("Finished training K-nearest Neighbors Model. Preparing to train Decision Tree model...\n")

    #Decision Tree
    scaler = MinMaxScaler()
    kbest = SelectKBest()
    dtree = DecisionTreeClassifier()

    pipeline = Pipeline([('scaler', scaler),
                         ('kbest', kbest),
                         ('tree', dtree)])

    param_grid = {
        'kbest__k': [4,5,6,7,8,9,10,11,12,13,14],
        'kbest__score_func': [chi2, mutual_info_classif],
        'tree__criterion' : ['gini', 'entropy'],
        'tree__max_depth' : [None,5,10,15,20],
        'tree__min_samples_split' : [2,5,10,15]
    }

    print("Training Decision Tree Model... This can take up to over an hour.")
    grid_search_dtree = GridSearchCV(pipeline, param_grid=param_grid, cv=k_folds, scoring='accuracy', verbose=1) 
    grid_search_dtree.fit(X_train, y_train)
    print("Hyperparameter tuning complete.")
    
    topFifty = grid_search_dtree.cv_results_['rank_test_score'].argsort()[:50]
    params = grid_search_dtree.cv_results_['params']
    highestScore = grid_search_dtree.best_score_

    pipelines = []
    accuracy_ = []
    accuracy_std = []

    print("Cross Validating top fifty Decision Tree pipelines...")
    for i in topFifty:
        if (highestScore - grid_search_dtree.cv_results_['mean_test_score'][i] >= 1):
            break
        k = params[i]['kbest__k']    
        score_func = params[i]['kbest__score_func']    
        criterion = params[i]['tree__criterion']
        max_depth = params[i]['tree__max_depth']
        min_samples_split = params[i]['tree__min_samples_split']

        kbest = SelectKBest(score_func=score_func, k=k)
        dtree = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth, min_samples_split=min_samples_split)

        pipeline = Pipeline([('scaler', scaler),
                             ('kbest', kbest),
                             ('tree', dtree)])

        accuracy = cross_val_score(pipeline, X_train, y_train, cv=k_folds2, scoring='accuracy')

        pipelines.append(pipeline)
        accuracy_.append(accuracy.mean())
        accuracy_std.append(accuracy.std())

    value = min(accuracy_std)
    index = accuracy_std.index(value)
    dtree_pipeline = pipelines[index]
    dtree_pipeline.fit(X_train, y_train)
    dtree_accuracy = dtree_pipeline.score(X_test, y_test)
    print("Finished training Decision Tree Model. Preparing to train Random Forest model...\n")

    # Random Forest
    scaler = MinMaxScaler()
    kbest = SelectKBest()
    RF = RandomForestClassifier()

    pipeline = Pipeline([('scaler', scaler),
                         ('kbest', kbest),
                         ('RF', RF)])

    param_grid = {
        'kbest__k': [4,5,6,7,8,9,10,11,12,13,14],
        'kbest__score_func': [chi2, mutual_info_classif],
        'RF__criterion' : ['gini', 'entropy'],
        'RF__max_depth' : [None,5,10,15,20,25],
        'RF__min_samples_split' : [2, 5, 10],
    }

    print("Training Random Forest Model... This can take up to over an hour.")
    grid_search_RF = GridSearchCV(pipeline, param_grid=param_grid, cv=k_folds, scoring='accuracy', verbose=1) 
    grid_search_RF.fit(X_train, y_train)
    print("Hyperparameter tuning complete.")

    print("Cross Validating top fifty Random Forest pipelines...")
    topFifty = grid_search_RF.cv_results_['rank_test_score'].argsort()[:50]
    params = grid_search_RF.cv_results_['params']
    highestScore = grid_search_RF.best_score_

    pipelines = []
    accuracy_ = []
    accuracy_std = []

    for i in topFifty:
        if (highestScore - grid_search_RF.cv_results_['mean_test_score'][i] >= 1):
            break
        k = params[i]['kbest__k']    
        score_func = params[i]['kbest__score_func']    
        criterion = params[i]['RF__criterion']
        max_depth = params[i]['RF__max_depth']
        min_samples_split = params[i]['RF__min_samples_split']

        kbest = SelectKBest(score_func=score_func, k=k)
        RF = RandomForestClassifier(criterion=criterion, max_depth=max_depth, min_samples_split=min_samples_split)

        pipeline = Pipeline([('scaler', scaler),
                             ('kbest', kbest),
                             ('RF', RF)])

        accuracy = cross_val_score(pipeline, X_train, y_train, cv=k_folds2, scoring='accuracy')

        pipelines.append(pipeline)
        accuracy_.append(accuracy.mean())
        accuracy_std.append(accuracy.std())

    value = min(accuracy_std)
    index = accuracy_std.index(value)
    RF_pipeline = pipelines[index]
    RF_pipeline.fit(X_train, y_train)
    RF_accuracy = RF_pipeline.score(X_test, y_test)
    print("Finished training Random Forest Model. Preparing to train Support Vector Machine model...\n")

    # SVM
    scaler = MinMaxScaler()
    kbest = SelectKBest()
    svm = SVC()

    pipeline = Pipeline([('scaler', scaler),
                         ('kbest', kbest),
                         ('svm', svm)])

    param_grid = {
        'kbest__k': [4,5,6,7,8,9,10,11,12,13,14],
        'kbest__score_func': [chi2, mutual_info_classif],
        'svm__C' : [0.001, 0.01, 0.1, 1, 10],
        'svm__kernel': ['poly', 'rbf', 'sigmoid']
    }

    print("Training Support Vector Machine Model... This can take up to over an hour.")
    grid_search_svm = GridSearchCV(pipeline, param_grid=param_grid, cv=k_folds, scoring='accuracy', verbose=1) 
    grid_search_svm.fit(X_train, y_train)
    print("Hyperparameter tuning complete.")

    topFifty = grid_search_svm.cv_results_['rank_test_score'].argsort()[:50]
    params = grid_search_svm.cv_results_['params']
    highestScore = grid_search_svm.best_score_

    pipelines = []
    accuracy_ = []
    accuracy_std = []

    print("Cross Validating top fifty Support Vector Machine pipelines...")
    for i in topFifty:
        if (highestScore - grid_search_svm.cv_results_['mean_test_score'][i] >= 1):
            break
        k = params[i]['kbest__k']    
        score_func = params[i]['kbest__score_func']    
        C = params[i]['svm__C']
        kernel = params[i]['svm__kernel']

        kbest = SelectKBest(score_func=score_func, k=k)
        svm = SVC(C=C, kernel=kernel)

        pipeline = Pipeline([('scaler', scaler),
                             ('kbest', kbest),
                             ('svm', svm)])

        accuracy = cross_val_score(pipeline, X_train, y_train, cv=k_folds2, scoring='accuracy')

        pipelines.append(pipeline)
        accuracy_.append(accuracy.mean())
        accuracy_std.append(accuracy.std())

    value = min(accuracy_std)
    index = accuracy_std.index(value)
    svm_pipeline = pipelines[index]
    
    #calibrated classifier calibrates SVM's decision_values() into average probabilites so that
    #the predict_proba() method can be called using the SVM model. THis is needed later when we fit it in VotingClassifier()
    svm_CCV = CalibratedClassifierCV(svm_pipeline, method='isotonic')
    svm_CCV.fit(X_train, y_train)
    svm_accuracy = svm_CCV.score(X_test, y_test)
    print("Finished training Support Vector Machine Model. Finalizing modeling...")
    
    models = []
    scores = []
    models.extend([LR_pipeline, NB_pipeline, knn_pipeline, dtree_pipeline, RF_pipeline])
    scores.extend([LR_accuracy, NB_accuracy, knn_accuracy, dtree_accuracy, RF_accuracy, svm_accuracy])
    
    highestAccuracy = max(scores)
    del scores[-1]
    
    toDelete = []
    for i in range(len(scores)):
        if highestAccuracy-scores[i] > 0.05:
            toDelete.append(i)
            
    toDelete.reverse()
    for i in toDelete:
        del models[i]
        
    estimators = []
    for model in models:
        name = [*model.named_steps.keys()][-1]
        clf = model
        estimators.append((name,clf))
    
    estimators.append(('svm',svm_CCV))
    
    ensemble = VotingClassifier(estimators, voting='soft')
    ensemble.fit(X_train, y_train)
    ensemble_accuracy = ensemble.score(X_test, y_test)
    
    models = []
    scores = []
    models.extend([LR_pipeline, NB_pipeline, knn_pipeline, dtree_pipeline, RF_pipeline, svm_CCV, ensemble])
    scores.extend([LR_accuracy, NB_accuracy, knn_accuracy, dtree_accuracy, RF_accuracy, svm_accuracy, ensemble_accuracy])
       
    #refit on whole data set
    finalModel = VotingClassifier(estimators, voting='soft')
    finalModel.fit(X, y)
    
    modelDate = datetime.now().strftime("%d%m%Y_%H%M")
    filename = 'Ensemble_' + modelDate + '.sav'
    pickle.dump(finalModel, open(filename, 'wb'))
    
    return finalModel, models, scores

In [3]:
def dataext(url):    
    data = generate_data_set(url)
    data = np.array(data)
    data = data.reshape(1,-1)
    return data

In [4]:
def generate_data_set(url):
    
    data_set = []
    
    # Converts the given URL into standard format
    if not re.match(r"^https?", url):
        url = "https://" + url
        
    # Stores the response of the given URL
    try:
        response = requests.get(url)
        responseCode = int(re.findall(r'[\d]{3}',str(response))[0])
        if (responseCode >= 400):
            response = ''
            soup = -999
        else:
            soup = BeautifulSoup(response.text, 'html.parser')
    except:
        response = ""
        soup = -999
        
    # Extracts domain from the given URL
    domain = re.findall(r"://([^/]+)/?", url)[0]
    if re.match(r"^www.",domain):
        domain = domain.replace("www.","")
    
    try:
        whois_response = whois.whois(domain)
    except:
        whois_response = ''
        
    #1. having IP Address
    try:
        ipaddress.ip_address(url)
        data_set.append(-1)
    except:
        data_set.append(1) 
        
    # 2.URL_Length
    if len(url) < 54:
        data_set.append(1)
    elif len(url) >= 54 and len(url) <= 75:
        data_set.append(0)
    else:
        data_set.append(-1)
        
    # 3.Shortining_Service
    match=re.search('bit\.ly|goo\.gl|shorte\.st|go2l\.ink|x\.co|ow\.ly|t\.co|tinyurl|tr\.im|is\.gd|cli\.gs|'
                    'yfrog\.com|migre\.me|ff\.im|tiny\.cc|url4\.eu|twit\.ac|su\.pr|twurl\.nl|snipurl\.com|'
                    'short\.to|BudURL\.com|ping\.fm|post\.ly|Just\.as|bkite\.com|snipr\.com|fic\.kr|loopt\.us|'
                    'doiop\.com|short\.ie|kl\.am|wp\.me|rubyurl\.com|om\.ly|to\.ly|bit\.do|t\.co|lnkd\.in|'
                    'db\.tt|qr\.ae|adf\.ly|goo\.gl|bitly\.com|cur\.lv|tinyurl\.com|ow\.ly|bit\.ly|ity\.im|'
                    'q\.gs|is\.gd|po\.st|bc\.vc|twitthis\.com|u\.to|j\.mp|buzurl\.com|cutt\.us|u\.bb|yourls\.org|'
                    'x\.co|prettylinkpro\.com|scrnch\.me|filoops\.info|vzturl\.com|qr\.net|1url\.com|tweez\.me|v\.gd|tr\.im|link\.zip\.net', url)
    if match:
        data_set.append(-1)
    else:
        data_set.append(1)
    
    #4. having_At_Symbol
    if re.findall("@", url):
        data_set.append(-1)
    else:
        data_set.append(1)
    
    #5. Prefix_Suffix
    if re.findall(r"https?://[^/-]+-[^/-]+/?", url):
        data_set.append(-1)
    else:
        data_set.append(1)
        
    # 6.having_Sub_Domain
    if len(re.findall("\.", domain)) == 1:
        data_set.append(1)
    elif len(re.findall("\.", domain)) == 2:
        data_set.append(0)
    else:
        data_set.append(-1)
        
    # 7.SSLfinal_State
    if response == '':
        httpUrl=[x.start(0) for x in re.finditer('://', url)]
    else:
        httpUrl=[x.start(0) for x in re.finditer('://', response.url)]
        
    if httpUrl[0]>4:
        data_set.append(1)
    else:
        data_set.append(-1)
        
    # 8.Favicon
    try:
        icons = favicon.get(url)
        if len(icons) == 0:
            data_set.append(-1)
        else:
            data_set.append(1)
    except:
        data_set.append(-1)
        
    #9. HTTPS_token
    if re.findall(r"https", domain):
        data_set.append(-1)
    else:
        data_set.append(1)
            
    #10. Iframe
    if response == "":
        data_set.append(-1)
    else:
        hiddenBorders = False
        for iframe in soup.find_all('iframe'):
            if re.search(r'style', str(iframe)):
                iframeStyle = iframe
                if re.findall(r"border ?: ?0", str(iframeStyle) or re.findall(r"border ?: ?none", str(iframeStyle))):
                    hiddenBorders = True
        
        if hiddenBorders == True:
            data_set.append(-1)
        else:
            data_set.append(1)
               
    #11. age_of_domain
    try:
        if whois_response == '':
            data_set.append(-1)
        else:
            if type(whois_response.creation_date) is list:
                registration_date = whois_response.creation_date[0]
            else:
                registration_date = whois_response.creation_date

            if diff_month(datetime.today(), registration_date) >= 6:
                data_set.append(1)
            else:
                data_set.append(-1)
    except:
        data_set.append(-1)
        
    #12. web_traffic
    try:
        r = requests.get('http://tools.mercenie.com/alexa-rank-checker/api/?format=json&urls=' + url)
        rCode = int(re.findall(r'[\d]{3}',str(r))[0])
        if (rCode >= 400):
            data_set.append(-1)
        else:
            data = r.json()
            rank = data['alexaranks']['first']['alexarank']['0']
            rank = int(rank)
            
            if (rank<100000):
                data_set.append(1)
            elif (rank>100000):
                data_set.append(0)
            else:
                data_set.append(-1)
    except:
        data_set.append(-1)
    
    #13. Google_Index
    try:
        site = search(domain, 10)
        resultInSite = False
        for result in site:
            if re.search(domain, result):
                resultInSite = True            
        if resultInSite:
            data_set.append(1)
        else:
            data_set.append(-1)
    except:
        data_set.append(-1)
    
    #14. Links_pointing_to_page
    if response == "":
        data_set.append(-1)
    else:
        number_of_links = len(re.findall(r"<a href=", response.text))
        if number_of_links == 0:
            data_set.append(-1)
        elif number_of_links <= 2:
            data_set.append(0)
        elif number_of_links > 2:
            data_set.append(1)
    
    return data_set

In [5]:
#model = pickle.load(open("Ensemble_26052022_0049.sav", 'rb'))

In [None]:
ensemble, models, accuracy = trainModel()

In [35]:
realTestData = pd.read_csv("realData.csv")

In [36]:
X = realTestData['URLs'].values
y_test = realTestData['Result'].values

In [37]:
X_test = [generate_data_set(url) for url in X]

Error trying to connect to socket: closing socket
Error trying to connect to socket: closing socket
Error trying to connect to socket: closing socket
Error trying to connect to socket: closing socket
Error trying to connect to socket: closing socket


In [38]:
pred = model.predict(X_test)
print('Ensemble Voter\nclassification report: \n', classification_report(y_test, pred))
print('confusion matrix: \n', confusion_matrix(y_test, pred))
print('accuracy score: \n', accuracy_score(y_test, pred))

Ensemble Voter
classification report: 
               precision    recall  f1-score   support

          -1       1.00      0.30      0.46        30
           1       0.59      1.00      0.74        30

    accuracy                           0.65        60
   macro avg       0.79      0.65      0.60        60
weighted avg       0.79      0.65      0.60        60

confusion matrix: 
 [[ 9 21]
 [ 0 30]]
accuracy score: 
 0.65
