In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import OneClassSVM as osvm
from tqdm import tqdm
import numpy as np
import seaborn as sns
from sklearn.model_selection import KFold
import time

In [None]:
# Loading Zubiaga features for rumours and non-rumours.
# `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0;
# `read_csv(..., index_col=0, parse_dates=True)` reproduces its defaults
# (first column used as the index, index parsed as dates where possible).
# Path to rumour features
zRumour = pd.read_csv("", index_col=0, parse_dates=True)
# Path to non-rumour features
zNonRumour = pd.read_csv("", index_col=0, parse_dates=True)

In [None]:
# Column names of the three feature classes. The names are used verbatim as
# DataFrame column keys further down, so every spelling is kept as-is.
linguisticFeatures = [
    "exclamationMarkCount", "questionMarkCount", "characterCount", "tokenCount",
    "subjectivity", "polarity", "uppercaseCount", "lowerCaseCount",
    "firstPersonPronounCount", "secondPersonPronounCount", "thirdPersonPronounCount",
    "capitalWordsCount", "averageWordComplexity", "vuglarTermsCount",
    "emoticonCount", "abbreviationCount", "emojiCount",
    # pos* columns
    "posAdjectiveCount", "posAdpositionCount", "posAdverbCount", "posAuxiliaryCount",
    "posConjunctionCount", "posCoordinatingConjunctionCount", "posDeterminerCount",
    "posInterjectionCount", "posNounCount", "posNumeralCount", "posParticleCount",
    "posPronounCount", "posProperNounCount", "posPunctuationCount",
    "posSubordinatingConjunctionCount", "posSymbolCount", "posVerbCount",
    "posOtherCount", "posSpaceCount",
    # ner* columns
    "nerPersonCount", "nerNationalityCount", "nerBuildingCount", "nerOrganizationCount",
    "nerCountriesCount", "nerLocationCount", "nerProductCount", "nerEventCount",
    "nerArtCount", "nerLawCount", "nerLanguageCount", "nerDateCount", "nerTimeCount",
    "nerMoneyCount", "nerQuantityCount", "nerOrdinalCount", "nerCardinalCount",
    # remaining score columns
    "insight", "tentative", "positiveEmotion", "negativeEmotion", "anxiety",
    "certainty", "tone", "sentenceComplexity", "credibilityScore", "engagementScore",
]
userFeatures = [
    "hasProfileDescription", "isVerifiedAccount", "statusCount", "followingCount",
    "influnece", "userRole", "totalProfileLikesCount", "accountAge",
    "protectedProfile", "hasProfileLocation", "hasProfilePicture",
    "averageFollowSpeed", "averageBeingFollowedSpeed", "averageLikeSpeed",
    "averageStatusSpeed", "screenNameLength", "screenNameDigitCount",
]
metaFeatures = [
    "hashtagCount", "mentionCount", "hasUrl",
    "tweetPostTime", "geoEnabled", "multimediaCounter",
]

In [None]:
# Total number of samples (rumours + non-rumours). Counted on the full tables
# because the per-feature-class frames (zR_ling, zNR_ling) are only created in
# later cells; selecting columns does not change the number of rows.
len(zRumour) + len(zNonRumour)

In [None]:
# Split the rumour table into one frame per feature class
# (linguistic, user, meta).
zR_ling, zR_user, zR_meta = (
    zRumour[columns] for columns in (linguisticFeatures, userFeatures, metaFeatures)
)

In [None]:
# Split the non-rumour table into one frame per feature class
# (linguistic, user, meta).
zNR_ling, zNR_user, zNR_meta = (
    zNonRumour[columns] for columns in (linguisticFeatures, userFeatures, metaFeatures)
)

In [None]:
# Experiment configuration. These settings do not differ between the
# feature sets: every experiment below reuses the same (nu, gamma) grid,
# kernel and cross-validation splitter.
_kernel = "rbf"
_folds = 10
# nu grid: starts at 1e-8 and advances in steps of 0.1; the upper bound 1 is
# appended explicitly because np.arange excludes its stop value.
_NuRange = np.append(np.arange(1e-8, 1, 0.1), 1)
# gamma grid: starts at 1e-8 and advances in steps of 0.5, stopping before 5.
_GammaRange = np.arange(1e-8, 5, 0.5)
kfold = KFold(n_splits=_folds)

In [None]:
# Experiment on the complete feature tables: allocate one zero-filled grid per
# recorded quantity, with one row per nu value and one column per gamma value.
_grid_shape = (len(_NuRange), len(_GammaRange))
(zTime, zNU, zGA, zTP, zTN, zFP, zFN, zPR, zRE, zF1, zAC) = (
    np.zeros(_grid_shape) for _ in range(11)
)

# Raw value arrays of the full rumour / non-rumour tables.
zRumourValues, zNonRumourValues = zRumour.values, zNonRumour.values

# First train/test split of the non-rumour rows.
# NOTE(review): the grid-search loop in the next cell reassigns train_index,
# test_index, train_data and test_data, so these look like leftovers - confirm.
folds = list(kfold.split(zNonRumourValues))
train_index, test_index = folds[0]
train_data = zNonRumourValues[train_index]
test_data = zNonRumourValues[test_index]

### The version with rolling cross-validation

In [None]:
# Grid search on the complete feature table. For every (nu, gamma) pair a
# one-class SVM is fitted on the rumour training folds; the held-out rumour
# fold yields TP/FN and the whole non-rumour table yields FP/TN. Counts are
# averaged over the folds before the ratios are computed.
for ind1, _nu in enumerate(tqdm(_NuRange)):
    for ind2, _gamma in enumerate(_GammaRange):
        start = time.time()
        ocsvm_zr = osvm(kernel=_kernel, nu=_nu, gamma=_gamma)
        TP_T = FN_T = FP_T = TN_T = 0
        PR_T = RE_T = F_T = AC_T = 0

        for train_index, test_index in kfold.split(zRumourValues):
            train_data = zRumourValues[train_index]
            test_data = zRumourValues[test_index]

            ocsvm_zr_fit = ocsvm_zr.fit(train_data)
            zNonRumourPredict = ocsvm_zr.predict(zNonRumourValues)
            zRumourPredict = ocsvm_zr.predict(test_data)

            # Predictions equal to 1 are counted as positives, -1 as negatives.
            FP, TN = sum(zNonRumourPredict == 1), sum(zNonRumourPredict == -1)
            TP, FN = sum(zRumourPredict == 1), sum(zRumourPredict == -1)

            TP_T, FN_T = TP_T + TP, FN_T + FN
            FP_T, TN_T = FP_T + FP, TN_T + TN

        # Per-fold averages of the confusion counts.
        TP_T, FN_T, FP_T, TN_T = (
            total / _folds for total in (TP_T, FN_T, FP_T, TN_T)
        )
        PR_T = TP_T / (TP_T + FP_T)
        RE_T = TP_T / (TP_T + FN_T)
        F1_Score = (2 * PR_T * RE_T) / (PR_T + RE_T)
        AC_T = (TP_T + TN_T) / (TP_T + FP_T + TN_T + FN_T)

        # Average wall-clock time per fold for this (nu, gamma) pair.
        end = time.time()
        executionTime = (end - start) / _folds

        # Write every quantity into its grid at (row = nu, column = gamma).
        for _grid, _value in (
            (zTime, executionTime),
            (zNU, _nu),
            (zGA, _gamma),
            (zF1, F1_Score),
            (zAC, AC_T),
            (zPR, PR_T),
            (zRE, RE_T),
            (zTP, TP_T),
            (zTN, TN_T),
            (zFP, FP_T),
            (zFN, FN_T),
        ):
            _grid[ind1, ind2] = _value
    
# Wrap every result grid of the all-features experiment in a DataFrame ...
(
    ocsvm_interval_10fold_z,
    ocsvm_pr1_10fold_z,
    ocsvm_pr2_10fold_z,
    ocsvm_f1_10fold_z,
    ocsvm_ac_10fold_z,
    ocsvm_pr_10fold_z,
    ocsvm_re_10fold_z,
    ocsvm_tp_10fold_z,
    ocsvm_tn_10fold_z,
    ocsvm_fp_10fold_z,
    ocsvm_fn_10fold_z,
) = map(pd.DataFrame, (zTime, zNU, zGA, zF1, zAC, zPR, zRE, zTP, zTN, zFP, zFN))

# ... and write each one to its own CSV file in the working directory.
for _csv_path, _frame in (
    ("ocsvm_interval_10fold_z.csv", ocsvm_interval_10fold_z),
    ("ocsvm_pr1_10fold_z.csv", ocsvm_pr1_10fold_z),
    ("ocsvm_pr2_10fold_z.csv", ocsvm_pr2_10fold_z),
    ("ocsvm_f1_10fold_z.csv", ocsvm_f1_10fold_z),
    ("ocsvm_ac_10fold_z.csv", ocsvm_ac_10fold_z),
    ("ocsvm_pr_10fold_z.csv", ocsvm_pr_10fold_z),
    ("ocsvm_re_10fold_z.csv", ocsvm_re_10fold_z),
    ("ocsvm_tp_10fold_z.csv", ocsvm_tp_10fold_z),
    ("ocsvm_tn_10fold_z.csv", ocsvm_tn_10fold_z),
    ("ocsvm_fp_10fold_z.csv", ocsvm_fp_10fold_z),
    ("ocsvm_fn_10fold_z.csv", ocsvm_fn_10fold_z),
):
    _frame.to_csv(_csv_path)

In [None]:
# Preparing dataframes for seaborn heatmap.
# We are going to visualize the performance of the one-class classifier on the
# Zubiaga dataset (all features) according to nu and gamma (kernel parameter).
#
# The all-features results are 2-D arrays addressed by position
# (row = nu, column = gamma), so they are wrapped directly instead of being
# looked up with the float nu/gamma values. The accuracy grid is named zAC.
_z_axes = dict(index=list(_NuRange), columns=list(_GammaRange), copy=True)
df_z_F1 = pd.DataFrame(zF1, **_z_axes)
df_z_ACC = pd.DataFrame(zAC, **_z_axes)
df_z_PR = pd.DataFrame(zPR, **_z_axes)
df_z_RE = pd.DataFrame(zRE, **_z_axes)
df_z_TP = pd.DataFrame(zTP, **_z_axes)
df_z_TN = pd.DataFrame(zTN, **_z_axes)
df_z_FP = pd.DataFrame(zFP, **_z_axes)
df_z_FN = pd.DataFrame(zFN, **_z_axes)

# The all-features loop stores no TPR/FPR grids, so they are derived from the
# averaged confusion counts: TPR = TP/(TP+FN), FPR = FP/(FP+TN).
# Cells with a zero denominator become NaN without emitting warnings.
with np.errstate(divide="ignore", invalid="ignore"):
    df_z_TPR = pd.DataFrame(zTP / (zTP + zFN), **_z_axes)
    df_z_FPR = pd.DataFrame(zFP / (zFP + zTN), **_z_axes)

# Tick labels are derived from the grid itself (rounded to one decimal) so the
# axes show the real nu and gamma values and keep working if the grid changes.
_nu_labels = np.round(_NuRange, 1)
_gamma_labels = np.round(_GammaRange, 1)
for _frame in (df_z_F1, df_z_ACC):
    _frame.index = _nu_labels
    _frame.columns = _gamma_labels

# One heatmap per metric; each figure is saved as EPS and then displayed.
for _frame, _vmax, _eps_path in (
    (df_z_F1, 0.35, "df_z_F1.eps"),
    (df_z_ACC, 0.8, "df_z_ACC.eps"),
):
    ax = sns.heatmap(_frame, cmap="Blues", vmin=0, vmax=_vmax, cbar=False)
    plt.yticks(rotation=0)
    plt.xlabel(r'$\gamma$')
    plt.ylabel(r'$\nu$')
    plt.savefig(_eps_path)
    plt.show()

In [None]:
# Experiment on the linguistic features: result containers and input arrays.
# df_zLPerformance collects one row per (nu, gamma) pair.
df_zLPerformance = pd.DataFrame(
    columns=["kernel", "nu", "gamma", "PR", "RE", "F1", "AC", "TP", "FP", "TN", "FN", "TPR", "FPR"]
)

# One independent {nu: {gamma: 0}} lookup table per metric.
(zLF1, zLACC, zLPR, zLRE, zLTP, zLTN, zLFP, zLFN, zLTPR, zLFPR) = (
    {a: dict.fromkeys(_GammaRange, 0) for a in _NuRange} for _ in range(10)
)

zLRumourValues, zLNonRumourValues = zR_ling.values, zNR_ling.values

In [None]:
# Grid search on the linguistic features. For every (nu, gamma) pair a
# one-class SVM is fitted on the rumour training folds; the held-out rumour
# fold yields TP/FN and the whole non-rumour table yields FP/TN. Counts are
# averaged over the folds before the ratios are computed.
for _nu in tqdm(_NuRange):
    for _gamma in _GammaRange:
        ocsvm_zr = osvm(kernel=_kernel, nu=_nu, gamma=_gamma)
        TP_T = FN_T = FP_T = TN_T = 0
        PR_T = RE_T = F_T = AC_T = TPR_T = FPR_T = 0

        for train_index, test_index in kfold.split(zLRumourValues):
            train_data = zLRumourValues[train_index]
            test_data = zLRumourValues[test_index]

            ocsvm_zr_fit = ocsvm_zr.fit(train_data)
            zNonRumourPredict = ocsvm_zr.predict(zLNonRumourValues)
            zRumourPredict = ocsvm_zr.predict(test_data)

            # Predictions equal to 1 are counted as positives, -1 as negatives.
            FP, TN = sum(zNonRumourPredict == 1), sum(zNonRumourPredict == -1)
            TP, FN = sum(zRumourPredict == 1), sum(zRumourPredict == -1)

            TP_T, FN_T = TP_T + TP, FN_T + FN
            FP_T, TN_T = FP_T + FP, TN_T + TN

        # Per-fold averages of the confusion counts.
        TP_T, FN_T, FP_T, TN_T = (
            total / _folds for total in (TP_T, FN_T, FP_T, TN_T)
        )
        PR_T = TP_T / (TP_T + FP_T)
        RE_T = TP_T / (TP_T + FN_T)
        F1_Score = (2 * PR_T * RE_T) / (PR_T + RE_T)
        ACC_T = (TP_T + TN_T) / (TP_T + FP_T + TN_T + FN_T)
        TPR_T = TP_T / (TP_T + FN_T)
        FPR_T = FP_T / (FP_T + TN_T)

        # Store every metric in its {nu: {gamma: value}} table.
        for _table, _value in (
            (zLF1, F1_Score),
            (zLACC, ACC_T),
            (zLPR, PR_T),
            (zLRE, RE_T),
            (zLTP, TP_T),
            (zLTN, TN_T),
            (zLFP, FP_T),
            (zLFN, FN_T),
            (zLTPR, TPR_T),
            (zLFPR, FPR_T),
        ):
            _table[_nu][_gamma] = _value

        # Append one summary row for this (nu, gamma) pair.
        df_zLPerformance.loc[len(df_zLPerformance)] = [
            _kernel, _nu, _gamma, PR_T, RE_T, F1_Score, ACC_T,
            TP_T, FP_T, TN_T, FN_T, TPR_T, FPR_T,
        ]

In [None]:
# Preparing dataframes for seaborn heatmap.
# We are going to visualize the performance of the one-class classifier on the
# Zubiaga dataset (linguistic features) according to nu and gamma.
def _zL_frame(scores):
    """Convert a {nu: {gamma: value}} table into a DataFrame.

    Rows follow _NuRange and columns follow _GammaRange; the raw grid values
    are used as labels.
    """
    return pd.DataFrame(
        [[scores[nu][gamma] for gamma in _GammaRange] for nu in _NuRange],
        index=list(_NuRange),
        columns=list(_GammaRange),
        dtype=float,
    )


df_zL_F1 = _zL_frame(zLF1)
df_zL_ACC = _zL_frame(zLACC)
df_zL_PR = _zL_frame(zLPR)
df_zL_RE = _zL_frame(zLRE)
df_zL_TP = _zL_frame(zLTP)
df_zL_TN = _zL_frame(zLTN)
df_zL_FP = _zL_frame(zLFP)
df_zL_FN = _zL_frame(zLFN)
df_zL_TPR = _zL_frame(zLTPR)
df_zL_FPR = _zL_frame(zLFPR)

# Tick labels are derived from the grid itself (rounded to one decimal).
# The previous hard-coded column labels 0..9 did not match the gamma grid,
# and fixed-length label lists fail as soon as the grid size changes.
_nu_labels = np.round(_NuRange, 1)
_gamma_labels = np.round(_GammaRange, 1)
for _frame in (df_zL_F1, df_zL_ACC):
    _frame.index = _nu_labels
    _frame.columns = _gamma_labels

# One heatmap per metric; each figure is saved as EPS and then displayed.
for _frame, _vmax, _eps_path in (
    (df_zL_F1, 0.35, "df_zL_F1.eps"),
    (df_zL_ACC, 0.8, "df_zL_ACC.eps"),
):
    ax = sns.heatmap(_frame, cmap="Blues", vmin=0, vmax=_vmax, cbar=False)
    plt.yticks(rotation=0)
    plt.xlabel(r'$\gamma$')
    plt.ylabel(r'$\nu$')
    plt.savefig(_eps_path)
    plt.show()

In [None]:
# Experiment on the user features: result containers and input arrays.
# df_zUPerformance collects one row per (nu, gamma) pair.
df_zUPerformance = pd.DataFrame(
    columns=["kernel", "nu", "gamma", "PR", "RE", "F1", "AC", "TP", "FP", "TN", "FN", "TPR", "FPR"]
)

# One independent {nu: {gamma: 0}} lookup table per metric.
(zUF1, zUACC, zUPR, zURE, zUTP, zUTN, zUFP, zUFN, zUTPR, zUFPR) = (
    {a: dict.fromkeys(_GammaRange, 0) for a in _NuRange} for _ in range(10)
)

zURumourValues, zUNonRumourValues = zR_user.values, zNR_user.values

In [None]:
# Grid search on the user features. For every (nu, gamma) pair a one-class
# SVM is fitted on the rumour training folds; the held-out rumour fold yields
# TP/FN and the whole non-rumour table yields FP/TN. Counts are averaged over
# the folds before the ratios are computed.
for _nu in tqdm(_NuRange):
    for _gamma in _GammaRange:
        ocsvm_zr = osvm(kernel=_kernel, nu=_nu, gamma=_gamma)
        TP_T = FN_T = FP_T = TN_T = 0
        PR_T = RE_T = F_T = AC_T = TPR_T = FPR_T = 0

        for train_index, test_index in kfold.split(zURumourValues):
            train_data = zURumourValues[train_index]
            test_data = zURumourValues[test_index]

            ocsvm_zr_fit = ocsvm_zr.fit(train_data)
            zNonRumourPredict = ocsvm_zr.predict(zUNonRumourValues)
            zRumourPredict = ocsvm_zr.predict(test_data)

            # Predictions equal to 1 are counted as positives, -1 as negatives.
            FP, TN = sum(zNonRumourPredict == 1), sum(zNonRumourPredict == -1)
            TP, FN = sum(zRumourPredict == 1), sum(zRumourPredict == -1)

            TP_T, FN_T = TP_T + TP, FN_T + FN
            FP_T, TN_T = FP_T + FP, TN_T + TN

        # Per-fold averages of the confusion counts.
        TP_T, FN_T, FP_T, TN_T = (
            total / _folds for total in (TP_T, FN_T, FP_T, TN_T)
        )
        PR_T = TP_T / (TP_T + FP_T)
        RE_T = TP_T / (TP_T + FN_T)
        F1_Score = (2 * PR_T * RE_T) / (PR_T + RE_T)
        ACC_T = (TP_T + TN_T) / (TP_T + FP_T + TN_T + FN_T)
        TPR_T = TP_T / (TP_T + FN_T)
        FPR_T = FP_T / (FP_T + TN_T)

        # Store every metric in its {nu: {gamma: value}} table.
        for _table, _value in (
            (zUF1, F1_Score),
            (zUACC, ACC_T),
            (zUPR, PR_T),
            (zURE, RE_T),
            (zUTP, TP_T),
            (zUTN, TN_T),
            (zUFP, FP_T),
            (zUFN, FN_T),
            (zUTPR, TPR_T),
            (zUFPR, FPR_T),
        ):
            _table[_nu][_gamma] = _value

        # Append one summary row for this (nu, gamma) pair.
        df_zUPerformance.loc[len(df_zUPerformance)] = [
            _kernel, _nu, _gamma, PR_T, RE_T, F1_Score, ACC_T,
            TP_T, FP_T, TN_T, FN_T, TPR_T, FPR_T,
        ]

In [None]:
# Preparing dataframes for seaborn heatmap.
# We are going to visualize the performance of the one-class classifier on the
# Zubiaga dataset (user features) according to nu and gamma.
def _zU_frame(scores):
    """Convert a {nu: {gamma: value}} table into a DataFrame.

    Rows follow _NuRange and columns follow _GammaRange; the raw grid values
    are used as labels.
    """
    return pd.DataFrame(
        [[scores[nu][gamma] for gamma in _GammaRange] for nu in _NuRange],
        index=list(_NuRange),
        columns=list(_GammaRange),
        dtype=float,
    )


df_zU_F1 = _zU_frame(zUF1)
df_zU_ACC = _zU_frame(zUACC)
df_zU_PR = _zU_frame(zUPR)
df_zU_RE = _zU_frame(zURE)
df_zU_TP = _zU_frame(zUTP)
df_zU_TN = _zU_frame(zUTN)
df_zU_FP = _zU_frame(zUFP)
df_zU_FN = _zU_frame(zUFN)
df_zU_TPR = _zU_frame(zUTPR)
df_zU_FPR = _zU_frame(zUFPR)

# Tick labels are derived from the grid itself (rounded to one decimal).
# The previous hard-coded column labels 0..9 did not match the gamma grid,
# and fixed-length label lists fail as soon as the grid size changes.
_nu_labels = np.round(_NuRange, 1)
_gamma_labels = np.round(_GammaRange, 1)
for _frame in (df_zU_F1, df_zU_ACC):
    _frame.index = _nu_labels
    _frame.columns = _gamma_labels

# One heatmap per metric; each figure is saved as EPS and then displayed.
for _frame, _vmax, _eps_path in (
    (df_zU_F1, 0.35, "df_zU_F1.eps"),
    (df_zU_ACC, 0.8, "df_zU_ACC.eps"),
):
    ax = sns.heatmap(_frame, cmap="Blues", vmin=0, vmax=_vmax, cbar=False)
    plt.yticks(rotation=0)
    plt.xlabel(r'$\gamma$')
    plt.ylabel(r'$\nu$')
    plt.savefig(_eps_path)
    plt.show()

In [None]:
# Experiment on the meta features: result containers and input arrays.
# df_zMPerformance is the summary table for this feature class.
df_zMPerformance = pd.DataFrame(
    columns=["kernel", "nu", "gamma", "PR", "RE", "F1", "AC", "TP", "FP", "TN", "FN", "TPR", "FPR"]
)

# One independent {nu: {gamma: 0}} lookup table per metric.
(zMF1, zMACC, zMPR, zMRE, zMTP, zMTN, zMFP, zMFN, zMTPR, zMFPR) = (
    {a: dict.fromkeys(_GammaRange, 0) for a in _NuRange} for _ in range(10)
)

zMRumourValues, zMNonRumourValues = zR_meta.values, zNR_meta.values

In [None]:
# Grid search on the meta features. For every (nu, gamma) pair a one-class
# SVM is fitted on the rumour training folds; the held-out rumour fold yields
# TP/FN and the whole non-rumour table yields FP/TN. Counts are averaged over
# the folds before the ratios are computed.
for _nu in tqdm(_NuRange):
    for _gamma in _GammaRange:
        ocsvm_zr = osvm(kernel=_kernel, nu=_nu, gamma=_gamma)
        TP_T, FN_T, FP_T, TN_T = 0, 0, 0, 0
        for train_index, test_index in kfold.split(zMRumourValues):
            train_data, test_data = zMRumourValues[train_index], zMRumourValues[test_index]

            ocsvm_zr.fit(train_data)
            zNonRumourPredict = ocsvm_zr.predict(zMNonRumourValues)
            zRumourPredict = ocsvm_zr.predict(test_data)

            # Predictions equal to 1 are counted as positives, -1 as negatives.
            # Array-level .sum() avoids a Python-level loop over every element.
            FP_T += (zNonRumourPredict == 1).sum()
            TN_T += (zNonRumourPredict == -1).sum()
            TP_T += (zRumourPredict == 1).sum()
            FN_T += (zRumourPredict == -1).sum()

        # Per-fold averages of the confusion counts.
        TP_T = TP_T / _folds
        FN_T = FN_T / _folds
        FP_T = FP_T / _folds
        TN_T = TN_T / _folds
        PR_T = TP_T / (TP_T + FP_T)
        RE_T = TP_T / (TP_T + FN_T)
        F1_Score = (2 * PR_T * RE_T) / (PR_T + RE_T)
        ACC_T = (TP_T + TN_T) / (TP_T + FP_T + TN_T + FN_T)
        # TPR/FPR were previously never computed here, so zMTPR/zMFPR only
        # ever received the initial zeros. Same formulas as the linguistic
        # and user experiments.
        TPR_T = TP_T / (TP_T + FN_T)
        FPR_T = FP_T / (FP_T + TN_T)

        zMF1[_nu][_gamma] = F1_Score
        zMACC[_nu][_gamma] = ACC_T
        zMPR[_nu][_gamma] = PR_T
        zMRE[_nu][_gamma] = RE_T
        zMTP[_nu][_gamma] = TP_T
        zMTN[_nu][_gamma] = TN_T
        zMFP[_nu][_gamma] = FP_T
        zMFN[_nu][_gamma] = FN_T
        zMTPR[_nu][_gamma] = TPR_T
        zMFPR[_nu][_gamma] = FPR_T

        # Append one summary row for this (nu, gamma) pair, as the linguistic
        # and user experiments do for their performance tables.
        df_zMPerformance.loc[len(df_zMPerformance)] = [
            _kernel, _nu, _gamma, PR_T, RE_T, F1_Score, ACC_T,
            TP_T, FP_T, TN_T, FN_T, TPR_T, FPR_T,
        ]

In [None]:
# Preparing dataframes for seaborn heatmap.
# We are going to visualize the performance of the one-class classifier on the
# Zubiaga dataset (meta features) according to nu and gamma.
def _zM_frame(scores):
    """Convert a {nu: {gamma: value}} table into a DataFrame.

    Rows follow _NuRange and columns follow _GammaRange; the raw grid values
    are used as labels.
    """
    return pd.DataFrame(
        [[scores[nu][gamma] for gamma in _GammaRange] for nu in _NuRange],
        index=list(_NuRange),
        columns=list(_GammaRange),
        dtype=float,
    )


# Read the meta-feature tables (zM*). The all-features grids (zF1, zPR, ...)
# belong to a different experiment and are not addressable by nu/gamma value.
df_zM_F1 = _zM_frame(zMF1)
df_zM_ACC = _zM_frame(zMACC)
df_zM_PR = _zM_frame(zMPR)
df_zM_RE = _zM_frame(zMRE)
df_zM_TP = _zM_frame(zMTP)
df_zM_TN = _zM_frame(zMTN)
df_zM_FP = _zM_frame(zMFP)
df_zM_FN = _zM_frame(zMFN)
df_zM_TPR = _zM_frame(zMTPR)
df_zM_FPR = _zM_frame(zMFPR)

# Tick labels are derived from the grid itself (rounded to one decimal).
# The previous hard-coded column labels 0..9 did not match the gamma grid,
# and fixed-length label lists fail as soon as the grid size changes.
_nu_labels = np.round(_NuRange, 1)
_gamma_labels = np.round(_GammaRange, 1)
for _frame in (df_zM_F1, df_zM_ACC):
    _frame.index = _nu_labels
    _frame.columns = _gamma_labels

# One heatmap per metric; each figure is saved as EPS and then displayed.
# These two figures keep seaborn's default colour bar.
for _frame, _vmax, _eps_path in (
    (df_zM_F1, 0.35, "df_zM_F1.eps"),
    (df_zM_ACC, 0.8, "df_zM_ACC.eps"),
):
    ax = sns.heatmap(_frame, cmap="Blues", vmin=0, vmax=_vmax)
    plt.yticks(rotation=0)
    plt.xlabel(r'$\gamma$')
    plt.ylabel(r'$\nu$')
    plt.savefig(_eps_path)
    plt.show()