In [1]:
%matplotlib notebook
import IPython
from IPython.display import display
import pandas as pd
import csv
from numpy import nan as NA
from datetime import datetime
import re
import sys
import numpy as np
import matplotlib.pyplot as plt 
from pandas import *
import pickle
import requests
import os
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from yellowbrick.classifier import ClassificationReport
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression, SGDClassifier
from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Normalizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.neural_network import MLPClassifier

In [2]:
#Panda settings
#Pandas will not display all columns in our data when using the head() function without this
pd.set_option('max_columns',50) 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

## 0.0 Opening Pickled Files

In [3]:
with open('x_y_z.pickle', 'rb') as xyz:
    feat_var = pickle.load(xyz)

In [4]:
X =  feat_var[0]
y =  feat_var[1] #y is "offense group"
z =  feat_var[2] # z is "ucr-rank"

In [5]:
z_i = pd.Series(z)
z_i.value_counts()

6    102462
7     82325
4     26379
5     23033
8     22602
3     17686
2      2170
1       942
9       196
Name: ucr-rank, dtype: int64

In [6]:
X = X[["hour","street", "month", "day", "LATITUDE", "LONGITUDE", "Temperature"]]

# 1 Target variable "offensegroup" y

As we saw in the previous notebook. There was a significant class imbalance in our target variable and that was evident when showing our results. There are many methods to deal with class imbalance but we will focus on the two most popular: Up-Sampling( randomly adding elements to our minority class) and Down-sampling( reducing the number of elements in our minority class). 

In this example we will use cross validation of 12, And we will use the best parameters obtained during hyperparameter tunning.

In [7]:
y_i = pd.Series(y)
y_i.value_counts()  # 0 non violent, 1 violent

0    230618
1     47177
dtype: int64

## 1.1 Up-Sampling

In [8]:
from collections import Counter
from imblearn.over_sampling import RandomOverSampler 
ros = RandomOverSampler(random_state=42)
X_res, y_res = ros.fit_sample(X, y)

In [9]:
X_res_i = pd.DataFrame(X_res)
y_res_i = pd.Series(y_res)

print (X_res_i.info())
print (y_res_i.value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461236 entries, 0 to 461235
Data columns (total 7 columns):
0    461236 non-null int64
1    461236 non-null int64
2    461236 non-null int64
3    461236 non-null int64
4    461236 non-null int64
5    461236 non-null int64
6    461236 non-null int64
dtypes: int64(7)
memory usage: 24.6 MB
None
1    230618
0    230618
dtype: int64


The module performed the oversampling with success 


Choosing the best parameters using the best models from the previous notebook:

In [10]:
knn = KNeighborsClassifier(n_neighbors = 50, n_jobs= -1) 
rnf = RandomForestClassifier(n_estimators = 128, n_jobs= -1)
# ann = MLPClassifier(hidden_layer_sizes = (300,), max_iter = 400)
sgd = SGDClassifier(alpha = 0.0001, n_jobs= -1)
baggin = BaggingClassifier(max_features = 5, max_samples = 100, n_estimators = 100, n_jobs= -1)
pas = PassiveAggressiveClassifier(C = 100, fit_intercept = True, max_iter = 10, n_jobs = -1)

In [11]:
import multiprocessing
from multiprocessing import Process
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


In [12]:

def sampling_offensegroup(features, target, average, pickle_file, pickle_estimators):
    
    
    scores = {'accuracy_knn':[], 'f1_knn':[], 'precision_knn':[], 'recall_knn':[],
                          'accuracy_rnf':[], 'f1_rnf':[], 'precision_rnf':[], 'recall_rnf':[],
                          'accuracy_sgd':[], 'f1_sgd':[], 'precision_sgd':[], 'recall_sgd':[],
                          'accuracy_baggin':[], 'f1_baggin':[], 'precision_baggin':[], 'recall_baggin':[],
                          'accuracy_pas':[], 'f1_pas':[], 'precision_pas':[], 'recall_pas':[]}

    
    
    skf = StratifiedKFold(n_splits=12)

    for train_index, test_index in skf.split(features, target):
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = target[train_index], target[test_index]
    
        # scaling:
        scaler = StandardScaler()
        X_train = scaler.fit(X_train).transform(X_train)
        X_test = scaler.fit(X_test).transform(X_test)
    
        # knn:
        fitted_knn = knn.fit(X_train, y_train)
        predicted_knn = knn.predict(X_test)
        report_knn = classification_report(y_test, predicted_knn)
        scores['accuracy_knn'].append(accuracy_score(y_test, predicted_knn))
        scores['f1_knn'].append(f1_score(y_test, predicted_knn, average = average))
        scores['precision_knn'].append(precision_score(y_test, predicted_knn, average = average))
        scores['recall_knn'].append(recall_score(y_test, predicted_knn, average = average))   
                                      
                                      
       
        
        # rnf:
        fitted_rnf = rnf.fit(X_train, y_train)
        predicted_rnf = rnf.predict(X_test)
        report_rnf = classification_report(y_test, predicted_rnf)
        scores['accuracy_rnf'].append(accuracy_score(y_test, predicted_rnf))
        scores['f1_rnf'].append(f1_score(y_test, predicted_rnf, average = average))
        scores['precision_rnf'].append(precision_score(y_test, predicted_rnf, average = average))
        scores['recall_rnf'].append(recall_score(y_test, predicted_rnf, average = average))
                                    
       
        
        # ann: too slow, takes very long time
        #fitted_ann = ann.fit(X_train, y_train)
        #predicted_ann = ann.predict(X_test)
        #report_ann = classification_report(y_test, predicted_ann)
        #classification_dict['report_ann'].append(report_ann)
        
    
      
        # sgd:
        fitted_sgd = sgd.fit(X_train, y_train)
        predicted_sgd = sgd.predict(X_test)
        report_sgd = classification_report(y_test, predicted_sgd)
        scores['accuracy_sgd'].append(accuracy_score(y_test, predicted_sgd))
        scores['f1_sgd'].append(f1_score(y_test, predicted_sgd, average = average))
        scores['precision_sgd'].append(precision_score(y_test, predicted_sgd, average = average))
        scores['recall_sgd'].append(recall_score(y_test, predicted_sgd, average = average))
    
        #baggin:
    
        fitted_baggin = baggin.fit(X_train, y_train)
        predicted_baggin = baggin.predict(X_test)
        report_baggin = classification_report(y_test, predicted_baggin)
        scores['accuracy_baggin'].append(accuracy_score(y_test, predicted_baggin))
        scores['f1_baggin'].append(f1_score(y_test, predicted_baggin, average = average))
        scores['precision_baggin'].append(precision_score(y_test, predicted_baggin, average = average))
        scores['recall_baggin'].append(recall_score(y_test, predicted_baggin, average = average))
        
        
        # pas:
        
        fitted_pas = pas.fit(X_train, y_train)
        predicted_pas = pas.predict(X_test)
        report_pas = classification_report(y_test, predicted_pas)
        scores['accuracy_pas'].append(accuracy_score(y_test, predicted_pas))
        scores['f1_pas'].append(f1_score(y_test, predicted_pas, average = average))
        scores['precision_pas'].append(precision_score(y_test, predicted_pas, average = average))
        scores['recall_pas'].append(recall_score(y_test, predicted_pas, average = average))
        
    
        print(scores)
    
        # write last estimator to disc.
    
        with open(pickle_file, 'wb') as a:
            pickle.dump([fitted_knn, fitted_rnf, fitted_sgd, fitted_baggin, fitted_pas] , a)
        
        with open(pickle_estimators, 'wb') as est:
            pickle.dump([predicted_knn, predicted_rnf, predicted_sgd, predicted_baggin, predicted_pas,
                        report_knn, report_rnf, report_sgd, report_baggin, report_pas] , est)
            
            
        
    print('DONE!')
        
    return (scores)



In [19]:
if __name__ == '__main__':
   
    report_offensegroup_up = sampling_offensegroup(X_res, y_res, 'binary', 'offensegroup_upsampled_model.pickle',
                                                  'offensegroup_upsampled_report.pickle')



{'accuracy_knn': [0.6457932254539779], 'f1_knn': [0.6632034632034632], 'precision_knn': [0.6321324153541451], 'recall_knn': [0.6974868619595193], 'accuracy_rnf': [0.7414537697070607], 'f1_rnf': [0.6820857325655789], 'precision_rnf': [0.8853915787725272], 'recall_rnf': [0.5547114834278578], 'accuracy_sgd': [0.5727665331182684], 'f1_sgd': [0.5640562782054684], 'precision_sgd': [0.5757953498455368], 'recall_sgd': [0.5527863052187939], 'accuracy_baggin': [0.6180862687965034], 'f1_baggin': [0.6220585963647598], 'precision_baggin': [0.6156550986087754], 'recall_baggin': [0.6285967011811229], 'accuracy_pas': [0.527134606379104], 'f1_pas': [0.554902536977177], 'precision_pas': [0.5241245316186335], 'recall_pas': [0.5895207867214736]}




{'accuracy_knn': [0.6457932254539779, 0.6630677974920651], 'f1_knn': [0.6632034632034632, 0.6821450484722051], 'precision_knn': [0.6321324153541451, 0.645591377868624], 'recall_knn': [0.6974868619595193, 0.7230865289557209], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497], 'f1_rnf': [0.6820857325655789, 0.6651279217190599], 'precision_rnf': [0.8853915787725272, 0.8863281926875758], 'recall_rnf': [0.5547114834278578, 0.5322857588844373], 'accuracy_sgd': [0.5727665331182684, 0.6218325615276549], 'f1_sgd': [0.5640562782054684, 0.6138150903294367], 'precision_sgd': [0.5757953498455368, 0.6271103631724662], 'recall_sgd': [0.5527863052187939, 0.6010718559758572], 'accuracy_baggin': [0.6180862687965034, 0.6419688849575941], 'f1_baggin': [0.6220585963647598, 0.634417171395176], 'precision_baggin': [0.6156550986087754, 0.6480868385345997], 'recall_baggin': [0.6285967011811229, 0.6213122430927728], 'accuracy_pas': [0.527134606379104, 0.4588428118008221], 'f1_pas': [0.554902536977177, 0.



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513], 'f1_rnf': [0.6820857325655789, 0.6651279217190599, 0.8790491473332064], 'precision_rnf': [0.8853915787725272, 0.8863281926875758, 0.920850190894068], 'recall_rnf': [0.5547114834278578, 0.5322857588844373, 0.840878343219898], 'accuracy_sgd': [0.5727665331182684, 0.6218325615276549, 0.5990217504422937], 'f1_sgd': [0.5640562782054684, 0.6138150903294367, 0.5778459515722582], 'precision_sgd': [0.5757953498455368, 0.6271103631724662, 0.6100636205899364], 'recall_sgd': [0.5527863052187939, 0.6010718559758572, 0.548860443334374], 'accuracy_baggin': [0.6180862687965034, 0.6419688849575941, 0.6297481527734415], 'f1_bag



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513, 0.7666250390259132], 'f1_rnf': [0.6820857325655789, 0.6651279217190599, 0.8790491473332064, 0.7211861245803804], 'precision_rnf': [0.8853915787725272, 0.8863281926875758, 0.920850190894068, 0.8955534969893469], 'recall_rnf': [0.5547114834278578, 0.5322857588844373, 0.840878343219898, 0.6036528254761161], 'accuracy_sgd': [0.5727665331182684, 0.6218325615276549, 0.5990217504422937, 0.5782339473410344], 'f1_sgd': [0.5640562782054684, 0.6138150903294367, 0.5778459515722582, 0.5864224302880323], 'precision_sgd': [0.5757953498455368, 0.627



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513, 0.7666250390259132, 0.7883234467686544], 'f1_rnf': [0.6820857325655789, 0.6651279217190599, 0.8790491473332064, 0.7211861245803804, 0.7543774906412269], 'precision_rnf': [0.8853915787725272, 0.8863281926875758, 0.920850190894068, 0.8955534969893469, 0.8984610959298145], 'recall_rnf': [0.5547114834278578, 0.5322857588844373, 0.840878343219898, 0.6036528254761161, 0.6501196794671662], 'accuracy_sgd': [0.5727665331182684, 0.6218325615276549, 0.599021750442293



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899, 0.6714278280778437], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285, 0.6879339741530553], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298, 0.6550279986824149], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813, 0.7243209491102092], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513, 0.7666250390259132, 0.7883234467686544, 0.9534030596315953], 'f1_rnf': [0.6820857325655789, 0.6651279217190599, 0.8790491473332064, 0.7211861245803804, 0.7543774906412269, 0.9545419934516104], 'precision_rnf': [0.8853915787725272, 0.8863281926875758, 0.920850190894068, 0.8955534969893469, 0.8984610959298145, 0.9317675040880036], 'recall_rnf': [0.5547114834278578, 0.532285758884



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899, 0.6714278280778437, 0.6669528566968467], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285, 0.6879339741530553, 0.679197052853169], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298, 0.6550279986824149, 0.6551124002900652], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813, 0.7243209491102092, 0.7051201998126756], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513, 0.7666250390259132, 0.7883234467686544, 0.9534030596315953, 0.8250078051826413], 'f1_rnf': [0.6820857325655789, 0.6651279217190599, 0.8790491473332064, 0.7211861245803804, 0.7543774906412269, 0.9545419934516104, 0.8012176380186783], 'precision_rnf': [0.8853915787725272, 0.8863281926875758, 0.92085019



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899, 0.6714278280778437, 0.6669528566968467, 0.6683838068477469], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285, 0.6879339741530553, 0.679197052853169, 0.6866302797856124], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298, 0.6550279986824149, 0.6551124002900652, 0.6508202833706189], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813, 0.7243209491102092, 0.7051201998126756, 0.7266104693516495], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513, 0.7666250390259132, 0.7883234467686544, 0.9534030596315953, 0.8250078051826413, 0.7095431366427307], 'f1_rnf': [0.6820857325655789, 0.6651279217190599, 0.8790491473332064, 0.7211861245803804, 0.7543774906412269, 0.9545419



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899, 0.6714278280778437, 0.6669528566968467, 0.6683838068477469, 0.6578207930065564], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285, 0.6879339741530553, 0.679197052853169, 0.6866302797856124, 0.6682808716707023], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298, 0.6550279986824149, 0.6551124002900652, 0.6508202833706189, 0.6484581497797357], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813, 0.7243209491102092, 0.7051201998126756, 0.7266104693516495, 0.6893537308773026], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513, 0.7666250390259132, 0.7883234467686544, 0.9534030596315953, 0.8250078051826413, 0.7095431366427307, 0.6961442397752108], 'f1_rnf': [0.6820857



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899, 0.6714278280778437, 0.6669528566968467, 0.6683838068477469, 0.6578207930065564, 0.6802216671870122], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285, 0.6879339741530553, 0.679197052853169, 0.6866302797856124, 0.6682808716707023, 0.6951258836661293], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298, 0.6550279986824149, 0.6551124002900652, 0.6508202833706189, 0.6484581497797357, 0.6641702611745746], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813, 0.7243209491102092, 0.7051201998126756, 0.7266104693516495, 0.6893537308773026, 0.7291081277968571], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.8843011759808513, 0.7666250390259132, 0.7883234467686544, 0.9534030596315953, 0



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899, 0.6714278280778437, 0.6669528566968467, 0.6683838068477469, 0.6578207930065564, 0.6802216671870122, 0.6762930585909044], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285, 0.6879339741530553, 0.679197052853169, 0.6866302797856124, 0.6682808716707023, 0.6951258836661293, 0.692288667952713], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298, 0.6550279986824149, 0.6551124002900652, 0.6508202833706189, 0.6484581497797357, 0.6641702611745746, 0.6596907993966817], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813, 0.7243209491102092, 0.7051201998126756, 0.7266104693516495, 0.6893537308773026, 0.7291081277968571, 0.7282755749817879], 'accuracy_rnf': [0.7414537697070607, 0.7320099901139497, 0.



{'accuracy_knn': [0.6457932254539779, 0.6630677974920651, 0.6510042668331772, 0.6622177125611406, 0.6633364554063899, 0.6714278280778437, 0.6669528566968467, 0.6683838068477469, 0.6578207930065564, 0.6802216671870122, 0.6762930585909044, 0.665391820168592], 'f1_knn': [0.6632034632034632, 0.6821450484722051, 0.6655530068814202, 0.676032439176544, 0.6830606446556285, 0.6879339741530553, 0.679197052853169, 0.6866302797856124, 0.6682808716707023, 0.6951258836661293, 0.692288667952713, 0.6758248682983389], 'precision_knn': [0.6321324153541451, 0.645591377868624, 0.6389181426519865, 0.6494702018506976, 0.6452568255437298, 0.6550279986824149, 0.6551124002900652, 0.6508202833706189, 0.6484581497797357, 0.6641702611745746, 0.6596907993966817, 0.6553898802248839], 'recall_knn': [0.6974868619595193, 0.7230865289557209, 0.6945051514205433, 0.7048600270579665, 0.725569778332813, 0.7243209491102092, 0.7051201998126756, 0.7266104693516495, 0.6893537308773026, 0.7291081277968571, 0.7282755749817879, 0

In [20]:

report_offensegroup_up = pd.DataFrame(report_offensegroup_up)

In [21]:
print(report_offensegroup_up)

    accuracy_baggin  accuracy_knn  accuracy_pas  accuracy_rnf  accuracy_sgd  \
0          0.618086      0.645793      0.527135      0.741454      0.572767   
1          0.641969      0.663068      0.458843      0.732010      0.621833   
2          0.629748      0.651004      0.543007      0.884301      0.599022   
3          0.604772      0.662218      0.540483      0.766625      0.578234   
4          0.635004      0.663336      0.616089      0.788323      0.601493   
5          0.637345      0.671428      0.555469      0.953403      0.614034   
6          0.628525      0.666953      0.428400      0.825008      0.590384   
7          0.643771      0.668384      0.600687      0.709543      0.619784   
8          0.614502      0.657821      0.552477      0.696144      0.602118   
9          0.645489      0.680222      0.609923      0.892497      0.603939   
10         0.632038      0.676293      0.527396      0.806536      0.593974   
11         0.628109      0.665392      0.509730     

## 1.2 DownSampling 

In [13]:
from imblearn.under_sampling import RandomUnderSampler 
rus = RandomUnderSampler(random_state=42)
X_rus, y_rus = rus.fit_sample(X, y)


In [14]:
X_rus_i = pd.DataFrame(X_rus)
y_rus_i = pd.Series(y_rus)

print (X_rus_i.info())
print (y_rus_i.value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94354 entries, 0 to 94353
Data columns (total 7 columns):
0    94354 non-null int64
1    94354 non-null int64
2    94354 non-null int64
3    94354 non-null int64
4    94354 non-null int64
5    94354 non-null int64
6    94354 non-null int64
dtypes: int64(7)
memory usage: 5.0 MB
None
1    47177
0    47177
dtype: int64


In [24]:
if __name__ == '__main__':
   
    report_offensegroup_down = sampling_offensegroup(X_rus, y_rus, 'binary', 'offensegroup_downsampled_model.pickle',
                                                     'offensegroup_downsampled_report.pickle')



{'accuracy_knn': [0.643056968463886], 'f1_knn': [0.6477600702723051], 'precision_knn': [0.6393361406985385], 'recall_knn': [0.6564089521871821], 'accuracy_rnf': [0.6557731434384537], 'f1_rnf': [0.6635177128651335], 'precision_rnf': [0.6489180646729881], 'recall_rnf': [0.6787894201424212], 'accuracy_sgd': [0.5762970498474059], 'f1_sgd': [0.5332025777528719], 'precision_sgd': [0.5935745477230193], 'recall_sgd': [0.48397761953204477], 'accuracy_baggin': [0.6345371312309257], 'f1_baggin': [0.6553130247061646], 'precision_baggin': [0.6200635497049478], 'recall_baggin': [0.6948118006103764], 'accuracy_pas': [0.4551119023397762], 'f1_pas': [0.45614925751998986], 'precision_pas': [0.45528249303268303], 'recall_pas': [0.45701932858596134]}




{'accuracy_knn': [0.643056968463886, 0.6256358087487284], 'f1_knn': [0.6477600702723051, 0.627906976744186], 'precision_knn': [0.6393361406985385, 0.6241206030150753], 'recall_knn': [0.6564089521871821, 0.6317395727365208], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031], 'f1_rnf': [0.6635177128651335, 0.6557377049180328], 'precision_rnf': [0.6489180646729881, 0.6503251625812907], 'recall_rnf': [0.6787894201424212, 0.6612410986775178], 'accuracy_sgd': [0.5762970498474059, 0.595116988809766], 'f1_sgd': [0.5332025777528719, 0.6095168015697818], 'precision_sgd': [0.5935745477230193, 0.5885836096636665], 'recall_sgd': [0.48397761953204477, 0.6319938962360122], 'accuracy_baggin': [0.6345371312309257, 0.6070701932858596], 'f1_baggin': [0.6553130247061646, 0.600672008270871], 'precision_baggin': [0.6200635497049478, 0.6106148187073043], 'recall_baggin': [0.6948118006103764, 0.5910478128179044], 'accuracy_pas': [0.4551119023397762, 0.5085198372329603], 'f1_pas': [0.45614925751998986, 



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919], 'f1_rnf': [0.6635177128651335, 0.6557377049180328, 0.6535453597497393], 'precision_rnf': [0.6489180646729881, 0.6503251625812907, 0.6703208556149732], 'recall_rnf': [0.6787894201424212, 0.6612410986775178, 0.637589013224822], 'accuracy_sgd': [0.5762970498474059, 0.595116988809766, 0.5705747711088505], 'f1_sgd': [0.5332025777528719, 0.6095168015697818, 0.5442029963557836], 'precision_sgd': [0.5935745477230193, 0.5885836096636665, 0.5798101811906816], 'recall_sgd': [0.48397761953204477, 0.6319938962360122, 0.5127161749745677], 'accuracy_baggin': [0.6345371312309257, 0.6070701932858596, 0.6186419125127162], 'f1_ba



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919, 0.6593336724313327], 'f1_rnf': [0.6635177128651335, 0.6557377049180328, 0.6535453597497393, 0.6666666666666667], 'precision_rnf': [0.6489180646729881, 0.6503251625812907, 0.6703208556149732, 0.6526187576126675], 'recall_rnf': [0.6787894201424212, 0.6612410986775178, 0.637589013224822, 0.6813326551373347], 'accuracy_sgd': [0.5762970498474059, 0.595116988809766, 0.5705747711088505, 0.6036368260427264], 'f1_sgd': [0.5332025777528719, 0.6095168015697818, 0.5442029963557836, 0.5658169661512744], 'precision_sgd': [0.5935745477230193, 0.588



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919, 0.6593336724313327, 0.6892166836215666], 'f1_rnf': [0.6635177128651335, 0.6557377049180328, 0.6535453597497393, 0.6666666666666667, 0.690632911392405], 'precision_rnf': [0.6489180646729881, 0.6503251625812907, 0.6703208556149732, 0.6526187576126675, 0.6875], 'recall_rnf': [0.6787894201424212, 0.6612410986775178, 0.637589013224822, 0.6813326551373347, 0.6937945066124109], 'accuracy_sgd': [0.5762970498474059, 0.595116988809766, 0.5705747711088505, 0.6036368



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327, 0.6386415670312897], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863, 0.6419659735349716], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553, 0.6361138861138861], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362, 0.6479267361994403], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919, 0.6593336724313327, 0.6892166836215666, 0.6667514627321293], 'f1_rnf': [0.6635177128651335, 0.6557377049180328, 0.6535453597497393, 0.6666666666666667, 0.690632911392405, 0.6701082850667339], 'precision_rnf': [0.6489180646729881, 0.6503251625812907, 0.6703208556149732, 0.6526187576126675, 0.6875, 0.6634255796559462], 'recall_rnf': [0.6787894201424212, 0.6612410986775178, 0.637



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327, 0.6386415670312897, 0.6471635716102773], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863, 0.6419659735349716, 0.6536828963795256], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553, 0.6361138861138861, 0.6418239764648198], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362, 0.6479267361994403, 0.6659882981429661], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919, 0.6593336724313327, 0.6892166836215666, 0.6667514627321293, 0.6746374968201475], 'f1_rnf': [0.6635177128651335, 0.6557377049180328, 0.6535453597497393, 0.6666666666666667, 0.690632911392405, 0.6701082850667339, 0.6777525825144873], 'precision_rnf': [0.6489180646729881, 0.6503251625812907, 0.6703208



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327, 0.6386415670312897, 0.6471635716102773, 0.6394047316204529], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863, 0.6419659735349716, 0.6536828963795256, 0.633768246996512], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553, 0.6361138861138861, 0.6418239764648198, 0.6438320209973754], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362, 0.6479267361994403, 0.6659882981429661, 0.6240142457389977], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919, 0.6593336724313327, 0.6892166836215666, 0.6667514627321293, 0.6746374968201475, 0.662045281098957], 'f1_rnf': [0.6635177128651335, 0.6557377049180328, 0.6535453597497393, 0.6666666666666667, 0.690632911392405, 0.67010828



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327, 0.6386415670312897, 0.6471635716102773, 0.6394047316204529, 0.6353345204782498], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863, 0.6419659735349716, 0.6536828963795256, 0.633768246996512, 0.6407718331036211], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553, 0.6361138861138861, 0.6418239764648198, 0.6438320209973754, 0.631358024691358], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362, 0.6479267361994403, 0.6659882981429661, 0.6240142457389977, 0.6504706181633172], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919, 0.6593336724313327, 0.6892166836215666, 0.6667514627321293, 0.6746374968201475, 0.662045281098957, 0.6546680234037141], 'f1_rnf': [0.66351771



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327, 0.6386415670312897, 0.6471635716102773, 0.6394047316204529, 0.6353345204782498, 0.6408038667005851], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863, 0.6419659735349716, 0.6536828963795256, 0.633768246996512, 0.6407718331036211, 0.6398877837286406], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553, 0.6361138861138861, 0.6418239764648198, 0.6438320209973754, 0.631358024691358, 0.6415239069291742], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362, 0.6479267361994403, 0.6659882981429661, 0.6240142457389977, 0.6504706181633172, 0.6382599847367082], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.6620040691759919, 0.6593336724313327, 0.6892166836215666, 0.6667514627321293, 0



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327, 0.6386415670312897, 0.6471635716102773, 0.6394047316204529, 0.6353345204782498, 0.6408038667005851, 0.6390231493258712], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863, 0.6419659735349716, 0.6536828963795256, 0.633768246996512, 0.6407718331036211, 0.6398877837286406, 0.6364335126825519], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553, 0.6361138861138861, 0.6418239764648198, 0.6438320209973754, 0.631358024691358, 0.6415239069291742, 0.6410322580645161], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362, 0.6479267361994403, 0.6659882981429661, 0.6240142457389977, 0.6504706181633172, 0.6382599847367082, 0.631900279827016], 'accuracy_rnf': [0.6557731434384537, 0.6528484231943031, 0.



{'accuracy_knn': [0.643056968463886, 0.6256358087487284, 0.6272889114954222, 0.6393692777212614, 0.6593336724313327, 0.6386415670312897, 0.6471635716102773, 0.6394047316204529, 0.6353345204782498, 0.6408038667005851, 0.6390231493258712, 0.6428389722716866], 'f1_knn': [0.6477600702723051, 0.627906976744186, 0.6188060866172455, 0.6513400540939266, 0.6617851281403863, 0.6419659735349716, 0.6536828963795256, 0.633768246996512, 0.6407718331036211, 0.6398877837286406, 0.6364335126825519, 0.6444669536591542], 'precision_knn': [0.6393361406985385, 0.6241206030150753, 0.6332179930795848, 0.6304140885292717, 0.6570569064928553, 0.6361138861138861, 0.6418239764648198, 0.6438320209973754, 0.631358024691358, 0.6415239069291742, 0.6410322580645161, 0.6415427275018906], 'recall_knn': [0.6564089521871821, 0.6317395727365208, 0.6050356052899288, 0.6737029501525941, 0.6665818921668362, 0.6479267361994403, 0.6659882981429661, 0.6240142457389977, 0.6504706181633172, 0.6382599847367082, 0.631900279827016, 

In [25]:
report_offensegroup_down = pd.DataFrame(report_offensegroup_down)
print (report_offensegroup_down)

    accuracy_baggin  accuracy_knn  accuracy_pas  accuracy_rnf  accuracy_sgd  \
0          0.634537      0.643057      0.455112      0.655773      0.576297   
1          0.607070      0.625636      0.508520      0.652848      0.595117   
2          0.618642      0.627289      0.516022      0.662004      0.570575   
3          0.608723      0.639369      0.464013      0.659334      0.603637   
4          0.649288      0.659334      0.533062      0.689217      0.610504   
5          0.634571      0.638642      0.487662      0.666751      0.553294   
6          0.641058      0.647164      0.513228      0.674637      0.610532   
7          0.646528      0.639405      0.436912      0.662045      0.614983   
8          0.621979      0.635335      0.501526      0.654668      0.522895   
9          0.625032      0.640804      0.460824      0.661791      0.600356   
10         0.641313      0.639023      0.571483      0.645510      0.606334   
11         0.633808      0.642839      0.568430     

# 2 target variable "ucr-rank with 3 levels" z

In [15]:
X =  feat_var[0]
y =  feat_var[1] #y is "offense group"
z =  feat_var[2] # z is "ucr-rank"

In [16]:
X = X[["hour","street", "month", "day", "LATITUDE", "LONGITUDE", "Temperature"]]

In [17]:
z_i = pd.Series(z)
z_i.value_counts()

6    102462
7     82325
4     26379
5     23033
8     22602
3     17686
2      2170
1       942
9       196
Name: ucr-rank, dtype: int64

In [18]:
def new_cat(z):
    
    new_cat = []
    for i in z:
        if i >= 1 and i < 5:
            i = 3
        elif i == 7:
            i = 2
        elif i == 8:
            i = 2
        else:
            i = 1
        new_cat.append(i)
    z = pd.Series(new_cat)
    return z

In [19]:
z = new_cat(z)

In [20]:
z_j = pd.Series(z)
z_j.value_counts()

1    125691
2    104927
3     47177
dtype: int64

## 2.1 Up-Sampling

In [21]:
ros = RandomOverSampler(random_state=43)
X_res1, z_res = ros.fit_sample(X, z)

In [22]:
X_res_i = pd.DataFrame(X_res1)
z_res_i = pd.Series(z_res)

print (X_res_i.info())
print (z_res_i.value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 377073 entries, 0 to 377072
Data columns (total 7 columns):
0    377073 non-null int64
1    377073 non-null int64
2    377073 non-null int64
3    377073 non-null int64
4    377073 non-null int64
5    377073 non-null int64
6    377073 non-null int64
dtypes: int64(7)
memory usage: 20.1 MB
None
3    125691
2    125691
1    125691
dtype: int64


In [34]:
if __name__ == '__main__':
   
    report_ucr_up = sampling_offensegroup(X_res1, z_res, 'weighted',  'ucrrank_upsampled_model.pickle', 
                                          'ucrrank_upsampled_report.pickle')



{'accuracy_knn': [0.47573587907716786], 'f1_knn': [0.47301528587375574], 'precision_knn': [0.4749948730134729], 'recall_knn': [0.47573587907716786], 'accuracy_rnf': [0.5591726332537789], 'f1_rnf': [0.5606161263665855], 'precision_rnf': [0.572737203292459], 'recall_rnf': [0.5591726332537789], 'accuracy_sgd': [0.3436117740652347], 'f1_sgd': [0.3078873470572044], 'precision_sgd': [0.3388724362983908], 'recall_sgd': [0.3436117740652347], 'accuracy_baggin': [0.44009546539379474], 'f1_baggin': [0.44040743169262275], 'precision_baggin': [0.44403718403849235], 'recall_baggin': [0.44009546539379474], 'accuracy_pas': [0.3665552903739061], 'f1_pas': [0.32114836757564774], 'precision_pas': [0.39140295966343136], 'recall_pas': [0.3665552903739061]}




{'accuracy_knn': [0.47573587907716786, 0.47993635640413684], 'f1_knn': [0.47301528587375574, 0.47555573760845743], 'precision_knn': [0.4749948730134729, 0.4786082449046639], 'recall_knn': [0.47573587907716786, 0.47993635640413684], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855], 'f1_rnf': [0.5606161263665855, 0.6486314952627743], 'precision_rnf': [0.572737203292459, 0.6500404161424476], 'recall_rnf': [0.5591726332537789, 0.6499602227525855], 'accuracy_sgd': [0.3436117740652347, 0.3681782020684169], 'f1_sgd': [0.3078873470572044, 0.351093665890447], 'precision_sgd': [0.3388724362983908, 0.3673065518579553], 'recall_sgd': [0.3436117740652347, 0.3681782020684169], 'accuracy_baggin': [0.44009546539379474, 0.4505648369132856], 'f1_baggin': [0.44040743169262275, 0.44775230149940815], 'precision_baggin': [0.44403718403849235, 0.44722494064784923], 'recall_baggin': [0.44009546539379474, 0.4505648369132856], 'accuracy_pas': [0.3665552903739061, 0.38463007159904533], 'f1_pas': [0.32114



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461], 'f1_rnf': [0.5606161263665855, 0.6486314952627743, 0.6677048076880495], 'precision_rnf': [0.572737203292459, 0.6500404161424476, 0.6675228332961971], 'recall_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461], 'accuracy_sgd': [0.3436117740652347, 0.3681782020684169, 0.37403341288782815], 'f1_sgd': [0.3078873470572044, 0.351093665890447, 0.3680696830697378], 'precision_sgd': [0.3388724362983908, 0.3673065518579553, 0.3825106377248456], 'recall_sgd': [0.3436117740652347, 0.3681782020684169, 0.37403341288782815], 'accuracy_baggin': [0.44009546539379474, 0.4505648369132856, 0.461479713603



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808], 'f1_rnf': [0.5606161263665855, 0.6486314952627743, 0.6677048076880495, 0.6573425488340184], 'precision_rnf': [0.572737203292459, 0.6500404161424476, 0.6675228332961971, 0.6570440119478682], 'recall_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808], 'accuracy_sgd': [0.3436117740652347, 0.3681782020684169, 0.37403341288782815, 0.3195850041372287], 'f1_sgd': [0.3078873470572044, 0.351093665890447, 0.3680696830697378, 0.31563070683191674], 'precision_sgd': [0.3388724



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808, 0.6957545668639806], 'f1_rnf': [0.5606161263665855, 0.6486314952627743, 0.6677048076880495, 0.6573425488340184, 0.6910422105708477], 'precision_rnf': [0.572737203292459, 0.6500404161424476, 0.6675228332961971, 0.6570440119478682, 0.6897617435205322], 'recall_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808, 0.6957545668639806], 'accuracy_sgd': [0.3436117740652347, 0.3681782020684169, 



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037, 0.47889097636016403], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861, 0.47906905829509633], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808, 0.6957545668639806, 0.6827700337343263], 'f1_rnf': [0.5606161263665855, 0.6486314952627743, 0.6677048076880495, 0.6573425488340184, 0.6910422105708477, 0.6795943531053427], 'precision_rnf': [0.572737203292459, 0.6500404161424476, 0.6675228332961971, 0.6570440119478682, 0.6897617435205322, 0.678019330948305], 'recall_rnf': [0.559172633253778



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037, 0.47889097636016403, 0.47158392975844493], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861, 0.47906905829509633, 0.47135573737507797], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808, 0.6957545668639806, 0.6827700337343263, 0.6955636178473681], 'f1_rnf': [0.5606161263665855, 0.6486314952627743, 0.6677048076880495, 0.6573425488340184, 0.6910422105708477, 0.6795943531053427, 0.6892607675988909], 'precision_rnf': [0.572737203292459, 0.65004041



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037, 0.47889097636016403, 0.47158392975844493, 0.4845888641121079], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861, 0.47906905829509633, 0.47135573737507797, 0.4861692936867715], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808, 0.6957545668639806, 0.6827700337343263, 0.6955636178473681, 0.5945515880593215], 'f1_rnf': [0.5606161263665855, 0.6486314952627743, 0.6677048076880495, 0.6573425488340184, 0.69104221



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037, 0.47889097636016403, 0.47158392975844493, 0.4845888641121079, 0.47742287254119936], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861, 0.47906905829509633, 0.47135573737507797, 0.4861692936867715, 0.47718837187998603], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808, 0.6957545668639806, 0.6827700337343263, 0.6955636178473681, 0.5945515880593215, 0.5794666157469289



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606, 0.4816370695690917], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037, 0.47889097636016403, 0.47158392975844493, 0.4845888641121079, 0.47742287254119936, 0.4769871771685863], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861, 0.47906905829509633, 0.47135573737507797, 0.4861692936867715, 0.47718837187998603, 0.47679457277018217], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606, 0.4816370695690917], 'accuracy_rnf': [0.5591726332537789, 0.6499602227525855, 0.6700715990453461, 0.6592833046909808, 0.695754566863980



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606, 0.4816370695690917, 0.5045509515625994], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037, 0.47889097636016403, 0.47158392975844493, 0.4845888641121079, 0.47742287254119936, 0.4769871771685863, 0.5010695516015539], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861, 0.47906905829509633, 0.47135573737507797, 0.4861692936867715, 0.47718837187998603, 0.47679457277018217, 0.5030163195154201], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606, 0.4816370695690917, 0.5045509515625994], 'accuracy_rnf': [0.559172633253778



{'accuracy_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606, 0.4816370695690917, 0.5045509515625994, 0.49796321048946596], 'f1_knn': [0.47301528587375574, 0.47555573760845743, 0.4764448029694337, 0.4622924076917051, 0.47565553382944037, 0.47889097636016403, 0.47158392975844493, 0.4845888641121079, 0.47742287254119936, 0.4769871771685863, 0.5010695516015539, 0.49510187285779717], 'precision_knn': [0.4749948730134729, 0.4786082449046639, 0.47745753765951365, 0.46293755495614536, 0.4776042805044861, 0.47906905829509633, 0.47135573737507797, 0.4861692936867715, 0.47718837187998603, 0.47679457277018217, 0.5030163195154201, 0.49572941639636986], 'recall_knn': [0.47573587907716786, 0.47993635640413684, 0.47993635640413684, 0.46480173127108393, 0.4801413022722933, 0.4830055375214818, 0.4769269938259818, 0.490516198841576, 0.47972757940296606, 0.4816370695

In [35]:
report_ucr_up = pd.DataFrame(report_ucr_up)
print(report_ucr_up)

    accuracy_baggin  accuracy_knn  accuracy_pas  accuracy_rnf  accuracy_sgd  \
0          0.440095      0.475736      0.366555      0.559173      0.343612   
1          0.450565      0.479936      0.384630      0.649960      0.368178   
2          0.461480      0.479936      0.374288      0.670072      0.374033   
3          0.429699      0.464802      0.345045      0.659283      0.319585   
4          0.445166      0.480141      0.376933      0.695755      0.361944   
5          0.452899      0.483006      0.362771      0.682770      0.409076   
6          0.440583      0.476927      0.366718      0.695564      0.405989   
7          0.456909      0.490516      0.323468      0.594552      0.370600   
8          0.430813      0.479728      0.369168      0.579467      0.352778   
9          0.444179      0.481637      0.359462      0.695659      0.415123   
10         0.450831      0.504551      0.382662      0.782827      0.376297   
11         0.436350      0.497963      0.372510     

## 2.2 Downsampling

In [23]:
from imblearn.under_sampling import RandomUnderSampler 
rus = RandomUnderSampler(random_state=42)
X_rus1, z_rus1 = rus.fit_sample(X, z)


In [24]:
X_rus_j = pd.DataFrame(X_rus1)
z_rus_j = pd.Series(z_rus1)

print (X_rus_j.info())
print (z_rus_j.value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 141531 entries, 0 to 141530
Data columns (total 7 columns):
0    141531 non-null int64
1    141531 non-null int64
2    141531 non-null int64
3    141531 non-null int64
4    141531 non-null int64
5    141531 non-null int64
6    141531 non-null int64
dtypes: int64(7)
memory usage: 7.6 MB
None
3    47177
2    47177
1    47177
dtype: int64


In [38]:
if __name__ == '__main__':
   
    report_ucr_down = sampling_offensegroup(X_rus1, z_rus1,'weighted',  'ucrrank_downsampled_model.pickle',
                                            'ucrrank_downsampled_report.pickle')



{'accuracy_knn': [0.4710918955578162], 'f1_knn': [0.4687451321192662], 'precision_knn': [0.4688646963011701], 'recall_knn': [0.4710918955578162], 'accuracy_rnf': [0.5061885384876229], 'f1_rnf': [0.5053687779628614], 'precision_rnf': [0.5077107830947593], 'recall_rnf': [0.5061885384876229], 'accuracy_sgd': [0.3987792472024415], 'f1_sgd': [0.39513840723653776], 'precision_sgd': [0.3963779745120379], 'recall_sgd': [0.3987792472024415], 'accuracy_baggin': [0.44362495761275006], 'f1_baggin': [0.4430573189532098], 'precision_baggin': [0.44608965862550354], 'recall_baggin': [0.44362495761275006], 'accuracy_pas': [0.3584265852831468], 'f1_pas': [0.3023135143789276], 'precision_pas': [0.37404794895628884], 'recall_pas': [0.3584265852831468]}




{'accuracy_knn': [0.4710918955578162, 0.46719226856561547], 'f1_knn': [0.4687451321192662, 0.4654909414970245], 'precision_knn': [0.4688646963011701, 0.4656355315378022], 'recall_knn': [0.4710918955578162, 0.46719226856561547], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372], 'f1_rnf': [0.5053687779628614, 0.5118528473410173], 'precision_rnf': [0.5077107830947593, 0.5166657776851173], 'recall_rnf': [0.5061885384876229, 0.5126314004747372], 'accuracy_sgd': [0.3987792472024415, 0.37707697524584605], 'f1_sgd': [0.39513840723653776, 0.350808628606691], 'precision_sgd': [0.3963779745120379, 0.3974033865154086], 'recall_sgd': [0.3987792472024415, 0.37707697524584605], 'accuracy_baggin': [0.44362495761275006, 0.43023058663953884], 'f1_baggin': [0.4430573189532098, 0.4302276793949826], 'precision_baggin': [0.44608965862550354, 0.4330173092271244], 'recall_baggin': [0.44362495761275006, 0.43023058663953884], 'accuracy_pas': [0.3584265852831468, 0.3942861987114276], 'f1_pas': [0.3023135



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067], 'f1_rnf': [0.5053687779628614, 0.5118528473410173, 0.5122789890673902], 'precision_rnf': [0.5077107830947593, 0.5166657776851173, 0.5150323759940479], 'recall_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067], 'accuracy_sgd': [0.3987792472024415, 0.37707697524584605, 0.3730077992539844], 'f1_sgd': [0.39513840723653776, 0.350808628606691, 0.37274827860299903], 'precision_sgd': [0.3963779745120379, 0.3974033865154086, 0.3725296922195943], 'recall_sgd': [0.3987792472024415, 0.37707697524584605, 0.3730077992539844], 'accuracy_baggin': [0.44362495761275006, 0.43023058663953884, 0.4483723296032



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635], 'f1_rnf': [0.5053687779628614, 0.5118528473410173, 0.5122789890673902, 0.5130788897126619], 'precision_rnf': [0.5077107830947593, 0.5166657776851173, 0.5150323759940479, 0.5150089697283494], 'recall_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635], 'accuracy_sgd': [0.3987792472024415, 0.37707697524584605, 0.3730077992539844, 0.34520176330959645], 'f1_sgd': [0.39513840723653776, 0.350808628606691, 0.37274827860299903, 0.32101765527869763], 'precision_sgd': [0.3963779745



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635, 0.5357748389284503], 'f1_rnf': [0.5053687779628614, 0.5118528473410173, 0.5122789890673902, 0.5130788897126619, 0.5350727052272621], 'precision_rnf': [0.5077107830947593, 0.5166657776851173, 0.5150323759940479, 0.5150089697283494, 0.5372173653549195], 'recall_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635, 0.5357748389284503], 'accuracy_sgd': [0.3987792472024415, 0.37707697524584605, 0.3



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475, 0.4581686903710403], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915, 0.458338996743595], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635, 0.5357748389284503, 0.5082676163825999], 'f1_rnf': [0.5053687779628614, 0.5118528473410173, 0.5122789890673902, 0.5130788897126619, 0.5350727052272621, 0.5074826283620397], 'precision_rnf': [0.5077107830947593, 0.5166657776851173, 0.5150323759940479, 0.5150089697283494, 0.5372173653549195, 0.5095867389893579], 'recall_rnf': [0.5061885384876229, 0.5



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475, 0.4581686903710403, 0.4787090377652946], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915, 0.458338996743595, 0.47995485303641905], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635, 0.5357748389284503, 0.5082676163825999, 0.5250572373441872], 'f1_rnf': [0.5053687779628614, 0.5118528473410173, 0.5122789890673902, 0.5130788897126619, 0.5350727052272621, 0.5074826283620397, 0.5237390108506236], 'precision_rnf': [0.5077107830947593, 0.5166657776851173



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475, 0.4581686903710403, 0.4787090377652946, 0.4716492805185942], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915, 0.458338996743595, 0.47995485303641905, 0.47146015319474394], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635, 0.5357748389284503, 0.5082676163825999, 0.5250572373441872, 0.5174255914525566], 'f1_rnf': [0.5053687779628614, 0.5118528473410173, 0.5122789890673902, 0.5130788897126619, 0.53507270522726



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475, 0.4581686903710403, 0.4787090377652946, 0.4716492805185942, 0.4568889319230416], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915, 0.458338996743595, 0.47995485303641905, 0.47146015319474394, 0.4575276067244457], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635, 0.5357748389284503, 0.5082676163825999, 0.5250572373441872, 0.5174255914525566, 0.5080980242516747], 'f1_rnf



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264, 0.4755363351140507], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475, 0.4581686903710403, 0.4787090377652946, 0.4716492805185942, 0.4568889319230416, 0.47387466697398645], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915, 0.458338996743595, 0.47995485303641905, 0.47146015319474394, 0.4575276067244457, 0.4748941176789549], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264, 0.4755363351140507], 'accuracy_rnf': [0.5061885384876229, 0.5126314004747372, 0.5125466259749067, 0.5138182434723635, 0.5357748389284503, 0.50826



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264, 0.4755363351140507, 0.47485796659035023], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475, 0.4581686903710403, 0.4787090377652946, 0.4716492805185942, 0.4568889319230416, 0.47387466697398645, 0.47341286589500164], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915, 0.458338996743595, 0.47995485303641905, 0.47146015319474394, 0.4575276067244457, 0.4748941176789549, 0.47341048244597794], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264, 0.4755363351140507, 0.47485796659035023], 'accuracy_rnf': [0.5061885384876229, 0.5



{'accuracy_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264, 0.4755363351140507, 0.47485796659035023, 0.4710421436445349], 'f1_knn': [0.4687451321192662, 0.4654909414970245, 0.45268804643984734, 0.4697956050871611, 0.48067307781873475, 0.4581686903710403, 0.4787090377652946, 0.4716492805185942, 0.4568889319230416, 0.47387466697398645, 0.47341286589500164, 0.46867151248101147], 'precision_knn': [0.4688646963011701, 0.4656355315378022, 0.45240126301798467, 0.47074385051218337, 0.4806068840268915, 0.458338996743595, 0.47995485303641905, 0.47146015319474394, 0.4575276067244457, 0.4748941176789549, 0.47341048244597794, 0.46864114731018863], 'recall_knn': [0.4710918955578162, 0.46719226856561547, 0.4532892505934215, 0.4729569345540861, 0.48262122753475756, 0.4605274315271771, 0.4816416518273552, 0.4730772492156364, 0.4595098787416264, 0.4755363351140507, 

In [39]:
report_ucr_down = pd.DataFrame(report_ucr_down)
print(report_ucr_down)

    accuracy_baggin  accuracy_knn  accuracy_pas  accuracy_rnf  accuracy_sgd  \
0          0.443625      0.471092      0.358427      0.506189      0.398779   
1          0.430231      0.467192      0.394286      0.512631      0.377077   
2          0.448372      0.453289      0.407935      0.512547      0.373008   
3          0.438538      0.472957      0.339946      0.513818      0.345202   
4          0.468718      0.482621      0.384876      0.535775      0.411919   
5          0.443059      0.460527      0.364708      0.508268      0.372848   
6          0.463834      0.481642      0.380819      0.525057      0.380395   
7          0.460867      0.473077      0.387857      0.517426      0.395065   
8          0.440431      0.459510      0.334520      0.508098      0.365386   
9          0.453405      0.475536      0.340880      0.521157      0.392436   
10         0.458916      0.474858      0.375307      0.514458      0.438565   
11         0.455016      0.471042      0.379208     

In [40]:
 with open('undersampling_reports.pickle', 'wb') as b:
            pickle.dump([report_offensegroup_up, report_ucr_up, report_offensegroup_down, report_ucr_down ] , b)

# Calculating averages and selecting best model for demo:

In [41]:
score_list = [report_offensegroup_up, report_offensegroup_down,report_ucr_up, report_ucr_down ]

for i in score_list:
    
    print(i.mean())
    print('------------------------\n')

accuracy_baggin     0.629946
accuracy_knn        0.664326
accuracy_pas        0.539137
accuracy_rnf        0.792144
accuracy_sgd        0.598143
f1_baggin           0.630570
f1_knn              0.679606
f1_pas              0.536170
f1_rnf              0.747655
f1_sgd              0.586036
precision_baggin    0.629719
precision_knn       0.650003
precision_pas       0.548957
precision_rnf       0.902748
precision_sgd       0.604442
recall_baggin       0.632249
recall_knn          0.712156
recall_pas          0.550291
recall_rnf          0.651299
recall_sgd          0.569452
dtype: float64
------------------------

accuracy_baggin     0.630212
accuracy_knn        0.639825
accuracy_pas        0.501400
accuracy_rnf        0.662017
accuracy_sgd        0.587563
f1_baggin           0.631583
f1_knn              0.641548
f1_pas              0.452285
f1_rnf              0.662562
f1_sgd              0.563635
precision_baggin    0.629664
precision_knn       0.638448
precision_pas       0.504541
pr

# The best scores are:

for offensegroup : knn, random forest and bagging and with upsampling
    
for ucr-rank3 clasess : Knn, random forest and bagging with upsampling
    

    


Classification reports: See the demostration notebook

# Classification reports: (from the last Kfold)

In [25]:
reports = [ 'offensegroup_upsampled_report.pickle', 'offensegroup_downsampled_report.pickle',
           'ucrrank_upsampled_report.pickle', 'ucrrank_downsampled_report.pickle']
classification_matrix = []



with open('offensegroup_upsampled_report.pickle', 'rb') as upsampled_model_offensegroup:
    
        estimator_upsampled_offensegroup = pickle.load(upsampled_model_offensegroup)
        
with open('ucrrank_upsampled_report.pickle', 'rb') as upsampled_model_ucrrank:
    
        estimator_upsampled_ucrrank = pickle.load(upsampled_model_ucrrank)
        

with open('offensegroup_downsampled_report.pickle', 'rb') as downsampled_model_offensegroup:
    
        estimator_downsampled_offensegroup = pickle.load(downsampled_model_offensegroup)
        
with open('ucrrank_downsampled_report.pickle', 'rb') as downsampled_model_ucrrank:
    
        estimator_downsampled_ucrrank = pickle.load(downsampled_model_ucrrank)
        

list order: [predicted_knn, predicted_rnf, predicted_sgd, predicted_baggin, predicted_pas, report_knn, report_rnf, report_sgd, report_baggin, report_pas] 

In [27]:
print("Classification Report for Upsampled Knn on offensegroup: \n\n"+estimator_upsampled_offensegroup[5])
print("==============================================================================")
print("Classification Report for Upsampled RandomForest on offensegroup: \n\n"+estimator_upsampled_offensegroup[6])
print("==============================================================================")
# 0 Nonviolent, 1 is violent.
print("Classification Report for Upsampled BaggingClassifier on offensegroup: \n\n"+estimator_upsampled_offensegroup[8])
print("==============================================================================")
print("Classification Report for Upsampled Knn on ucrrank: \n\n"+estimator_upsampled_ucrrank[5])
print("==============================================================================")
print("Classification Report for Upsampled RandomForest on ucrrank: \n\n"+estimator_upsampled_ucrrank[6])
print("==============================================================================")
# 0 Nonviolent, 1 is violent.
print("Classification Report for Upsampled BaggingClassifier on ucrrank: \n\n"+estimator_upsampled_ucrrank[8])

Classification Report for Upsampled Knn on offensegroup: 

             precision    recall  f1-score   support

          0       0.68      0.63      0.65     19218
          1       0.66      0.70      0.68     19218

avg / total       0.67      0.67      0.67     38436

Classification Report for Upsampled RandomForest on offensegroup: 

             precision    recall  f1-score   support

          0       0.64      0.94      0.76     19218
          1       0.89      0.48      0.62     19218

avg / total       0.77      0.71      0.69     38436

Classification Report for Upsampled BaggingClassifier on offensegroup: 

             precision    recall  f1-score   support

          0       0.63      0.64      0.63     19218
          1       0.63      0.62      0.63     19218

avg / total       0.63      0.63      0.63     38436

Classification Report for Upsampled Knn on ucrrank: 

             precision    recall  f1-score   support

          1       0.49      0.44      0.46     

In [30]:
# extra prediction. judith:

In [28]:
print("Classification Report for downsampled Knn on offensegroup: \n\n"+estimator_downsampled_offensegroup[5])
print("==============================================================================")
print("Classification Report for downsampled RandomForest on offensegroup: \n\n"+estimator_downsampled_offensegroup[6])
print("==============================================================================")
# 0 Nonviolent, 1 is violent.
print("Classification Report for downsampled BaggingClassifier on offensegroup: \n\n"+estimator_downsampled_offensegroup[8])
print("==============================================================================")
print("Classification Report for downsampled Knn on ucrrank: \n\n"+estimator_downsampled_ucrrank[5])
print("==============================================================================")
print("Classification Report for downsampled RandomForest on ucrrank: \n\n"+estimator_downsampled_ucrrank[6])
print("==============================================================================")
# 0 Nonviolent, 1 is violent.
print("Classification Report for downsampled BaggingClassifier on ucrrank: \n\n"+estimator_downsampled_ucrrank[8])

Classification Report for downsampled Knn on offensegroup: 

             precision    recall  f1-score   support

          0       0.64      0.64      0.64      3931
          1       0.64      0.65      0.64      3931

avg / total       0.64      0.64      0.64      7862

Classification Report for downsampled RandomForest on offensegroup: 

             precision    recall  f1-score   support

          0       0.66      0.66      0.66      3931
          1       0.66      0.66      0.66      3931

avg / total       0.66      0.66      0.66      7862

Classification Report for downsampled BaggingClassifier on offensegroup: 

             precision    recall  f1-score   support

          0       0.63      0.64      0.64      3931
          1       0.64      0.63      0.63      3931

avg / total       0.63      0.63      0.63      7862

Classification Report for downsampled Knn on ucrrank: 

             precision    recall  f1-score   support

          1       0.48      0.49      0