In [1]:
import tkinter
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import metrics
from sklearn import datasets
import pandas as pd
import numpy as np
import pandas_profiling as pp
import re

## Load and Clean Data

In [2]:
# Read in CSV
df = pd.read_csv("final_project.csv")
print(df.shape)

(160000, 51)


In [3]:
# Rename obvious columns
df.rename(columns={'x24': 'continent', 'x29': 'month', 'x30': 'day'}, inplace = True)
#list(df.columns) 

In [4]:
# Correct misspellings and standardize values in labeled columns
df['continent'].replace('euorpe', 'europe',inplace=True)
df['month'].replace('Dev', 'December',inplace=True)
df['month'].replace('Aug', 'August',inplace=True)
df['month'].replace('Jun', 'June',inplace=True)
df['month'].replace('Apr', 'April',inplace=True)
df['month'].replace('Nov', 'November',inplace=True)
df['month'].replace('sept.', 'September',inplace=True)
df['month'].replace('Oct', 'October',inplace=True)
df['month'].replace('Mar', 'March',inplace=True)
df['day'].replace('thurday', 'thursday',inplace=True)

# Fill NA with 'other' in labeled columns
df['continent'] = df['continent'].fillna('other')
df['month'] = df['month'].fillna('other')
df['day'] = df['day'].fillna('other')

# check unique values in labeled columns
print (df['continent'].unique())
print (df['month'].unique())
print (df['day'].unique())

['europe' 'asia' 'america' 'other']
['July' 'August' 'June' 'May' 'September' 'April' 'November' 'October'
 'other' 'March' 'Feb' 'December' 'January']
['tuesday' 'wednesday' 'thursday' 'monday' 'friday' 'other']


In [5]:
# Correct misspellings and standardize values in labeled columns
df['continent'].replace('euorpe', 'europe',inplace=True)
df['month'].replace('Dev', '12',inplace=True)
df['month'].replace('Aug', '8',inplace=True)
df['month'].replace('Jun', '6',inplace=True)
df['month'].replace('Apr', '4',inplace=True)
df['month'].replace('Nov', '11',inplace=True)
df['month'].replace('sept.', '9',inplace=True)
df['month'].replace('Oct', '10',inplace=True)
df['month'].replace('Mar', '3',inplace=True)
df['month'].replace('January', '1',inplace=True)
df['month'].replace('Feb', '2',inplace=True)
df['month'].replace('May', '5',inplace=True)
df['month'].replace('July', '7',inplace=True)
df['month'].replace('December', '12',inplace=True)
df['month'].replace('August', '8',inplace=True)
df['month'].replace('June', '6',inplace=True)
df['month'].replace('April', '4',inplace=True)
df['month'].replace('November', '11',inplace=True)
df['month'].replace('September', '9',inplace=True)
df['month'].replace('October', '10',inplace=True)
df['month'].replace('March', '3',inplace=True)
df['day'].replace('thurday', 'thursday',inplace=True)


# Fill NA with 'other' in labeled columns
df['continent'] = df['continent'].fillna('other')
df['month'] = df['month'].fillna('other')
df['day'] = df['day'].fillna('other')
df['month'].replace('other','0', inplace=True)

# check unique values in labeled columns
print (df['continent'].unique())
print (df['month'].unique())
print (df['day'].unique())

['europe' 'asia' 'america' 'other']
['7' '8' '6' '5' '9' '4' '11' '10' '0' '3' '2' '12' '1']
['tuesday' 'wednesday' 'thursday' 'monday' 'friday' 'other']


In [6]:
# initialize temp for x37 column
temp_x37 = []

# Remove $ ) , characters and replace '(' with '-' 
for i in range (0,len(df)) :
    try :
        n = df['x37'][i]
        nstr = re.sub(r'[$|,|)]',r'', n)
        nstr = re.sub(r'[(]',r'-',nstr)
        #nstr= float(nstr)
        temp_x37.append(nstr)
    except :
        nstr = ''
        temp_x37.append(nstr)

In [7]:
# Verify len of both x37 matches
print(len(df['x37']))
print(len(temp_x37))

# Replace 'x37' with new values and convert to numeric
df['x37'] = temp_x37
df["x37"] = pd.to_numeric(df["x37"])
df['x37']

160000
160000


0         1313.96
1         1962.78
2          430.47
3        -2366.29
4         -620.66
           ...   
159995    -891.96
159996    1588.65
159997     687.46
159998     439.21
159999   -1229.34
Name: x37, Length: 160000, dtype: float64

In [8]:
# Subsetting data set by continent and print length of each
cont = ['asia', 'america','europe', 'other']

for n in cont :
    temp = df['continent'] == n
    df_temp = df[temp]
    #df_[n] = df_temp
    print (n, 'length is', len(df_temp))

# Subsetting by continent    
is_asia = df['continent']=='asia'
df_asia = df[is_asia]

is_europe = df['continent']=='europe'
df_europe = df[is_europe]

is_america = df['continent']=='america'
df_america = df[is_america]

is_other = df['continent']=='other'
df_other = df[is_other]

asia length is 138965
america length is 4469
europe length is 16538
other length is 28


## Explore Data

In [9]:
# Simple and fast exploratory data analysis 
pp.ProfileReport(df_america)

  variable_stats = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)


0,1
Number of variables,52
Number of observations,4469
Total Missing (%),0.0%
Total size in memory,1.8 MiB
Average record size in memory,416.0 B

0,1
Numeric,45
Categorical,3
Boolean,1
Date,0
Text (Unique),0
Rejected,3
Unsupported,0

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,80692
Minimum,11
Maximum,159992
Zeros (%),0.0%

0,1
Minimum,11.0
5-th percentile,7889.6
Q1,40450.0
Median,80882.0
Q3,121250.0
95-th percentile,151870.0
Maximum,159992.0
Range,159981.0
Interquartile range,80804.0

0,1
Standard deviation,46306
Coef of variation,0.57386
Kurtosis,-1.221
Mean,80692
MAD,40280
Skewness,-0.024556
Sum,360614096
Variance,2144200000
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
81916,1,0.0%,
47754,1,0.0%,
68224,1,0.0%,
123521,1,0.0%,
115197,1,0.0%,
19076,1,0.0%,
113286,1,0.0%,
12935,1,0.0%,
119435,1,0.0%,
24674,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
11,1,0.0%,
30,1,0.0%,
39,1,0.0%,
69,1,0.0%,
93,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
159901,1,0.0%,
159912,1,0.0%,
159920,1,0.0%,
159987,1,0.0%,
159992,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.0011259
Minimum,-1.4799
Maximum,1.4695
Zeros (%),0.0%

0,1
Minimum,-1.4799
5-th percentile,-0.61205
Q1,-0.25995
Median,0.0016202
Q3,0.2546
95-th percentile,0.60987
Maximum,1.4695
Range,2.9494
Interquartile range,0.51455

0,1
Standard deviation,0.37348
Coef of variation,-331.7
Kurtosis,-0.095404
Mean,-0.0011259
MAD,0.30019
Skewness,0.0063479
Sum,-5.0307
Variance,0.13948
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-0.057825872559253214,1,0.0%,
-0.12083968126419445,1,0.0%,
0.3631663378270901,1,0.0%,
-0.16002392994808515,1,0.0%,
0.11704868535375786,1,0.0%,
-0.029843646205759806,1,0.0%,
0.2000907816706446,1,0.0%,
0.9393589529946952,1,0.0%,
0.10884046228200664,1,0.0%,
0.22348331387325426,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-1.479927324820506,1,0.0%,
-1.308940171853241,1,0.0%,
-1.2203676249592703,1,0.0%,
-1.1662523583455482,1,0.0%,
-1.135280098338538,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1.1598133469471033,1,0.0%,
1.250157018593822,1,0.0%,
1.2668684740207026,1,0.0%,
1.3621380505513272,1,0.0%,
1.469505160621286,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.045134
Minimum,-22.261
Maximum,21.876
Zeros (%),0.0%

0,1
Minimum,-22.261
5-th percentile,-10.483
Q1,-4.391
Median,-0.09152
Q3,4.1298
95-th percentile,10.629
Maximum,21.876
Range,44.137
Interquartile range,8.5208

0,1
Standard deviation,6.3591
Coef of variation,-140.89
Kurtosis,0.0040874
Mean,-0.045134
MAD,5.059
Skewness,0.0060594
Sum,-201.71
Variance,40.439
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-12.113527565857536,1,0.0%,
-7.6481104578237,1,0.0%,
1.7252467571408219,1,0.0%,
-0.2692326681571991,1,0.0%,
-1.1763316146272849,1,0.0%,
1.9299941470287063,1,0.0%,
3.4553142368068164,1,0.0%,
2.0962127597377886,1,0.0%,
3.3836244003754934,1,0.0%,
6.814520683297565,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-22.26074144865308,1,0.0%,
-22.22580987788904,1,0.0%,
-20.88317176882016,1,0.0%,
-20.58886901596813,1,0.0%,
-20.233787630773424,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
18.5025585255357,1,0.0%,
18.68135847280788,1,0.0%,
18.73701997769821,1,0.0%,
21.5185830985432,1,0.0%,
21.87625842559403,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.48552
Minimum,-50.56
Maximum,49.259
Zeros (%),0.0%

0,1
Minimum,-50.56
5-th percentile,-20.557
Q1,-7.9238
Median,0.51785
Q3,9.3742
95-th percentile,21.094
Maximum,49.259
Range,99.819
Interquartile range,17.298

0,1
Standard deviation,12.907
Coef of variation,26.585
Kurtosis,0.22895
Mean,0.48552
MAD,10.239
Skewness,-0.11809
Sum,2169.8
Variance,166.6
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
25.70469269947881,1,0.0%,
3.7891137072323082,1,0.0%,
-15.70421672730438,1,0.0%,
-17.92293786269011,1,0.0%,
25.81606837829421,1,0.0%,
8.127096522851517,1,0.0%,
11.455684037135056,1,0.0%,
-1.4160410459061712,1,0.0%,
1.5137499520448598,1,0.0%,
16.927624072074213,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-50.56016834423183,1,0.0%,
-50.36327729364695,1,0.0%,
-48.6434927907919,1,0.0%,
-46.19341042180216,1,0.0%,
-43.06075730418448,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
37.87190812897983,1,0.0%,
41.8253044966414,1,0.0%,
42.359017645195365,1,0.0%,
47.39042900018739,1,0.0%,
49.25927735376687,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.21077
Minimum,-33.865
Maximum,29.078
Zeros (%),0.0%

0,1
Minimum,-33.865
5-th percentile,-13.367
Q1,-5.742
Median,-0.092809
Q3,5.4121
95-th percentile,12.741
Maximum,29.078
Range,62.943
Interquartile range,11.154

0,1
Standard deviation,8.092
Coef of variation,-38.393
Kurtosis,0.037261
Mean,-0.21077
MAD,6.4633
Skewness,-0.027111
Sum,-941.72
Variance,65.48
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
5.217910981142366,1,0.0%,
10.828260414367161,1,0.0%,
0.5750027291758699,1,0.0%,
2.4497869525109848,1,0.0%,
7.964575295842072,1,0.0%,
1.5240573049293178,1,0.0%,
2.159707072207002,1,0.0%,
4.139054265636728,1,0.0%,
-4.487735475037691,1,0.0%,
-5.47532950836148,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-33.86482705623085,1,0.0%,
-28.32358396484668,1,0.0%,
-28.210547613616363,1,0.0%,
-27.635574094085136,1,0.0%,
-25.52826196589439,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
25.99975245140345,1,0.0%,
26.4435851231186,1,0.0%,
26.836089116123308,1,0.0%,
26.875774353592483,1,0.0%,
29.07819356708271,1,0.0%,

0,1
Distinct count,4467
Unique (%),100.0%
Missing (%),0.1%
Missing (n),3
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.15582
Minimum,-22.071
Maximum,22.972
Zeros (%),0.0%

0,1
Minimum,-22.071
5-th percentile,-10.428
Q1,-4.299
Median,0.22065
Q3,4.5052
95-th percentile,10.607
Maximum,22.972
Range,45.043
Interquartile range,8.8042

0,1
Standard deviation,6.4016
Coef of variation,41.083
Kurtosis,-0.045178
Mean,0.15582
MAD,5.1149
Skewness,0.0080131
Sum,695.9
Variance,40.981
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-5.8622610812068405,1,0.0%,
3.960285287944069,1,0.0%,
-11.693648663626115,1,0.0%,
2.343155048686268,1,0.0%,
-8.110083995186269,1,0.0%,
7.7305717531736615,1,0.0%,
-3.792110776902701,1,0.0%,
-7.28663336686937,1,0.0%,
6.4306734609140275,1,0.0%,
-17.17908019406292,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-22.07064764334438,1,0.0%,
-21.79928873217019,1,0.0%,
-20.572153325778967,1,0.0%,
-19.74270561639045,1,0.0%,
-19.28524664071549,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
19.89209581308152,1,0.0%,
19.98130778235576,1,0.0%,
21.506094148265767,1,0.0%,
22.23635321029156,1,0.0%,
22.972433446849426,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.089797
Minimum,-29.068
Maximum,28.053
Zeros (%),0.0%

0,1
Minimum,-29.068
5-th percentile,-13.072
Q1,-5.2331
Median,-0.21045
Q3,5.2261
95-th percentile,12.947
Maximum,28.053
Range,57.121
Interquartile range,10.459

0,1
Standard deviation,7.8566
Coef of variation,-87.492
Kurtosis,-0.020625
Mean,-0.089797
MAD,6.2631
Skewness,-0.0060831
Sum,-401.21
Variance,61.726
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-4.5513022682177215,1,0.0%,
-8.657916020010335,1,0.0%,
-10.129011896291363,1,0.0%,
-13.26851584683671,1,0.0%,
7.688712913003206,1,0.0%,
-10.614400248810767,1,0.0%,
-13.419193337155106,1,0.0%,
-2.766459369813969,1,0.0%,
-1.877565684244182,1,0.0%,
-4.821896554287823,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-29.06837329246414,1,0.0%,
-26.217484700340208,1,0.0%,
-25.490972448814063,1,0.0%,
-25.223511465218635,1,0.0%,
-23.72258858554004,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
23.58836128634839,1,0.0%,
24.54602387849461,1,0.0%,
25.148455276279503,1,0.0%,
25.644343748114625,1,0.0%,
28.052835218154023,1,0.0%,

0,1
Correlation,1

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,16.86
Minimum,-98.747
Maximum,130.99
Zeros (%),0.0%

0,1
Minimum,-98.747
5-th percentile,-34.438
Q1,-2.7079
Median,17.242
Q3,37.129
95-th percentile,65.9
Maximum,130.99
Range,229.74
Interquartile range,39.837

0,1
Standard deviation,30.34
Coef of variation,1.7995
Kurtosis,0.27659
Mean,16.86
MAD,23.888
Skewness,-0.090203
Sum,75330
Variance,920.52
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
52.40534143765045,1,0.0%,
29.29422963047417,1,0.0%,
-6.322979565940579,1,0.0%,
45.58760393121211,1,0.0%,
11.573897152040553,1,0.0%,
-1.7535926448332992,1,0.0%,
36.737700394321884,1,0.0%,
11.514521566071114,1,0.0%,
4.878837112541796,1,0.0%,
-38.544103573877024,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-98.7472338814783,1,0.0%,
-98.1288083545044,1,0.0%,
-94.71936338105118,1,0.0%,
-89.07251723306439,1,0.0%,
-88.464237522865,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
112.57661358816836,1,0.0%,
113.0648486363908,1,0.0%,
117.11163880498812,1,0.0%,
129.91875516336086,1,0.0%,
130.98991016568792,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.029826
Minimum,-32.801
Maximum,28.608
Zeros (%),0.0%

0,1
Minimum,-32.801
5-th percentile,-14.862
Q1,-5.9616
Median,-0.048961
Q3,6.1108
95-th percentile,14.865
Maximum,28.608
Range,61.409
Interquartile range,12.072

0,1
Standard deviation,8.9954
Coef of variation,301.59
Kurtosis,-0.00086557
Mean,0.029826
MAD,7.174
Skewness,-0.034713
Sum,133.23
Variance,80.917
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-1.7328206231170031,1,0.0%,
-8.277942039355175,1,0.0%,
-9.423148271256183,1,0.0%,
-10.926642180143778,1,0.0%,
-9.827960242972654,1,0.0%,
8.934756695120988,1,0.0%,
-5.269642586903559,1,0.0%,
0.7035174724806882,1,0.0%,
-13.244458336636006,1,0.0%,
18.12168494253748,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-32.800532912430214,1,0.0%,
-29.389953845335757,1,0.0%,
-29.294782259332965,1,0.0%,
-28.335129604758887,1,0.0%,
-27.39386109356152,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
25.83288525391406,1,0.0%,
26.44824580828925,1,0.0%,
26.63186731376036,1,0.0%,
27.90567658407264,1,0.0%,
28.608463807913928,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.024181
Minimum,-20.659
Maximum,26.138
Zeros (%),0.0%

0,1
Minimum,-20.659
5-th percentile,-10.355
Q1,-4.263
Median,0.022617
Q3,4.2343
95-th percentile,10.372
Maximum,26.138
Range,46.797
Interquartile range,8.4973

0,1
Standard deviation,6.2644
Coef of variation,259.06
Kurtosis,0.0059922
Mean,0.024181
MAD,4.9926
Skewness,0.04764
Sum,108.06
Variance,39.243
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-2.8621648603890595,1,0.0%,
-7.261046066467135,1,0.0%,
6.500859939537567,1,0.0%,
1.52870483464223,1,0.0%,
-1.8697786249904755,1,0.0%,
0.44644693660512413,1,0.0%,
-3.05674854518754,1,0.0%,
0.5539905271210673,1,0.0%,
-4.928341304211387,1,0.0%,
-2.35602599251502,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-20.658569063288912,1,0.0%,
-20.481424996491533,1,0.0%,
-20.0028074654825,1,0.0%,
-19.269322443178822,1,0.0%,
-18.83272646204006,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
19.75113367577514,1,0.0%,
19.785859422228743,1,0.0%,
20.193179697678943,1,0.0%,
20.42992257102105,1,0.0%,
26.1382764799984,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.069416
Minimum,-31.87
Maximum,30.603
Zeros (%),0.0%

0,1
Minimum,-31.87
5-th percentile,-12.985
Q1,-5.4861
Median,-0.032922
Q3,5.4115
95-th percentile,12.688
Maximum,30.603
Range,62.473
Interquartile range,10.898

0,1
Standard deviation,7.9284
Coef of variation,-114.22
Kurtosis,-0.018637
Mean,-0.069416
MAD,6.3376
Skewness,-0.032119
Sum,-310.15
Variance,62.859
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-0.05259592100066167,1,0.0%,
2.287912273297035,1,0.0%,
9.985691717956152,1,0.0%,
-12.139593448217724,1,0.0%,
-6.016268053903916,1,0.0%,
4.647643852746413,1,0.0%,
-12.514741234012515,1,0.0%,
9.4800369170275,1,0.0%,
2.1471250348036417,1,0.0%,
-0.2204986370753552,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-31.87039233755074,1,0.0%,
-29.3427080856351,1,0.0%,
-28.284163599657283,1,0.0%,
-27.66968626425821,1,0.0%,
-25.87189582902308,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
22.6142900424927,1,0.0%,
24.30680563762693,1,0.0%,
28.23889348094098,1,0.0%,
28.33332929237465,1,0.0%,
30.6027872968988,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.042828
Minimum,-30.24
Maximum,29.509
Zeros (%),0.0%

0,1
Minimum,-30.24
5-th percentile,-14.422
Q1,-5.9468
Median,-0.1585
Q3,5.8745
95-th percentile,14.599
Maximum,29.509
Range,59.749
Interquartile range,11.821

0,1
Standard deviation,8.8166
Coef of variation,-205.86
Kurtosis,-0.041036
Mean,-0.042828
MAD,7.0031
Skewness,0.015766
Sum,-191.36
Variance,77.733
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-5.497349499203386,1,0.0%,
6.027478968914782,1,0.0%,
18.770938164067744,1,0.0%,
-8.258461817801813,1,0.0%,
3.1880934714321207,1,0.0%,
-15.694137296793935,1,0.0%,
2.2053815798117555,1,0.0%,
-7.353961121514495,1,0.0%,
-2.356443580690987,1,0.0%,
15.103539784453911,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-30.239911173885694,1,0.0%,
-30.027554762883053,1,0.0%,
-27.5762244742663,1,0.0%,
-27.28829045987884,1,0.0%,
-27.04597569449873,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
27.30113668029614,1,0.0%,
27.349739075459667,1,0.0%,
28.538178449930435,1,0.0%,
28.90509975865337,1,0.0%,
29.508797451690093,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-16.089
Minimum,-60.553
Maximum,30.12
Zeros (%),0.0%

0,1
Minimum,-60.553
5-th percentile,-35.898
Q1,-23.96
Median,-16.096
Q3,-8.1718
95-th percentile,3.7743
Maximum,30.12
Range,90.673
Interquartile range,15.789

0,1
Standard deviation,12.104
Coef of variation,-0.75236
Kurtosis,0.13506
Mean,-16.089
MAD,9.5904
Skewness,-0.01111
Sum,-71868
Variance,146.52
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-7.979833841281475,1,0.0%,
-15.47824760977928,1,0.0%,
-17.68577428711206,1,0.0%,
-30.104305574837774,1,0.0%,
-7.6716489154508265,1,0.0%,
-14.48651214823925,1,0.0%,
-16.55306351317601,1,0.0%,
-23.14585302885029,1,0.0%,
-26.271631765718194,1,0.0%,
-4.32647331918942,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-60.55287388818915,1,0.0%,
-59.603342099714496,1,0.0%,
-58.22914552801427,1,0.0%,
-55.5092627681605,1,0.0%,
-54.902522716585466,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
22.452338754563733,1,0.0%,
22.75297866647705,1,0.0%,
24.09642540128832,1,0.0%,
26.750100010585918,1,0.0%,
30.11995426225397,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.29833
Minimum,-30.026
Maximum,28.905
Zeros (%),0.0%

0,1
Minimum,-30.026
5-th percentile,-15.014
Q1,-6.3393
Median,-0.31421
Q3,5.7777
95-th percentile,13.97
Maximum,28.905
Range,58.931
Interquartile range,12.117

0,1
Standard deviation,8.9049
Coef of variation,-29.849
Kurtosis,-0.071407
Mean,-0.29833
MAD,7.1149
Skewness,-0.034019
Sum,-1332.6
Variance,79.296
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-7.525704331801447,1,0.0%,
14.779856212348495,1,0.0%,
-12.280157999217904,1,0.0%,
8.302966082744772,1,0.0%,
18.35016285380551,1,0.0%,
-4.853009619089292,1,0.0%,
-5.657237349196397,1,0.0%,
-4.821645937959626,1,0.0%,
-0.7682362299710928,1,0.0%,
4.1195925594946745,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-30.026195540759268,1,0.0%,
-29.32842737109933,1,0.0%,
-28.98159276624745,1,0.0%,
-27.09411766802777,1,0.0%,
-26.925786839609216,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
26.22315028275304,1,0.0%,
27.71549181235019,1,0.0%,
27.927849143071548,1,0.0%,
28.34402248107707,1,0.0%,
28.904720200239076,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.082059
Minimum,-26.696
Maximum,25.846
Zeros (%),0.0%

0,1
Minimum,-26.696
5-th percentile,-11.144
Q1,-4.6837
Median,-0.18588
Q3,4.5264
95-th percentile,11.39
Maximum,25.846
Range,52.541
Interquartile range,9.2101

0,1
Standard deviation,6.8695
Coef of variation,-83.714
Kurtosis,0.070122
Mean,-0.082059
MAD,5.4696
Skewness,0.010106
Sum,-366.72
Variance,47.19
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-6.615966370803737,1,0.0%,
11.312514094244035,1,0.0%,
9.53929367621109,1,0.0%,
-2.428580504967216,1,0.0%,
-3.637900053332896,1,0.0%,
6.9993782166543035,1,0.0%,
1.3133003234340648,1,0.0%,
-7.7307215113866565,1,0.0%,
-4.552276482070889,1,0.0%,
5.144484619812721,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-26.69553161198739,1,0.0%,
-25.29663041253025,1,0.0%,
-24.42855278260763,1,0.0%,
-23.969896257471238,1,0.0%,
-23.270777303493237,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
20.15515909119704,1,0.0%,
20.840043680483756,1,0.0%,
21.610877588323213,1,0.0%,
25.631217707712665,1,0.0%,
25.845626443,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.016464
Minimum,-15.011
Maximum,11.977
Zeros (%),0.0%

0,1
Minimum,-15.011
5-th percentile,-5.4399
Q1,-2.2139
Median,0.0096659
Q3,2.2567
95-th percentile,5.3598
Maximum,11.977
Range,26.988
Interquartile range,4.4706

0,1
Standard deviation,3.305
Coef of variation,200.74
Kurtosis,0.068021
Mean,0.016464
MAD,2.6416
Skewness,-0.043649
Sum,73.578
Variance,10.923
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-3.4473704754776633,1,0.0%,
-0.791109893119567,1,0.0%,
2.633863807239732,1,0.0%,
3.627310466871772,1,0.0%,
-1.750308993719624,1,0.0%,
-0.780257038777833,1,0.0%,
0.649166063166016,1,0.0%,
-3.8939648907646607,1,0.0%,
4.160278080618944,1,0.0%,
1.2547863614842856,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-15.010778810533994,1,0.0%,
-11.589709853823598,1,0.0%,
-10.933205766676886,1,0.0%,
-10.586983515487518,1,0.0%,
-10.563531847937838,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
10.401336872258714,1,0.0%,
10.816199473975365,1,0.0%,
11.098724823534008,1,0.0%,
11.177761591443009,1,0.0%,
11.977350609227775,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.019296
Minimum,-17.552
Maximum,16.293
Zeros (%),0.0%

0,1
Minimum,-17.552
5-th percentile,-8.2498
Q1,-3.3019
Median,0.0055167
Q3,3.3009
95-th percentile,7.9296
Maximum,16.293
Range,33.845
Interquartile range,6.6028

0,1
Standard deviation,4.9308
Coef of variation,-255.53
Kurtosis,-0.006487
Mean,-0.019296
MAD,3.9226
Skewness,-0.032316
Sum,-86.235
Variance,24.312
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
2.706718904759818,1,0.0%,
-1.916034965715788,1,0.0%,
1.394562392206704,1,0.0%,
-0.4374868042057969,1,0.0%,
0.9948582253624378,1,0.0%,
6.766144107049088,1,0.0%,
-6.177834854707133,1,0.0%,
1.5237552774307692,1,0.0%,
1.9980680266684416,1,0.0%,
7.305586512167348,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-17.55229629445283,1,0.0%,
-16.64791829403807,1,0.0%,
-15.402466183528665,1,0.0%,
-14.989851816954369,1,0.0%,
-14.593763000769306,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
14.276115107794,1,0.0%,
14.505205200339695,1,0.0%,
14.579497512178436,1,0.0%,
15.310141061754644,1,0.0%,
16.293201588922454,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.021966
Minimum,-25.391
Maximum,28.481
Zeros (%),0.0%

0,1
Minimum,-25.391
5-th percentile,-12.63
Q1,-4.9331
Median,0.16187
Q3,4.9232
95-th percentile,12.396
Maximum,28.481
Range,53.873
Interquartile range,9.8564

0,1
Standard deviation,7.5505
Coef of variation,343.73
Kurtosis,0.011029
Mean,0.021966
MAD,5.9905
Skewness,-0.022506
Sum,98.144
Variance,57.01
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
5.6021579734014475,1,0.0%,
-2.8999165892104286,1,0.0%,
9.636193698861623,1,0.0%,
6.8719776815717015,1,0.0%,
2.1331265574848253,1,0.0%,
4.1302011882325385,1,0.0%,
-6.175745299390532,1,0.0%,
4.5256723030760595,1,0.0%,
-6.899211238787519,1,0.0%,
-4.917656461621584,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-25.391479766872568,1,0.0%,
-23.744404109160943,1,0.0%,
-23.45728892641725,1,0.0%,
-23.365312225061665,1,0.0%,
-23.014943926079248,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
23.11009714435829,1,0.0%,
25.37883249340028,1,0.0%,
25.86795196365034,1,0.0%,
26.59397347362891,1,0.0%,
28.48147040887629,1,0.0%,

0,1
Distinct count,4467
Unique (%),100.0%
Missing (%),0.1%
Missing (n),3
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.015185
Minimum,-18.79
Maximum,16.993
Zeros (%),0.0%

0,1
Minimum,-18.79
5-th percentile,-7.335
Q1,-2.9584
Median,0.0026455
Q3,2.9783
95-th percentile,7.308
Maximum,16.993
Range,35.783
Interquartile range,5.9367

0,1
Standard deviation,4.469
Coef of variation,-294.31
Kurtosis,0.054835
Mean,-0.015185
MAD,3.5524
Skewness,0.0049746
Sum,-67.815
Variance,19.972
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
3.1322821434988053,1,0.0%,
0.3992965890735611,1,0.0%,
-4.4006046729517765,1,0.0%,
-5.229196028626905,1,0.0%,
2.5818070636652632,1,0.0%,
4.809599053428455,1,0.0%,
-2.023373410915774,1,0.0%,
-5.709890592301653,1,0.0%,
-5.949865908739637,1,0.0%,
-0.7915965549946393,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-18.790240256365674,1,0.0%,
-15.187920054233894,1,0.0%,
-14.44396245788618,1,0.0%,
-14.383998978757454,1,0.0%,
-14.051434193969458,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
14.363128034484689,1,0.0%,
14.635140105606096,1,0.0%,
14.849375962868509,1,0.0%,
15.259967024853358,1,0.0%,
16.993098198881086,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.13577
Minimum,-25.046
Maximum,26.361
Zeros (%),0.0%

0,1
Minimum,-25.046
5-th percentile,-12.418
Q1,-4.8372
Median,0.23095
Q3,5.1639
95-th percentile,12.425
Maximum,26.361
Range,51.408
Interquartile range,10.001

0,1
Standard deviation,7.548
Coef of variation,55.595
Kurtosis,-0.032884
Mean,0.13577
MAD,6.0161
Skewness,-0.014689
Sum,606.74
Variance,56.972
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
3.1749153070801284,1,0.0%,
-8.231100635138294,1,0.0%,
-0.9984283878398906,1,0.0%,
17.189711632878705,1,0.0%,
0.3908254668393089,1,0.0%,
5.355999207081491,1,0.0%,
-5.72665429851253,1,0.0%,
-3.6548581508263553,1,0.0%,
-11.453429400505595,1,0.0%,
-0.024957967374815232,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-25.04635980019599,1,0.0%,
-23.8712919442918,1,0.0%,
-23.620926753509217,1,0.0%,
-23.596215630242604,1,0.0%,
-22.993080298318574,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
23.498188903576512,1,0.0%,
23.862476258790423,1,0.0%,
24.216324416635917,1,0.0%,
24.54702539226413,1,0.0%,
26.361491450866907,1,0.0%,

0,1
Distinct count,4467
Unique (%),100.0%
Missing (%),0.1%
Missing (n),3
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3.306
Minimum,-21.331
Maximum,27.815
Zeros (%),0.0%

0,1
Minimum,-21.331
5-th percentile,-7.7973
Q1,-0.9139
Median,3.5171
Q3,7.6567
95-th percentile,13.382
Maximum,27.815
Range,49.145
Interquartile range,8.5706

0,1
Standard deviation,6.3829
Coef of variation,1.9307
Kurtosis,-0.019105
Mean,3.306
MAD,5.1011
Skewness,-0.14964
Sum,14765
Variance,40.741
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-0.7500626895891707,1,0.0%,
3.987388072182671,1,0.0%,
13.168879832752864,1,0.0%,
19.070786335863488,1,0.0%,
-2.7859731103279306,1,0.0%,
7.345923566484381,1,0.0%,
7.0628143170857385,1,0.0%,
-2.2629863774672248,1,0.0%,
6.142198118896494,1,0.0%,
4.04749224995959,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-21.33056387743233,1,0.0%,
-19.67025402476245,1,0.0%,
-18.620075629424,1,0.0%,
-17.130588370011182,1,0.0%,
-16.209936420436126,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
21.40215413371352,1,0.0%,
22.53351949860859,1,0.0%,
22.75746846790808,1,0.0%,
22.97670494308669,1,0.0%,
27.814559709073272,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.30435
Minimum,-34.22
Maximum,31.478
Zeros (%),0.0%

0,1
Minimum,-34.22
5-th percentile,-15.491
Q1,-6.4517
Median,-0.38991
Q3,5.7541
95-th percentile,15.213
Maximum,31.478
Range,65.698
Interquartile range,12.206

0,1
Standard deviation,9.2729
Coef of variation,-30.468
Kurtosis,0.0084096
Mean,-0.30435
MAD,7.3882
Skewness,-0.011057
Sum,-1359.8
Variance,85.987
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-12.509738939191315,1,0.0%,
-4.111186831428038,1,0.0%,
7.623558430583493,1,0.0%,
5.121305939555113,1,0.0%,
-14.128215225607285,1,0.0%,
0.7241749195574324,1,0.0%,
5.596848212432418,1,0.0%,
14.583846976693136,1,0.0%,
-9.165078912840212,1,0.0%,
-15.179920599554288,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-34.22024622567635,1,0.0%,
-31.887553891149643,1,0.0%,
-31.160600126361118,1,0.0%,
-30.62402876406793,1,0.0%,
-28.45732475803375,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
27.255370036758237,1,0.0%,
28.334054436190964,1,0.0%,
29.247701216593853,1,0.0%,
30.935041255270267,1,0.0%,
31.477939872316504,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.057173
Minimum,-19.619
Maximum,21.022
Zeros (%),0.0%

0,1
Minimum,-19.619
5-th percentile,-9.1368
Q1,-3.7547
Median,-0.11721
Q3,3.6957
95-th percentile,9.0318
Maximum,21.022
Range,40.641
Interquartile range,7.4504

0,1
Standard deviation,5.5105
Coef of variation,-96.383
Kurtosis,-0.070026
Mean,-0.057173
MAD,4.413
Skewness,0.013502
Sum,-255.39
Variance,30.366
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-6.797182926188724,1,0.0%,
1.1283245852016968,1,0.0%,
2.2882076033199104,1,0.0%,
-3.241590952714198,1,0.0%,
-6.5015523988173225,1,0.0%,
0.003840580756342975,1,0.0%,
-10.736177579406046,1,0.0%,
9.463204755227721,1,0.0%,
1.1840364273540591,1,0.0%,
0.1537777786799515,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-19.61859346529544,1,0.0%,
-18.83770306139291,1,0.0%,
-17.202341014043093,1,0.0%,
-16.390146921723403,1,0.0%,
-16.385499406915965,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
17.481955205278034,1,0.0%,
17.543668295688793,1,0.0%,
17.563524119077925,1,0.0%,
18.374263312869463,1,0.0%,
21.022108668613075,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-2.433
Minimum,-52.878
Maximum,44.184
Zeros (%),0.0%

0,1
Minimum,-52.878
5-th percentile,-24.132
Q1,-11.011
Median,-2.1736
Q3,6.5228
95-th percentile,17.707
Maximum,44.184
Range,97.062
Interquartile range,17.534

0,1
Standard deviation,12.85
Coef of variation,-5.2816
Kurtosis,0.11596
Mean,-2.433
MAD,10.278
Skewness,-0.16251
Sum,-10870
Variance,165.12
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
4.35042230475769,1,0.0%,
14.225029694014163,1,0.0%,
-1.3573973621501751,1,0.0%,
-21.357863095122735,1,0.0%,
7.0939579739921435,1,0.0%,
28.265127696267392,1,0.0%,
8.069395348792131,1,0.0%,
-0.6153228717872352,1,0.0%,
9.626156223786401,1,0.0%,
-10.17232723272592,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-52.87770068218416,1,0.0%,
-52.53919632675114,1,0.0%,
-48.79824939718164,1,0.0%,
-47.9047436632413,1,0.0%,
-47.78626820931755,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
36.01733292876333,1,0.0%,
39.35964295040412,1,0.0%,
40.11662017758572,1,0.0%,
43.07076627290473,1,0.0%,
44.18414940785128,1,0.0%,

0,1
Constant value,america

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.041566
Minimum,-3.934
Maximum,4.8382
Zeros (%),0.0%

0,1
Minimum,-3.934
5-th percentile,-2.0238
Q1,-0.78111
Median,0.03349
Q3,0.84686
95-th percentile,2.0997
Maximum,4.8382
Range,8.7722
Interquartile range,1.628

0,1
Standard deviation,1.2476
Coef of variation,30.015
Kurtosis,0.1254
Mean,0.041566
MAD,0.98403
Skewness,0.060521
Sum,185.76
Variance,1.5565
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.7828305304537768,1,0.0%,
1.7416244803878642,1,0.0%,
-0.4232679634969272,1,0.0%,
-0.5936006227681525,1,0.0%,
-0.3426075007459539,1,0.0%,
2.878138317300146,1,0.0%,
-2.9855510758321366,1,0.0%,
0.7865501489564894,1,0.0%,
-1.0079195553040972,1,0.0%,
-0.8240942777990179,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-3.9339568964401472,1,0.0%,
-3.7798791598046177,1,0.0%,
-3.7734321971181215,1,0.0%,
-3.6792565678665503,1,0.0%,
-3.642133421018359,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
4.138180167004918,1,0.0%,
4.250995820307094,1,0.0%,
4.293054813418442,1,0.0%,
4.362261502111472,1,0.0%,
4.838207229947174,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.011525
Minimum,-2.6343
Maximum,3.2421
Zeros (%),0.0%

0,1
Minimum,-2.6343
5-th percentile,-1.3464
Q1,-0.55179
Median,0.007749
Q3,0.56738
95-th percentile,1.3721
Maximum,3.2421
Range,5.8763
Interquartile range,1.1192

0,1
Standard deviation,0.82685
Coef of variation,71.742
Kurtosis,-0.062373
Mean,0.011525
MAD,0.66029
Skewness,0.04149
Sum,51.507
Variance,0.68368
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-1.0456126932792007,1,0.0%,
0.08997282496604904,1,0.0%,
-1.1145614105848838,1,0.0%,
-0.9872443450248846,1,0.0%,
0.17865693299994054,1,0.0%,
1.4173583896230506,1,0.0%,
-0.01290880603407512,1,0.0%,
-0.2413156515272242,1,0.0%,
-0.29326836902829384,1,0.0%,
0.6242305734601357,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-2.634265502660414,1,0.0%,
-2.6280488709704533,1,0.0%,
-2.492187370257158,1,0.0%,
-2.4619045969863445,1,0.0%,
-2.388117418897206,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
2.544629465479684,1,0.0%,
2.5490958012551164,1,0.0%,
2.7027337647352723,1,0.0%,
2.723563873794272,1,0.0%,
3.242083086310397,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-4.3437
Minimum,-26.086
Maximum,26.654
Zeros (%),0.0%

0,1
Minimum,-26.086
5-th percentile,-14.882
Q1,-8.9622
Median,-4.5828
Q3,-0.0071951
95-th percentile,7.2125
Maximum,26.654
Range,52.739
Interquartile range,8.955

0,1
Standard deviation,6.7641
Coef of variation,-1.5572
Kurtosis,0.31257
Mean,-4.3437
MAD,5.3415
Skewness,0.27715
Sum,-19412
Variance,45.753
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-5.361382481239838,1,0.0%,
12.898620381160375,1,0.0%,
1.0290897156519931,1,0.0%,
-5.115572659662968,1,0.0%,
-2.859903076100317,1,0.0%,
-14.163522537358984,1,0.0%,
5.930787100752706,1,0.0%,
-4.118572334818547,1,0.0%,
-9.21656207330935,1,0.0%,
15.476748782846984,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-26.08570205915005,1,0.0%,
-25.15154655639737,1,0.0%,
-25.10058084690204,1,0.0%,
-24.699987599881982,1,0.0%,
-24.25733729412005,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
19.63978509033195,1,0.0%,
19.915090248010934,1,0.0%,
20.63176544397468,1,0.0%,
24.614029133857017,1,0.0%,
26.65377421242733,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,9.0608
Minimum,-42.621
Maximum,67.754
Zeros (%),0.0%

0,1
Minimum,-42.621
5-th percentile,-16.938
Q1,-1.9971
Median,9.5122
Q3,20.535
95-th percentile,33.46
Maximum,67.754
Range,110.37
Interquartile range,22.532

0,1
Standard deviation,15.628
Coef of variation,1.7248
Kurtosis,-0.34015
Mean,9.0608
MAD,12.695
Skewness,-0.096772
Sum,40475
Variance,244.25
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-2.665581231183205,1,0.0%,
42.2082295200669,1,0.0%,
26.514693214447853,1,0.0%,
-1.3640042266708736,1,0.0%,
-5.904409621979933,1,0.0%,
1.9770196238013555,1,0.0%,
9.23343659481974,1,0.0%,
3.997880899566248,1,0.0%,
2.880582745007556,1,0.0%,
-4.126935223032486,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-42.62053484159429,1,0.0%,
-39.11597868452432,1,0.0%,
-37.33622043629929,1,0.0%,
-36.80315571641903,1,0.0%,
-35.33142159039651,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
50.17304342163949,1,0.0%,
50.29502259448756,1,0.0%,
51.08412911629132,1,0.0%,
57.22903718768258,1,0.0%,
67.7538448730907,1,0.0%,

0,1
Distinct count,11
Unique (%),0.2%
Missing (%),0.0%
Missing (n),0

0,1
7,1278
6,1183
8,815
Other values (8),1193

Value,Count,Frequency (%),Unnamed: 3
7,1278,28.6%,
6,1183,26.5%,
8,815,18.2%,
5,595,13.3%,
9,301,6.7%,
4,179,4.0%,
10,66,1.5%,
3,37,0.8%,
11,10,0.2%,
2,4,0.1%,

0,1
Distinct count,6
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
wednesday,2796
thursday,842
tuesday,809
Other values (3),22

Value,Count,Frequency (%),Unnamed: 3
wednesday,2796,62.6%,
thursday,842,18.8%,
tuesday,809,18.1%,
friday,12,0.3%,
monday,9,0.2%,
other,1,0.0%,

0,1
Distinct count,4467
Unique (%),100.0%
Missing (%),0.1%
Missing (n),3
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.033793
Minimum,-8.9438
Maximum,9.7344
Zeros (%),0.0%

0,1
Minimum,-8.9438
5-th percentile,-4.5474
Q1,-1.8363
Median,0.090396
Q3,1.8308
95-th percentile,4.4772
Maximum,9.7344
Range,18.678
Interquartile range,3.667

0,1
Standard deviation,2.7186
Coef of variation,80.45
Kurtosis,-0.015079
Mean,0.033793
MAD,2.1691
Skewness,-0.027535
Sum,150.92
Variance,7.391
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
2.389974186709451,1,0.0%,
-3.3433344777582783,1,0.0%,
4.589858984724875,1,0.0%,
5.27534475926634,1,0.0%,
2.852101144310164,1,0.0%,
-3.2440117442469343,1,0.0%,
-3.9278751723291783,1,0.0%,
2.03191018146868,1,0.0%,
4.996565635959217,1,0.0%,
1.9191554707495468,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-8.943778709704956,1,0.0%,
-8.918580761780966,1,0.0%,
-8.432330301486001,1,0.0%,
-8.398819528540418,1,0.0%,
-8.202451228313302,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
8.617522683955343,1,0.0%,
8.729777838359169,1,0.0%,
8.791012257819009,1,0.0%,
9.548349842358984,1,0.0%,
9.73436252049338,1,0.0%,

0,1
Distinct count,10
Unique (%),0.2%
Missing (%),0.0%
Missing (n),0

0,1
0.01%,1690
0.02%,1042
0.0%,692
Other values (7),1045

Value,Count,Frequency (%),Unnamed: 3
0.01%,1690,37.8%,
0.02%,1042,23.3%,
0.0%,692,15.5%,
-0.0%,371,8.3%,
-0.01%,307,6.9%,
0.03%,279,6.2%,
-0.02%,51,1.1%,
0.04%,27,0.6%,
-0.03%,9,0.2%,
0.05%,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.003333
Minimum,-6.7425
Maximum,6.6065
Zeros (%),0.0%

0,1
Minimum,-6.7425
5-th percentile,-2.8841
Q1,-1.1729
Median,-0.021338
Q3,1.1809
95-th percentile,2.8983
Maximum,6.6065
Range,13.349
Interquartile range,2.3539

0,1
Standard deviation,1.7532
Coef of variation,526.03
Kurtosis,0.021686
Mean,0.003333
MAD,1.3951
Skewness,0.01735
Sum,14.892
Variance,3.0738
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-1.0689818573129468,1,0.0%,
1.5034624091998434,1,0.0%,
-0.9566487155103964,1,0.0%,
-1.9167049063192063,1,0.0%,
1.6018535792069346,1,0.0%,
-0.09842990152078723,1,0.0%,
0.20702295158009024,1,0.0%,
1.3675366972180525,1,0.0%,
0.35977687694074456,1,0.0%,
-0.33969387430540243,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-6.7424651987660456,1,0.0%,
-5.956002335051532,1,0.0%,
-5.864391387235727,1,0.0%,
-5.795931312071402,1,0.0%,
-5.729283875002122,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
5.465416977434945,1,0.0%,
5.682795802953549,1,0.0%,
5.718434265714837,1,0.0%,
5.975902582444276,1,0.0%,
6.606490649150745,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.17268
Minimum,-28.536
Maximum,30.613
Zeros (%),0.0%

0,1
Minimum,-28.536
5-th percentile,-13.31
Q1,-5.6405
Median,-0.054009
Q3,5.3066
95-th percentile,12.98
Maximum,30.613
Range,59.149
Interquartile range,10.947

0,1
Standard deviation,8.036
Coef of variation,-46.538
Kurtosis,-0.077069
Mean,-0.17268
MAD,6.4509
Skewness,-0.00078988
Sum,-771.35
Variance,64.576
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
12.833165970035546,1,0.0%,
7.10205180512794,1,0.0%,
6.701068837575308,1,0.0%,
-6.114635867357286,1,0.0%,
-4.863471021968555,1,0.0%,
-1.0040729735225609,1,0.0%,
-0.7371421451257926,1,0.0%,
4.422047161821549,1,0.0%,
12.246637622715616,1,0.0%,
4.5436668308875925,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-28.53638279288436,1,0.0%,
-25.8486456057414,1,0.0%,
-25.737552481276254,1,0.0%,
-25.40749055172579,1,0.0%,
-25.175560990264373,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
25.02272148100296,1,0.0%,
27.681776187880413,1,0.0%,
28.639045012590103,1,0.0%,
29.312437278552505,1,0.0%,
30.613049318037717,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.0081244
Minimum,-8.168
Maximum,9.5047
Zeros (%),0.0%

0,1
Minimum,-8.168
5-th percentile,-3.8248
Q1,-1.5672
Median,-0.028405
Q3,1.5707
95-th percentile,3.9462
Maximum,9.5047
Range,17.673
Interquartile range,3.138

0,1
Standard deviation,2.3767
Coef of variation,292.53
Kurtosis,-0.019258
Mean,0.0081244
MAD,1.9023
Skewness,0.067751
Sum,36.3
Variance,5.6486
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0.3823305755864836,1,0.0%,
-1.3166087130708228,1,0.0%,
0.15431536693338505,1,0.0%,
0.0560231284253466,1,0.0%,
-1.4312421795047658,1,0.0%,
0.5262910080298643,1,0.0%,
1.7602742313756323,1,0.0%,
-2.7097911356145654,1,0.0%,
-1.9652287377267768,1,0.0%,
1.3985977551612223,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-8.168005825204558,1,0.0%,
-7.879610011364503,1,0.0%,
-7.095469509219196,1,0.0%,
-6.936538158886386,1,0.0%,
-6.895017806470501,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
7.859942379755318,1,0.0%,
8.025652654471081,1,0.0%,
8.259333121143937,1,0.0%,
8.402731762629509,1,0.0%,
9.504671764186217,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.023929
Minimum,-5.871
Maximum,5.3723
Zeros (%),0.0%

0,1
Minimum,-5.871
5-th percentile,-2.7021
Q1,-1.1481
Median,-0.019758
Q3,1.0957
95-th percentile,2.6329
Maximum,5.3723
Range,11.243
Interquartile range,2.2438

0,1
Standard deviation,1.6327
Coef of variation,-68.229
Kurtosis,-0.086295
Mean,-0.023929
MAD,1.3038
Skewness,-0.012958
Sum,-106.89
Variance,2.6656
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
1.4916608969194889,1,0.0%,
-2.5743027841711097,1,0.0%,
1.1532448184463795,1,0.0%,
-0.06517992072976674,1,0.0%,
0.4998284560800683,1,0.0%,
-0.789833253102006,1,0.0%,
0.2587271797675965,1,0.0%,
0.6893036500405944,1,0.0%,
-0.6878113978356027,1,0.0%,
0.1817623502306864,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-5.870982539130687,1,0.0%,
-5.048254217129432,1,0.0%,
-5.0340895318405865,1,0.0%,
-4.997440990834222,1,0.0%,
-4.943731177936291,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
5.014088575937366,1,0.0%,
5.040209289356882,1,0.0%,
5.052309122120793,1,0.0%,
5.23310940010934,1,0.0%,
5.372341333560851,1,0.0%,

0,1
Distinct count,4426
Unique (%),99.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-829.72
Minimum,-4021.5
Maximum,1968.6
Zeros (%),0.0%

0,1
Minimum,-4021.5
5-th percentile,-2273.8
Q1,-1384.9
Median,-817.76
Q3,-253.73
95-th percentile,568.71
Maximum,1968.6
Range,5990.1
Interquartile range,1131.2

0,1
Standard deviation,854.41
Coef of variation,-1.0298
Kurtosis,0.016129
Mean,-829.72
MAD,678.22
Skewness,-0.10567
Sum,-3706400
Variance,730010
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-649.5,2,0.0%,
-528.88,2,0.0%,
-1184.38,2,0.0%,
-172.88,2,0.0%,
-1094.32,2,0.0%,
-804.6,2,0.0%,
119.46,2,0.0%,
-923.03,2,0.0%,
-728.27,2,0.0%,
161.52,2,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-4021.48,1,0.0%,
-3870.06,1,0.0%,
-3716.51,1,0.0%,
-3697.32,1,0.0%,
-3582.66,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1546.15,1,0.0%,
1582.61,1,0.0%,
1602.7,1,0.0%,
1729.99,1,0.0%,
1968.64,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,4.5034
Minimum,-49.79
Maximum,68.563
Zeros (%),0.0%

0,1
Minimum,-49.79
5-th percentile,-21.791
Q1,-6.2784
Median,4.174
Q3,15.165
95-th percentile,30.879
Maximum,68.563
Range,118.35
Interquartile range,21.444

0,1
Standard deviation,16.162
Coef of variation,3.5888
Kurtosis,0.26111
Mean,4.5034
MAD,12.755
Skewness,0.066027
Sum,20121
Variance,261.2
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-7.241649474287502,1,0.0%,
-15.124249576491582,1,0.0%,
-5.3907015292700144,1,0.0%,
-9.83662156064589,1,0.0%,
9.041522771112813,1,0.0%,
8.81642159328643,1,0.0%,
8.007430693612758,1,0.0%,
-10.808016347304337,1,0.0%,
11.629597888526225,1,0.0%,
8.426191564866643,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-49.789893921182326,1,0.0%,
-48.32061540412582,1,0.0%,
-47.51081315746918,1,0.0%,
-45.94776268153176,1,0.0%,
-45.89686855301883,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
58.09914661331234,1,0.0%,
60.282500231057774,1,0.0%,
63.89084144507764,1,0.0%,
64.09815979636967,1,0.0%,
68.56284679555779,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.12452
Minimum,-18.495
Maximum,19.013
Zeros (%),0.0%

0,1
Minimum,-18.495
5-th percentile,-8.3037
Q1,-3.2165
Median,0.10406
Q3,3.41
95-th percentile,8.5317
Maximum,19.013
Range,37.508
Interquartile range,6.6265

0,1
Standard deviation,5.0256
Coef of variation,40.359
Kurtosis,0.051527
Mean,0.12452
MAD,3.9843
Skewness,0.0034503
Sum,556.5
Variance,25.257
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-8.45232113960274,1,0.0%,
7.4663026088419775,1,0.0%,
-1.7404685027694724,1,0.0%,
2.1356378150598565,1,0.0%,
9.76496707160964,1,0.0%,
-1.801786286050672,1,0.0%,
2.1759490258320797,1,0.0%,
-1.7407753099982886,1,0.0%,
6.708172162178079,1,0.0%,
-0.25835941777772503,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-18.49500824001597,1,0.0%,
-16.87617789074068,1,0.0%,
-16.02558762846165,1,0.0%,
-15.975487064779244,1,0.0%,
-15.30031811559929,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
16.614442545583106,1,0.0%,
16.66732680864385,1,0.0%,
16.917809658931297,1,0.0%,
17.876123082083193,1,0.0%,
19.01258002571323,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,9.7426
Minimum,-41.376
Maximum,88.824
Zeros (%),0.0%

0,1
Minimum,-41.376
5-th percentile,-17.272
Q1,-2.6168
Median,8.0902
Q3,20.784
95-th percentile,41.293
Maximum,88.824
Range,130.2
Interquartile range,23.401

0,1
Standard deviation,17.975
Coef of variation,1.845
Kurtosis,0.34675
Mean,9.7426
MAD,14.207
Skewness,0.41864
Sum,43540
Variance,323.12
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
11.791643665890724,1,0.0%,
-6.772765502646752,1,0.0%,
-19.471920207054428,1,0.0%,
-8.672051201155512,1,0.0%,
-15.080539898400364,1,0.0%,
12.830074184668238,1,0.0%,
15.627638088652933,1,0.0%,
-9.405697436453334,1,0.0%,
2.910434352297555,1,0.0%,
-0.08648543155412086,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-41.37566097321753,1,0.0%,
-41.14049401467911,1,0.0%,
-40.892167471486346,1,0.0%,
-40.09452646983628,1,0.0%,
-39.145043453005314,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
73.16800394151879,1,0.0%,
81.09885384081745,1,0.0%,
85.46955861215541,1,0.0%,
88.52572426438734,1,0.0%,
88.82447749562868,1,0.0%,

0,1
Correlation,1

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-6.9011
Minimum,-27.757
Maximum,11.05
Zeros (%),0.0%

0,1
Minimum,-27.757
5-th percentile,-14.305
Q1,-9.4876
Median,-6.8062
Q3,-4.0963
95-th percentile,0.085666
Maximum,11.05
Range,38.807
Interquartile range,5.3913

0,1
Standard deviation,4.3988
Coef of variation,-0.63741
Kurtosis,0.89488
Mean,-6.9011
MAD,3.3905
Skewness,-0.25636
Sum,-30841
Variance,19.35
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-2.2347317127314037,1,0.0%,
-14.657343896823495,1,0.0%,
-8.088648801792287,1,0.0%,
-9.17998457198361,1,0.0%,
-2.982828407014125,1,0.0%,
-5.0796556896899325,1,0.0%,
-4.8585478013164405,1,0.0%,
-4.492583630240599,1,0.0%,
-8.521018702984213,1,0.0%,
-10.871127171349976,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-27.756707924111662,1,0.0%,
-24.728843788536945,1,0.0%,
-24.581756361737074,1,0.0%,
-24.14484435817068,1,0.0%,
-24.021883305408597,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
6.701164797590043,1,0.0%,
9.289970680397866,1,0.0%,
10.670290793079367,1,0.0%,
11.007110819537829,1,0.0%,
11.04991368807,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.027722
Minimum,-5.2166
Maximum,6.0611
Zeros (%),0.0%

0,1
Minimum,-5.2166
5-th percentile,-2.5263
Q1,-1.0233
Median,0.029777
Q3,1.0482
95-th percentile,2.5374
Maximum,6.0611
Range,11.278
Interquartile range,2.0715

0,1
Standard deviation,1.5468
Coef of variation,55.799
Kurtosis,0.076415
Mean,0.027722
MAD,1.2288
Skewness,-0.0030102
Sum,123.86
Variance,2.3927
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-1.0871665766042882,1,0.0%,
-0.6927096363062802,1,0.0%,
-2.7222091179293564,1,0.0%,
1.7133822515256665,1,0.0%,
-2.0695356112195538,1,0.0%,
2.7270447809854192,1,0.0%,
-0.11797703307838675,1,0.0%,
1.6015133749876578,1,0.0%,
1.2916025470308201,1,0.0%,
0.2168309712721626,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-5.216645338021992,1,0.0%,
-5.215317984342361,1,0.0%,
-4.924495168697308,1,0.0%,
-4.919998384525472,1,0.0%,
-4.868176249829278,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
5.175180452834123,1,0.0%,
5.323667236661828,1,0.0%,
5.371634742792066,1,0.0%,
5.970818044219025,1,0.0%,
6.061109558661992,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-0.1761
Minimum,-15.804
Maximum,14.905
Zeros (%),0.0%

0,1
Minimum,-15.804
5-th percentile,-7.299
Q1,-2.9147
Median,-0.084612
Q3,2.6125
95-th percentile,6.5833
Maximum,14.905
Range,30.71
Interquartile range,5.5272

0,1
Standard deviation,4.1672
Coef of variation,-23.664
Kurtosis,0.053969
Mean,-0.1761
MAD,3.3119
Skewness,-0.070235
Sum,-786.83
Variance,17.366
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-2.1011343263253868,1,0.0%,
3.471733719538737,1,0.0%,
2.6281062989140733,1,0.0%,
-0.14640795512161556,1,0.0%,
-4.001867806959732,1,0.0%,
-6.950348667164909,1,0.0%,
-3.66282360339242,1,0.0%,
-4.623907423772174,1,0.0%,
0.22247345139038133,1,0.0%,
-10.471797370116628,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-15.804084627016543,1,0.0%,
-14.735720379566128,1,0.0%,
-13.725624582949868,1,0.0%,
-13.571963158920653,1,0.0%,
-12.9458536614378,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
13.315689811299698,1,0.0%,
13.426704150338724,1,0.0%,
13.952469686371623,1,0.0%,
14.351013655633135,1,0.0%,
14.905449368028435,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.0022922
Minimum,-1.4545
Maximum,1.4616
Zeros (%),0.0%

0,1
Minimum,-1.4545
5-th percentile,-0.65574
Q1,-0.2681
Median,0.0055393
Q3,0.2639
95-th percentile,0.66976
Maximum,1.4616
Range,2.916
Interquartile range,0.53201

0,1
Standard deviation,0.39795
Coef of variation,173.61
Kurtosis,0.019339
Mean,0.0022922
MAD,0.3162
Skewness,0.017906
Sum,10.242
Variance,0.15836
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-0.08502265758092868,1,0.0%,
0.31063728979534105,1,0.0%,
0.017499674277586585,1,0.0%,
-0.09604751016264124,1,0.0%,
-0.01980604818342501,1,0.0%,
0.16893615777403706,1,0.0%,
-0.23740769454685914,1,0.0%,
-0.12821995342072512,1,0.0%,
0.38470326648583897,1,0.0%,
0.9874233620075382,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-1.45446911764342,1,0.0%,
-1.305060739145469,1,0.0%,
-1.2775888921172094,1,0.0%,
-1.1919779745045465,1,0.0%,
-1.1895614046950314,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1.30205258145964,1,0.0%,
1.354659595838917,1,0.0%,
1.3755104069517814,1,0.0%,
1.422002568568573,1,0.0%,
1.461566542911512,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-101.8
Minimum,-201.83
Maximum,-84.269
Zeros (%),0.0%

0,1
Minimum,-201.83
5-th percentile,-133.3
Q1,-109.36
Median,-97.3
Q3,-89.921
95-th percentile,-85.373
Maximum,-84.269
Range,117.56
Interquartile range,19.437

0,1
Standard deviation,15.756
Coef of variation,-0.15478
Kurtosis,2.9222
Mean,-101.8
MAD,12.109
Skewness,-1.5137
Sum,-454830
Variance,248.24
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-107.81978837941195,1,0.0%,
-100.95100081899992,1,0.0%,
-109.07889242898196,1,0.0%,
-88.53267147407959,1,0.0%,
-88.48323592440909,1,0.0%,
-86.14136420285753,1,0.0%,
-116.49590244620434,1,0.0%,
-90.47343048088644,1,0.0%,
-90.59726768237955,1,0.0%,
-85.31958659304966,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-201.82682809490376,1,0.0%,
-199.3961182552316,1,0.0%,
-187.0522975495696,1,0.0%,
-182.9659182628193,1,0.0%,
-182.73896954213464,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-84.28409733050118,1,0.0%,
-84.27333679149291,1,0.0%,
-84.27327578196099,1,0.0%,
-84.27293477044898,1,0.0%,
-84.26852292776499,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),1
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.06118
Minimum,-17.22
Maximum,16.592
Zeros (%),0.0%

0,1
Minimum,-17.22
5-th percentile,-7.8309
Q1,-3.3013
Median,0.064113
Q3,3.4455
95-th percentile,8.1273
Maximum,16.592
Range,33.812
Interquartile range,6.7468

0,1
Standard deviation,4.8759
Coef of variation,79.697
Kurtosis,-0.066647
Mean,0.06118
MAD,3.9209
Skewness,0.0078969
Sum,273.35
Variance,23.774
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-3.2818786533977296,1,0.0%,
8.591887505858315,1,0.0%,
-0.8723009338982131,1,0.0%,
-8.356096341340951,1,0.0%,
1.4034508149261098,1,0.0%,
7.0291903545959045,1,0.0%,
-2.6020095039230275,1,0.0%,
-6.489363661272378,1,0.0%,
3.4296049169776617,1,0.0%,
4.678952677554078,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-17.220374020551734,1,0.0%,
-16.636411526109818,1,0.0%,
-16.45811628199183,1,0.0%,
-15.037564412579506,1,0.0%,
-14.90515367102355,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
15.074761673264982,1,0.0%,
15.091793057123024,1,0.0%,
15.806179391253846,1,0.0%,
15.907582718011849,1,0.0%,
16.591767842503813,1,0.0%,

0,1
Distinct count,4469
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1.1324
Minimum,-6.2779
Maximum,7.3455
Zeros (%),0.0%

0,1
Minimum,-6.2779
5-th percentile,-1.9266
Q1,-0.12687
Median,1.1461
Q3,2.3888
95-th percentile,4.1437
Maximum,7.3455
Range,13.623
Interquartile range,2.5157

0,1
Standard deviation,1.8539
Coef of variation,1.6371
Kurtosis,-0.022824
Mean,1.1324
MAD,1.4873
Skewness,-0.022955
Sum,5060.8
Variance,3.437
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
1.6314248432460563,1,0.0%,
2.8017241892725373,1,0.0%,
1.8090743976105423,1,0.0%,
4.317311489342487,1,0.0%,
-1.0416034610379783,1,0.0%,
1.182221723448926,1,0.0%,
2.3336820978273787,1,0.0%,
-0.02625870148383609,1,0.0%,
-0.06422449462187903,1,0.0%,
3.9036072498981915,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-6.2779248408212345,1,0.0%,
-5.736081376325678,1,0.0%,
-5.256345434990169,1,0.0%,
-4.939138365689004,1,0.0%,
-4.889595751097547,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
6.809471602126488,1,0.0%,
6.887992276147951,1,0.0%,
6.960823145016482,1,0.0%,
7.2591398008988515,1,0.0%,
7.345547248719312,1,0.0%,

0,1
Distinct count,4468
Unique (%),100.0%
Missing (%),0.0%
Missing (n),2
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,-23.434
Minimum,-65.791
Maximum,35.534
Zeros (%),0.0%

0,1
Minimum,-65.791
5-th percentile,-40.14
Q1,-29.786
Median,-23.366
Q3,-16.826
95-th percentile,-7.8741
Maximum,35.534
Range,101.32
Interquartile range,12.96

0,1
Standard deviation,10.286
Coef of variation,-0.43895
Kurtosis,1.5932
Mean,-23.434
MAD,7.9016
Skewness,0.084426
Sum,-104680
Variance,105.81
Memory size,35.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-38.10069343626941,1,0.0%,
-40.16855482946342,1,0.0%,
-23.813327491421127,1,0.0%,
-33.78950592797121,1,0.0%,
-34.866243925835576,1,0.0%,
-16.823591841622413,1,0.0%,
-7.681789666832546,1,0.0%,
-30.34003435896119,1,0.0%,
-17.917553374284395,1,0.0%,
-17.757011264149988,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
-65.79119056246057,1,0.0%,
-63.95624967212742,1,0.0%,
-63.19851993387126,1,0.0%,
-61.66015273339567,1,0.0%,
-61.38929698388314,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
25.23503862698425,1,0.0%,
25.59828562797256,1,0.0%,
28.662607053526425,1,0.0%,
30.06745751562435,1,0.0%,
35.533602178425824,1,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Mean,0.25531

0,1
0,3328
1,1141

Value,Count,Frequency (%),Unnamed: 3
0,3328,74.5%,
1,1141,25.5%,

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20,x21,x22,x23,continent,x25,x26,x27,x28,month,day,x31,x32,x33,x34,x35,x36,x37,x38,x39,x40,x41,x42,x43,x44,x45,x46,x47,x48,x49,y
11,0.336104,3.737029,-6.41919,2.604201,-0.996173,2.329663,-9.33302,36.860887,4.199637,-0.595693,-10.292283,-14.708362,-21.687992,-0.773089,-9.884795,6.36346,0.767329,-1.28781,-9.625197,-4.252485,4.716943,15.393895,3.682097,-13.154223,america,1.674992,-0.304765,-13.541087,-10.86608,7,tuesday,-0.002595,0.02%,2.213868,1.571707,1.239489,0.534812,-622.76,-12.063685,-2.632316,7.565319,-13.341481,-8.749303,-2.927836,-4.118254,0.79009,-95.290753,-2.076083,0.841078,-16.644318,0
30,0.473574,-0.975466,-16.710254,12.998583,-12.397672,2.746008,-24.295453,38.873144,9.980948,2.700147,-3.328322,-0.236595,-8.995129,-1.794452,2.749708,-6.311608,0.537489,-3.878115,-4.755301,1.085369,8.261999,-14.490711,-0.438845,-34.641028,america,2.050443,-0.835634,-20.801691,-32.64618,6,wednesday,-0.811012,0.02%,-0.194338,-4.23567,0.457433,-0.145595,-389.97,-19.801181,5.623844,11.974305,-21.89854,-13.504938,2.929377,4.405485,-0.534592,-108.273102,10.691333,3.082169,-29.013417,0
39,-0.598895,5.640068,1.761615,-5.129643,-3.988226,-1.652207,2.561256,32.444658,19.414154,10.32991,-4.586905,-3.85799,-29.303147,3.900026,-4.63034,-4.514393,-1.009291,-6.540557,1.144232,3.909843,7.113585,-0.58222,-1.06466,-11.300296,america,0.11725,-0.846229,13.691848,5.194113,9,tuesday,-2.263816,0.01%,-1.648624,-7.309995,-3.655752,-0.837797,-1860.44,-21.656151,3.046333,39.760224,-23.94999,0.748552,1.972688,2.333861,-0.290113,-95.501526,1.594271,0.132676,-45.877717,0
69,-0.793892,6.243681,29.384152,2.396063,-8.471854,0.058261,42.722349,56.078358,2.896866,8.990246,-3.10993,-9.275058,-31.46665,6.26259,-6.381541,2.258777,-1.867659,4.448603,-0.943086,-4.997349,-6.415845,-7.557339,-12.348108,-14.541555,america,-2.187625,-0.363272,-9.463591,-0.661231,10,wednesday,-2.278022,0.0%,3.06026,2.652959,-8.168006,-1.153144,1204.95,-1.407457,-2.046671,37.21033,-1.556537,-5.261604,4.071196,6.86736,-0.539489,-90.218617,3.177392,-0.26672,-37.970461,0
93,0.179677,2.645806,4.897157,7.39488,7.565403,-12.684415,7.120099,44.51511,-9.467328,2.017689,7.096032,-6.236529,-16.993702,-6.996975,2.226934,-0.123794,-1.749136,9.382814,-8.056079,-8.162687,-0.606486,-4.87373,-6.370596,-10.662164,america,1.114197,1.024113,-8.185873,23.134029,6,wednesday,4.269687,0.01%,-1.136625,-8.473726,0.044988,-0.632353,-713.77,0.074169,7.24707,14.013368,0.082025,-4.763575,-1.698867,-0.603731,-0.051201,-87.341519,0.587371,-0.925884,-12.895,1


## Logistic Regression

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)

In [11]:
# Testing with complete data set - Drop continent, x41, x6 per EDA suggestion.
# Need to drop day, month, x32 since it's categorical
lr_df = df.drop(['x41', 'x6', 'continent', 'day','x32'], axis=1)

# Fill in NA with mean - LR needs values in each cell 
lr_df = lr_df.fillna(lr_df.mean())

# Alternative - Drop all rows with NA
lr_df_no = lr_df.dropna()

In [12]:
len(lr_df)

160000

In [13]:
y = lr_df['y']
X = lr_df.drop('y', axis = 1)

# Model Fitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
logreg = LogisticRegression()
logreg.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [14]:
y_pred = logreg.predict(X_test)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logreg.score(X_test, y_test)*100),'%')

Accuracy of logistic regression classifier on test set: 70.27 %


In [15]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, y_pred)
print(confusion_matrix)

[[23697  4924]
 [ 9345 10034]]


In [16]:
# Compute Precision, recall, F-Measure and Support
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.72      0.83      0.77     28621
           1       0.67      0.52      0.58     19379

   micro avg       0.70      0.70      0.70     48000
   macro avg       0.69      0.67      0.68     48000
weighted avg       0.70      0.70      0.69     48000



In [17]:
# ROC Curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

logit_roc_auc = roc_auc_score(y_test, logreg.predict(X_test))
fpr, tpr, thresholds = roc_curve(y_test, logreg.predict_proba(X_test)[:,1])
plt.figure()
plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc="lower right")
plt.savefig('Log_ROC')
plt.show()



## Random Forest

In [18]:
# Temp Dataset for Random Forest.  Dropped the highly correlated features.  Replace NaN with mean of column
#rf_df = df.drop(['x41', 'x6'], axis=1)
#rf_df = rf_df.fillna(lr_df.mean())

rf_df = lr_df

In [19]:
# One-hot encode the data using pandas get_dummies
#features = pd.get_dummies(rf_df)

# Display the first 5 rows of the last 12 columns
#features.iloc[:,5:].head(5)

In [20]:
# Feature of Importance
# ref: https://towardsdatascience.com/running-random-forests-inspect-the-feature-importances-with-this-code-2b00dd72b92e

y = lr_df['y']
X = lr_df.drop('y', axis = 1)

# Splitting data set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

## Import the random forest model.
from sklearn.ensemble import RandomForestClassifier 

## Initiating Random Forest Classifier. 
rf = RandomForestClassifier() 

## Fitting model on training data.
rf.fit(X_train, y_train) 

## Accuracy Score
rf.score(X_test, y_test)




0.8876875

In [21]:
feature_importances = pd.DataFrame(rf.feature_importances_,
                                   index = X_train.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances)

       importance
x23      0.074794
x48      0.063124
x49      0.062318
x42      0.059590
x20      0.058637
x38      0.054904
x12      0.054216
x37      0.049445
x28      0.046820
x40      0.046618
x27      0.045875
x46      0.043179
x7       0.042465
x2       0.040946
x43      0.008945
x26      0.008903
x35      0.008830
x13      0.008661
x8       0.008648
x3       0.008629
x39      0.008572
x1       0.008569
x25      0.008553
x31      0.008520
x11      0.008515
x15      0.008509
x44      0.008504
x16      0.008448
x5       0.008432
x34      0.008406
x17      0.008404
x33      0.008402
x9       0.008376
x21      0.008342
x14      0.008311
x19      0.008295
x18      0.008286
x10      0.008214
x36      0.008208
x47      0.008114
x4       0.008113
x45      0.008080
x22      0.008034
x0       0.008014
month    0.004233


## Decision Tree

In [22]:
#ref: https://towardsdatascience.com/decision-tree-in-python-b433ae57fb93
#https://dataaspirant.com/2017/02/01/decision-tree-algorithm-python-with-scikit-learn/
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.tree import export_graphviz
from sklearn.externals.six import StringIO 
from IPython.display import Image 
from pydot import graph_from_dot_data
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

In [23]:
dt = DecisionTreeClassifier(random_state=123)
dt.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=123,
            splitter='best')

In [24]:
y_pred = dt.predict(X_test)
print ("Accuracy is ", accuracy_score(y_test,y_pred)*100, 'with Gini Index.')

Accuracy is  83.75416666666666 with Gini Index.


In [25]:
dt_ent = DecisionTreeClassifier(criterion='entropy',random_state=123)
dt_ent.fit(X_train, y_train)


DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=123,
            splitter='best')

In [26]:
y_pred = dt_ent.predict(X_test)
print ("Accuracy is ", accuracy_score(y_test,y_pred)*100, 'with Information Gain.')

Accuracy is  84.09791666666668 with Information Gain.


## Decision Tree and Logistic Regressions using Features of Importance
Summary: Decision Tree yields the best accuracy at 86.85% using max_depth of 15 and entropy.  Logistic Regression showed tiny improvement from 70.27% to 70.33%.  Recommendation is to go with Decision Tree.

In [27]:
# Subsetting by feature of importance from RF
#rf_df_1 = lr_df [['x23', 'x12', 'x20', 'x48', 'x49', 'x27', 'x28','x37', 'x38', 'x42', 'x2', 'x7' ,'x46', 'x40', 'y']]

# importance > .04
rf_df_1 = lr_df [['x23', 'x20', 'x48', 'x49', 'x38', 'x12', 'x42', 'x27','x40', 'x37','x28','x7','x2', 'x46', 'y']]

#rf_df_1 = lr_df [['x23', 'x20', 'x48', 'x49', 'x38', 'y']] - this set yielded a worse accuracy. importance > 0.05
print(rf_df_1.head())
print(rf_df_1.shape)

         x23       x20       x48        x49        x38        x12       x42  \
0   3.553013 -1.909114  0.151589  -8.040166  -1.353729  25.665413  5.414063   
1  10.590601 -5.809984 -0.320283  16.719974  32.816804 -25.014934  4.490915   
2  -5.270615  1.700321 -2.090804  -7.869421  -0.333199  12.078602  9.088864   
3 -11.484431  1.923670  1.806070  -7.670847  14.188669  10.995330 -7.467775   
4 -15.998166 -9.026317 -0.894942  15.724742 -12.578926 -28.106348 -5.229937   

        x27        x40      x37        x28         x7         x2        x46  y  
0  1.005131 -10.612200  1313.96 -18.473784 -14.789997   4.621113  60.781427  0  
1  0.751086   2.147427  1962.78   3.749377  -6.725709  27.839856  15.805696  0  
2  4.171088  -0.863137   430.47  11.522448  11.060572  12.251561  30.856417  0  
3  9.215569  12.084421 -2366.29  30.595226 -18.913592 -24.149632 -72.424569  0  
4  1.811182  30.004727  -620.66  -4.094084  27.532281 -11.352593 -14.085435  1  
(160000, 15)


### Decision Tree w/ Feature of Importance

In [28]:
#y_1 = rf_df_1['y']
#X_1 = rf_df_1.drop('y', axis = 1)

#rf_df_sample = rf_df.sample(frac=.95)

y_1 = rf_df_1['y']
X_1 = rf_df_1.drop('y', axis = 1)

# Model Fitting
X_train1, X_test1, y_train1, y_test1 = train_test_split(X_1, y_1, test_size=0.3, random_state=123)

In [29]:
dt = DecisionTreeClassifier(random_state=123)
dt.fit(X_train1, y_train1)
y_pred1 = dt.predict(X_test1)
print ("Accuracy is ", accuracy_score(y_test1,y_pred1)*100, 'with Gini Index.')

Accuracy is  85.87708333333333 with Gini Index.


In [30]:
dt_ent = DecisionTreeClassifier(criterion='entropy',random_state=123)
dt_ent.fit(X_train1, y_train1)
y_pred1 = dt_ent.predict(X_test1)
print ("Accuracy is ", accuracy_score(y_test1,y_pred1)*100, 'with Information Gain.')

Accuracy is  86.03541666666666 with Information Gain.


In [31]:
n_est = [5, 10, 11,12,13,14,15, 16, 17,18,19, 20, 22, 25, 50, 100]
for n in n_est :
    dt = DecisionTreeClassifier(random_state=123, max_depth = n)
    dt.fit(X_train1, y_train1)
    y_pred1 = dt.predict(X_test1)
    print ("Accuracy is ", accuracy_score(y_test1,y_pred1)*100, 'with Gini Index at', n, 'depth')

Accuracy is  76.93541666666667 with Gini Index at 5 depth
Accuracy is  84.64791666666667 with Gini Index at 10 depth
Accuracy is  85.25416666666666 with Gini Index at 11 depth
Accuracy is  86.15416666666667 with Gini Index at 12 depth
Accuracy is  86.34791666666666 with Gini Index at 13 depth
Accuracy is  86.65625 with Gini Index at 14 depth
Accuracy is  86.85208333333333 with Gini Index at 15 depth
Accuracy is  86.55000000000001 with Gini Index at 16 depth
Accuracy is  86.69583333333334 with Gini Index at 17 depth
Accuracy is  86.63125000000001 with Gini Index at 18 depth
Accuracy is  86.47083333333333 with Gini Index at 19 depth
Accuracy is  86.41041666666666 with Gini Index at 20 depth
Accuracy is  86.21041666666667 with Gini Index at 22 depth
Accuracy is  85.95208333333333 with Gini Index at 25 depth
Accuracy is  85.87708333333333 with Gini Index at 50 depth
Accuracy is  85.87708333333333 with Gini Index at 100 depth


In [32]:
n_est = [5, 10, 11,12,13,14,15, 16, 17,18,19, 20, 22, 25, 50, 100]
for n in n_est :
    dt = DecisionTreeClassifier(criterion = 'entropy',random_state=123, max_depth = n)
    dt.fit(X_train1, y_train1)
    y_pred1 = dt.predict(X_test1)
    print ("Accuracy is ", accuracy_score(y_test1,y_pred1)*100, 'with Information Gain at', n, 'depth')

Accuracy is  76.02499999999999 with Information Gain at 5 depth
Accuracy is  83.61041666666667 with Information Gain at 10 depth
Accuracy is  84.32916666666667 with Information Gain at 11 depth
Accuracy is  85.00208333333333 with Information Gain at 12 depth
Accuracy is  85.55416666666666 with Information Gain at 13 depth
Accuracy is  85.77291666666666 with Information Gain at 14 depth
Accuracy is  86.38958333333333 with Information Gain at 15 depth
Accuracy is  86.33541666666666 with Information Gain at 16 depth
Accuracy is  86.3 with Information Gain at 17 depth
Accuracy is  86.22916666666667 with Information Gain at 18 depth
Accuracy is  86.25625 with Information Gain at 19 depth
Accuracy is  86.12291666666667 with Information Gain at 20 depth
Accuracy is  86.17291666666667 with Information Gain at 22 depth
Accuracy is  85.99166666666666 with Information Gain at 25 depth
Accuracy is  86.03541666666666 with Information Gain at 50 depth
Accuracy is  86.03541666666666 with Information 

### Logistic Regression w/ Feature of Importance

In [33]:
logreg = LogisticRegression(random_state=123)
logreg.fit(X_train1, y_train1)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=123, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [34]:
y_pred1 = logreg.predict(X_test1)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logreg.score(X_test1, y_test1)*100), '%')

Accuracy of logistic regression classifier on test set: 70.33 %


In [35]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test1, y_pred1)
print(confusion_matrix)

[[23838  4998]
 [ 9243  9921]]


In [36]:
# Compute Precision, recall, F-Measure and Support
from sklearn.metrics import classification_report
print(classification_report(y_test1, y_pred1))

              precision    recall  f1-score   support

           0       0.72      0.83      0.77     28836
           1       0.66      0.52      0.58     19164

   micro avg       0.70      0.70      0.70     48000
   macro avg       0.69      0.67      0.68     48000
weighted avg       0.70      0.70      0.70     48000

