Analysis of the attack models' training and test data, to understand why the attack models behave as majority classifiers

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from pathlib import Path
import os
import sys
import bz2
import pickle
np.random.seed(10)
# adds the visibility of the mlem module, needed to load the attack models
sys.path.append("../../../../..") 
import mlem
from mlem.ensemble import HardVotingClassifier
from mlem.utilities import report_and_confusion, load_pickle_bz2

In [2]:
from sklearn.metrics import classification_report, ConfusionMatrixDisplay

In [3]:
# Location of the serialized black-box model, relative to this notebook.
BLACK_BOX_PATH = '../BB_DATA/diva_rf_noclustering.bz2'
# NOTE(review): load_pickle_bz2 presumably decompresses and unpickles the file;
# unpickling executes arbitrary code, so only point this at trusted artifacts.
black_box = load_pickle_bz2(BLACK_BOX_PATH)

In [4]:
# Template for the black-box data archives; `n_samples` selects which archive to open.
BASE_BB_DATA_PATH = "../BB_DATA/diva_rf_noclustering_data_nsamples_{n_samples}.npz"
loaded = np.load(BASE_BB_DATA_PATH.format(n_samples=5), allow_pickle=True)
# List the arrays stored in the archive, space separated on a single line.
print("".join(f"{array_name} " for array_name in loaded.keys()), end="")
# Pull every stored array out into its own variable.
x_train, x_test = loaded["x_train"], loaded["x_test"]
y_train, y_test = loaded["y_train"], loaded["y_test"]
x_test_clustered, y_test_clustered = loaded["x_test_clustered"], loaded["y_test_clustered"]

x_train x_test y_train y_test x_test_clustered y_test_clustered 

# Load Attack Models and data

In [5]:
def load_atk_and_data(idx, base_path="/home/gerardozinno/Desktop/ExperimentsResult/ADA/DIVA/diva_results_noclust_5/gaussian/same/"):
    """Load the two attack models saved under `<base_path>/<idx>/attack` with their data archives.

    The directory `<base_path>/<idx>/attack/<label>/` (for label "0" and "1") is
    expected to hold `model.pkl.bz2` and `data.npz`.

    Args:
        idx: Name of the sub-directory of `base_path` to read (formatted with an f-string).
        base_path: Root directory of the experiment results.
            NOTE(review): the default is an absolute path on one specific machine;
            pass `base_path` explicitly when running anywhere else.

    Returns:
        `((atk_0, data_0), (atk_1, data_1))`, where each `atk_*` is whatever
        `load_pickle_bz2` returns for the model file and each `data_*` is the
        object returned by `np.load` for the matching `data.npz`.

    NOTE(review): both the model files and the archives (`allow_pickle=True`) may
    unpickle objects, so only load results that come from a trusted source.
    """
    attack_root = Path(base_path) / f"{idx}" / "attack"
    label_dirs = [attack_root / label for label in ("0", "1")]
    # Same order as before: both models first, then both data archives.
    models = [load_pickle_bz2(label_dir / "model.pkl.bz2") for label_dir in label_dirs]
    archives = [np.load(label_dir / "data.npz", allow_pickle=True) for label_dir in label_dirs]
    return tuple(zip(models, archives))

## idx 0

In [6]:
# Load the label-0 and label-1 attack models with their data archives for idx 0,
# then list the arrays stored in the label-0 archive on a single line.
label_0_pair, label_1_pair = load_atk_and_data(0)
atk_0, data_0 = label_0_pair
atk_1, data_1 = label_1_pair
print("".join(f"{array_name} " for array_name in data_0.keys()), end="")

x_train x_test y_train y_test 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


#### train

Trainset for the attack model trained to recognise the label 0

In [7]:
# Train split of the label-0 attack data, with the in/out label attached as `Target`.
d0_train = pd.DataFrame(data_0['x_train']).assign(Target=data_0['y_train'])

# Count how often each distinct (features, Target) row occurs.
d0_train.value_counts()

0         1         Target
0.603055  0.396945  in        4
0.607988  0.392012  in        3
0.589812  0.410188  in        2
0.565087  0.434913  in        2
0.586717  0.413283  in        2
                             ..
0.577820  0.422180  in        1
0.577818  0.422182  in        1
0.577816  0.422184  in        1
0.577815  0.422185  in        1
0.774163  0.225837  out       1
Length: 6971, dtype: int64

In [8]:
d0_train.Target.value_counts(normalize=True)

in     0.797772
out    0.202228
Name: Target, dtype: float64

In [10]:
d0_train.describe()

Unnamed: 0,0,1
count,7002.0,7002.0
mean,0.595589,0.404411
std,0.039238,0.039238
min,0.480527,0.225837
25%,0.569417,0.379078
50%,0.592677,0.407323
75%,0.620922,0.430583
max,0.774163,0.519473


Trainset for the attack model trained to recognise the label 1

In [11]:
# Train split of the label-1 attack data, with the in/out label attached as `Target`.
d1_train = pd.DataFrame(data_1['x_train']).assign(Target=data_1['y_train'])
# Count how often each distinct (features, Target) row occurs.
d1_train.value_counts()

0         1         Target
0.453318  0.546682  in        15
0.467609  0.532391  in        14
0.460569  0.539431  in        10
0.469808  0.530192  in        10
0.463605  0.536395  in         9
                              ..
0.444748  0.555252  in         1
0.457845  0.542155  in         1
0.444764  0.555236  in         1
0.457835  0.542165  in         1
0.554687  0.445313  out        1
Length: 3333, dtype: int64

The out class is not represented enough

In [12]:
d1_train.Target.value_counts(normalize=True)

in     0.996975
out    0.003025
Name: Target, dtype: float64

In [13]:
d1_train.describe()

Unnamed: 0,0,1
count,5620.0,5620.0
mean,0.454867,0.545133
std,0.013849,0.013849
min,0.381422,0.445313
25%,0.448948,0.535785
50%,0.457241,0.542759
75%,0.464215,0.551052
max,0.554687,0.618578


#### test

Testset for the attack model trained to recognise the label 0

In [14]:
# Test split of the label-0 attack data, with the in/out label attached as `Target`.
d0_test = pd.DataFrame(data_0['x_test']).assign(Target=data_0['y_test'])
# Count how often each distinct (features, Target) row occurs.
d0_test.value_counts()

0         1         Target
0.568251  0.431749  in        2
0.603055  0.396945  in        2
0.610694  0.389306  in        1
0.610284  0.389716  in        1
0.609980  0.390020  out       1
                             ..
0.578429  0.421571  in        1
0.578476  0.421524  in        1
0.578505  0.421495  in        1
0.578592  0.421408  in        1
0.743718  0.256282  in        1
Length: 2332, dtype: int64

In [15]:
d0_test.Target.value_counts(normalize=True)

in     0.805913
out    0.194087
Name: Target, dtype: float64

In [16]:
d0_test.describe()

Unnamed: 0,0,1
count,2334.0,2334.0
mean,0.596267,0.403733
std,0.038834,0.038834
min,0.476653,0.256282
25%,0.569284,0.377707
50%,0.593198,0.406802
75%,0.622293,0.430716
max,0.743718,0.523347


Testset for the attack model trained to recognise the label 1

In [17]:
# Test split of the label-1 attack data, with the in/out label attached as `Target`.
d1_test = pd.DataFrame(data_1['x_test']).assign(Target=data_1['y_test'])
# Count how often each distinct (features, Target) row occurs.
d1_test.value_counts()

0         1         Target
0.455963  0.544037  in        7
0.467609  0.532391  in        7
0.459987  0.540013  in        6
0.469576  0.530424  in        6
0.468676  0.531324  in        5
                             ..
0.452590  0.547410  in        1
0.452530  0.547470  in        1
0.452497  0.547503  in        1
0.452477  0.547523  in        1
0.531967  0.468033  out       1
Length: 1464, dtype: int64

The out class is not represented enough

In [18]:
d1_test.Target.value_counts(normalize=True)

in     0.994664
out    0.005336
Name: Target, dtype: float64

In [19]:
d1_test.describe()

Unnamed: 0,0,1
count,1874.0,1874.0
mean,0.455441,0.544559
std,0.01397,0.01397
min,0.384128,0.468033
25%,0.449211,0.535375
50%,0.458179,0.541821
75%,0.464625,0.550789
max,0.531967,0.615872


## idx 1

In [20]:
# Load the label-0 and label-1 attack models with their data archives for idx 1,
# then list the arrays stored in the label-0 archive on a single line.
label_0_pair, label_1_pair = load_atk_and_data(1)
atk_0, data_0 = label_0_pair
atk_1, data_1 = label_1_pair
print("".join(f"{array_name} " for array_name in data_0.keys()), end="")

x_train x_test y_train y_test 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


#### train

Trainset for the attack model trained to recognise the label 0

In [21]:
# Train split of the label-0 attack data (idx 1), with the in/out label attached as `Target`.
d0_train = pd.DataFrame(data_0['x_train']).assign(Target=data_0['y_train'])

# Count how often each distinct (features, Target) row occurs.
d0_train.value_counts()

0         1         Target
0.576123  0.423877  in        6
0.579499  0.420501  in        4
0.567174  0.432826  in        3
0.571351  0.428649  in        3
0.543433  0.456567  in        3
                             ..
0.545696  0.454304  in        1
0.545674  0.454326  in        1
0.545664  0.454336  in        1
0.545660  0.454340  in        1
0.766829  0.233171  in        1
Length: 6748, dtype: int64

The out class is under-represented

In [22]:
d0_train.Target.value_counts(normalize=True)

in     0.800294
out    0.199706
Name: Target, dtype: float64

In [23]:
d0_train.describe()

Unnamed: 0,0,1
count,6795.0,6795.0
mean,0.55858,0.44142
std,0.032256,0.032256
min,0.48747,0.233171
25%,0.538438,0.427302
50%,0.557681,0.442319
75%,0.572698,0.461562
max,0.766829,0.51253


Trainset for the attack model trained to recognise the label 1

In [24]:
# Train split of the label-1 attack data (idx 1), with the in/out label attached as `Target`.
d1_train = pd.DataFrame(data_1['x_train']).assign(Target=data_1['y_train'])
# Count how often each distinct (features, Target) row occurs.
d1_train.value_counts()

0         1         Target
0.479419  0.520581  in        14
0.463955  0.536045  in        10
0.479870  0.520130  in         9
0.471054  0.528946  in         9
0.486617  0.513383  in         8
                              ..
0.469076  0.530924  in         1
0.469077  0.530923  in         1
0.469082  0.530918  in         1
0.469103  0.530897  in         1
0.580531  0.419469  out        1
Length: 3855, dtype: int64

The out class is not represented enough

In [25]:
d1_train.Target.value_counts(normalize=True)

in     0.987298
out    0.012702
Name: Target, dtype: float64

In [26]:
d1_train.describe()

Unnamed: 0,0,1
count,5511.0,5511.0
mean,0.470639,0.529361
std,0.014792,0.014792
min,0.342252,0.419469
25%,0.46434,0.520561
50%,0.472718,0.527282
75%,0.479439,0.53566
max,0.580531,0.657748


#### test

Testset for the attack model trained to recognise the label 0

In [27]:
# Test split of the label-0 attack data (idx 1), with the in/out label attached as `Target`.
d0_test = pd.DataFrame(data_0['x_test']).assign(Target=data_0['y_test'])
# Count how often each distinct (features, Target) row occurs.
d0_test.value_counts()

0         1         Target
0.574598  0.425402  in        2
0.585507  0.414493  in        2
0.550577  0.449423  in        2
0.585411  0.414589  out       2
0.579499  0.420501  in        2
                             ..
0.545663  0.454337  in        1
0.545642  0.454358  in        1
0.545638  0.454362  in        1
0.545632  0.454368  in        1
0.758679  0.241321  in        1
Length: 2258, dtype: int64

In [28]:
d0_test.Target.value_counts(normalize=True)

in     0.802649
out    0.197351
Name: Target, dtype: float64

In [29]:
d0_test.describe()

Unnamed: 0,0,1
count,2265.0,2265.0
mean,0.558715,0.441285
std,0.031059,0.031059
min,0.496028,0.241321
25%,0.538957,0.426823
50%,0.55827,0.44173
75%,0.573177,0.461043
max,0.758679,0.503972


Testset for the attack model trained to recognise the label 1

In [30]:
# Test split of the label-1 attack data (idx 1), with the in/out label attached as `Target`.
d1_test = pd.DataFrame(data_1['x_test']).assign(Target=data_1['y_test'])
# Count how often each distinct (features, Target) row occurs.
d1_test.value_counts()

0         1         Target
0.463955  0.536045  in        5
0.473029  0.526971  in        4
0.463831  0.536169  in        4
0.468118  0.531882  in        4
0.459480  0.540520  in        3
                             ..
0.468271  0.531729  in        1
0.468268  0.531732  in        1
0.468267  0.531733  in        1
0.468217  0.531783  in        1
0.516605  0.483395  out       1
Length: 1619, dtype: int64

The out class is not represented enough

In [31]:
d1_test.Target.value_counts(normalize=True)

in     0.988024
out    0.011976
Name: Target, dtype: float64

In [32]:
d1_test.describe()

Unnamed: 0,0,1
count,1837.0,1837.0
mean,0.471077,0.528923
std,0.013738,0.013738
min,0.369268,0.483395
25%,0.464314,0.520217
50%,0.472945,0.527055
75%,0.479783,0.535686
max,0.516605,0.630732


## idx 2

In [33]:
# Load the label-0 and label-1 attack models with their data archives for idx 2,
# then list the arrays stored in the label-0 archive on a single line.
label_0_pair, label_1_pair = load_atk_and_data(2)
atk_0, data_0 = label_0_pair
atk_1, data_1 = label_1_pair
print("".join(f"{array_name} " for array_name in data_0.keys()), end="")

x_train x_test y_train y_test 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


#### train

Trainset for the attack model trained to recognise the label 0

In [34]:
# Train split of the label-0 attack data (idx 2), with the in/out label attached as `Target`.
d0_train = pd.DataFrame(data_0['x_train']).assign(Target=data_0['y_train'])

# Count how often each distinct (features, Target) row occurs.
d0_train.value_counts()

0         1         Target
0.531389  0.468611  in        2
0.567083  0.432917  in        2
0.567322  0.432678  in        2
0.547197  0.452803  in        2
0.553511  0.446489  in        2
                             ..
0.538601  0.461399  out       1
0.538593  0.461407  out       1
0.538588  0.461412  out       1
0.538581  0.461419  in        1
0.646757  0.353243  out       1
Length: 6510, dtype: int64

These values appear with two different labels:

    0.535312  0.464688  in        52
    0.535312  0.464688  out       20

In [35]:
d0_train.Target.value_counts(normalize=True)

in     0.799263
out    0.200737
Name: Target, dtype: float64

In [36]:
d0_train.describe()

Unnamed: 0,0,1
count,6516.0,6516.0
mean,0.548697,0.451303
std,0.023278,0.023278
min,0.440733,0.353243
25%,0.532147,0.436751
50%,0.549563,0.450437
75%,0.563249,0.467853
max,0.646757,0.559267


Trainset for the attack model trained to recognise the label 1

In [37]:
# Train split of the label-1 attack data (idx 2), with the in/out label attached as `Target`.
d1_train = pd.DataFrame(data_1['x_train']).assign(Target=data_1['y_train'])
# Count how often each distinct (features, Target) row occurs.
d1_train.value_counts()

0         1         Target
0.482243  0.517757  in        9
0.482822  0.517178  in        6
0.484641  0.515359  in        6
0.489135  0.510865  in        5
0.487548  0.512452  in        5
                             ..
0.474143  0.525857  in        1
                    in        1
                    in        1
0.474152  0.525848  in        1
0.552003  0.447997  out       1
Length: 4488, dtype: int64

The out class is not represented enough

In [38]:
d1_train.Target.value_counts(normalize=True)

in     0.976596
out    0.023404
Name: Target, dtype: float64

In [39]:
d1_train.describe()

Unnamed: 0,0,1
count,5341.0,5341.0
mean,0.475781,0.524219
std,0.012901,0.012901
min,0.379912,0.447997
25%,0.470911,0.516097
50%,0.478376,0.521624
75%,0.483903,0.529089
max,0.552003,0.620088


#### test

Testset for the attack model trained to recognise the label 0

In [40]:
# Test split of the label-0 attack data (idx 2), with the in/out label attached as `Target`.
d0_test = pd.DataFrame(data_0['x_test']).assign(Target=data_0['y_test'])
# Count how often each distinct (features, Target) row occurs.
d0_test.value_counts()

0         1         Target
0.557862  0.442138  in        2
0.561351  0.438649  in        2
0.478880  0.521120  out       1
0.558512  0.441488  in        1
0.558386  0.441614  in        1
                             ..
0.539153  0.460847  in        1
0.539146  0.460854  in        1
0.539141  0.460859  out       1
0.539116  0.460884  in        1
0.657947  0.342053  in        1
Length: 2170, dtype: int64

In [41]:
d0_test.Target.value_counts(normalize=True)

in     0.805249
out    0.194751
Name: Target, dtype: float64

In [42]:
d0_test.describe()

Unnamed: 0,0,1
count,2172.0,2172.0
mean,0.547799,0.452201
std,0.022894,0.022894
min,0.47888,0.342053
25%,0.53154,0.43784
50%,0.548995,0.451005
75%,0.56216,0.46846
max,0.657947,0.52112


Testset for the attack model trained to recognise the label 1

In [43]:
# Test split of the label-1 attack data (idx 2), with the in/out label attached as `Target`.
d1_test = pd.DataFrame(data_1['x_test']).assign(Target=data_1['y_test'])
# Count how often each distinct (features, Target) row occurs.
d1_test.value_counts()

0         1         Target
0.487548  0.512452  in        4
0.472044  0.527956  in        4
0.471639  0.528361  in        3
0.481525  0.518475  in        3
0.470344  0.529656  in        3
                             ..
0.473632  0.526368  in        1
0.473609  0.526391  in        1
0.473608  0.526392  in        1
0.473598  0.526402  in        1
0.537296  0.462704  out       1
Length: 1654, dtype: int64

The out class is not represented enough

In [44]:
d1_test.Target.value_counts(normalize=True)

in     0.977541
out    0.022459
Name: Target, dtype: float64

In [45]:
d1_test.describe()

Unnamed: 0,0,1
count,1781.0,1781.0
mean,0.475413,0.524587
std,0.012804,0.012804
min,0.380366,0.462704
25%,0.470403,0.516479
50%,0.477944,0.522056
75%,0.483521,0.529597
max,0.537296,0.619634


## idx 20

In [46]:
# Load the label-0 and label-1 attack models with their data archives for idx 20,
# then list the arrays stored in the label-0 archive on a single line.
label_0_pair, label_1_pair = load_atk_and_data(20)
atk_0, data_0 = label_0_pair
atk_1, data_1 = label_1_pair
print("".join(f"{array_name} " for array_name in data_0.keys()), end="")

x_train x_test y_train y_test 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


#### train

Trainset for the attack model trained to recognise the label 0

In [47]:
# Train split of the label-0 attack data (idx 20), with the in/out label attached as `Target`.
d0_train = pd.DataFrame(data_0['x_train']).assign(Target=data_0['y_train'])

# Count how often each distinct (features, Target) row occurs.
d0_train.value_counts()

0         1         Target
0.524584  0.475416  in        10
0.543760  0.456240  in         6
0.536302  0.463698  in         5
0.540645  0.459355  in         3
0.554333  0.445667  in         3
                              ..
0.514836  0.485164  in         1
                    in         1
0.514831  0.485169  in         1
0.514815  0.485185  in         1
0.607947  0.392053  out        1
Length: 5183, dtype: int64

In [48]:
d0_train.Target.value_counts(normalize=True)

in     0.803922
out    0.196078
Name: Target, dtype: float64

In [49]:
d0_train.describe()

Unnamed: 0,0,1
count,5253.0,5253.0
mean,0.523426,0.476574
std,0.01559,0.01559
min,0.477492,0.392053
25%,0.511819,0.467906
50%,0.520981,0.479019
75%,0.532094,0.488181
max,0.607947,0.522508


Trainset for the attack model trained to recognise the label 1

In [50]:
# Train split of the label-1 attack data (idx 20), with the in/out label attached as `Target`.
d1_train = pd.DataFrame(data_1['x_train']).assign(Target=data_1['y_train'])
# Count how often each distinct (features, Target) row occurs.
d1_train.value_counts()

0         1         Target
0.465504  0.534496  in        5
0.490578  0.509422  in        5
0.496845  0.503155  in        4
0.493168  0.506832  in        4
0.493609  0.506391  in        4
                             ..
0.478040  0.521960  out       1
0.478042  0.521958  in        1
0.478066  0.521934  in        1
0.478091  0.521909  in        1
0.530709  0.469291  out       1
Length: 4333, dtype: int64

The out class is not represented enough

In [51]:
d1_train.Target.value_counts(normalize=True)

in     0.917211
out    0.082789
Name: Target, dtype: float64

In [52]:
d1_train.describe()

Unnamed: 0,0,1
count,4590.0,4590.0
mean,0.478854,0.521146
std,0.016554,0.016554
min,0.392319,0.469291
25%,0.472468,0.50973
50%,0.483863,0.516137
75%,0.49027,0.527532
max,0.530709,0.607681


#### test

Testset for the attack model trained to recognise the label 0

In [53]:
# Test split of the label-0 attack data (idx 20), with the in/out label attached as `Target`.
d0_test = pd.DataFrame(data_0['x_test']).assign(Target=data_0['y_test'])
# Count how often each distinct (features, Target) row occurs.
d0_test.value_counts()

0         1         Target
0.519973  0.480027  in        3
0.532148  0.467852  in        2
0.534986  0.465014  in        2
0.526243  0.473757  in        2
0.519246  0.480754  in        2
                             ..
0.514680  0.485320  in        1
0.514677  0.485323  out       1
0.514610  0.485390  in        1
0.514609  0.485391  in        1
0.597630  0.402370  in        1
Length: 1745, dtype: int64

In [54]:
d0_test.Target.value_counts(normalize=True)

in     0.794403
out    0.205597
Name: Target, dtype: float64

In [55]:
d0_test.describe()

Unnamed: 0,0,1
count,1751.0,1751.0
mean,0.523641,0.476359
std,0.015499,0.015499
min,0.48606,0.40237
25%,0.511906,0.467833
50%,0.521368,0.478632
75%,0.532167,0.488094
max,0.59763,0.51394


Testset for the attack model trained to recognise the label 1

In [56]:
# Test split of the label-1 attack data (idx 20), with the in/out label attached as `Target`.
d1_test = pd.DataFrame(data_1['x_test']).assign(Target=data_1['y_test'])
# Count how often each distinct (features, Target) row occurs.
d1_test.value_counts()

0         1         Target
0.489638  0.510362  in        3
0.486861  0.513139  in        2
0.474181  0.525819  in        2
0.467689  0.532311  in        2
0.450647  0.549353  in        2
                             ..
0.477045  0.522955  in        1
0.476968  0.523032  in        1
0.476948  0.523052  in        1
0.476921  0.523079  in        1
0.521648  0.478352  out       1
Length: 1500, dtype: int64

The out class is not represented enough

In [57]:
d1_test.Target.value_counts(normalize=True)

in     0.917647
out    0.082353
Name: Target, dtype: float64

In [58]:
d1_test.describe()

Unnamed: 0,0,1
count,1530.0,1530.0
mean,0.478442,0.521558
std,0.016713,0.016713
min,0.403415,0.478352
25%,0.471914,0.509886
50%,0.48364,0.51636
75%,0.490114,0.528086
max,0.521648,0.596585


## idx 23

In [59]:
# Load the label-0 and label-1 attack models with their data archives for idx 23,
# then list the arrays stored in the label-0 archive on a single line.
label_0_pair, label_1_pair = load_atk_and_data(23)
atk_0, data_0 = label_0_pair
atk_1, data_1 = label_1_pair
print("".join(f"{array_name} " for array_name in data_0.keys()), end="")

x_train x_test y_train y_test 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


#### train

Trainset for the attack model trained to recognise the label 0

In [60]:
# Train split of the label-0 attack data (idx 23), with the in/out label attached as `Target`.
d0_train = pd.DataFrame(data_0['x_train']).assign(Target=data_0['y_train'])

# Count how often each distinct (features, Target) row occurs.
d0_train.value_counts()

0         1         Target
0.548101  0.451899  in        4
0.519369  0.480631  in        3
0.520448  0.479552  in        2
0.519464  0.480536  in        2
0.520314  0.479686  in        2
                             ..
0.517305  0.482695  in        1
0.517302  0.482698  in        1
0.517299  0.482701  out       1
0.517293  0.482707  in        1
0.607944  0.392056  in        1
Length: 5565, dtype: int64

In [61]:
d0_train.Target.value_counts(normalize=True)

in     0.794692
out    0.205308
Name: Target, dtype: float64

In [62]:
d0_train.describe()

Unnamed: 0,0,1
count,5577.0,5577.0
mean,0.526977,0.473023
std,0.016595,0.016595
min,0.490788,0.392056
25%,0.513697,0.460729
50%,0.524363,0.475637
75%,0.539271,0.486303
max,0.607944,0.509212


Trainset for the attack model trained to recognise the label 1

In [63]:
# Train split of the label-1 attack data (idx 23), with the in/out label attached as `Target`.
d1_train = pd.DataFrame(data_1['x_train']).assign(Target=data_1['y_train'])
# Count how often each distinct (features, Target) row occurs.
d1_train.value_counts()

0         1         Target
0.442306  0.557694  in        4
0.472749  0.527251  in        4
0.424472  0.575528  in        4
0.404176  0.595824  in        3
0.415371  0.584629  in        3
                             ..
0.474162  0.525838  in        1
0.474164  0.525836  in        1
0.474171  0.525829  in        1
0.474189  0.525811  in        1
0.539220  0.460780  out       1
Length: 4486, dtype: int64

The out class is not represented enough

In [64]:
d1_train.Target.value_counts(normalize=True)

in     0.937949
out    0.062051
Name: Target, dtype: float64

In [65]:
d1_train.describe()

Unnamed: 0,0,1
count,4738.0,4738.0
mean,0.473348,0.526652
std,0.022813,0.022813
min,0.361299,0.46078
25%,0.464706,0.51038
50%,0.48156,0.51844
75%,0.48962,0.535294
max,0.53922,0.638701


#### test

Testset for the attack model trained to recognise the label 0

In [66]:
# Test split of the label-0 attack data (idx 23), with the in/out label attached as `Target`.
d0_test = pd.DataFrame(data_0['x_test']).assign(Target=data_0['y_test'])
# Count how often each distinct (features, Target) row occurs.
d0_test.value_counts()

0         1         Target
0.452710  0.547290  out       1
0.534627  0.465373  in        1
0.535055  0.464945  out       1
0.535021  0.464979  in        1
0.535009  0.464991  in        1
                             ..
0.517905  0.482095  out       1
0.517861  0.482139  in        1
0.517803  0.482197  out       1
0.517767  0.482233  in        1
0.602181  0.397819  in        1
Length: 1859, dtype: int64

In [67]:
d0_test.Target.value_counts(normalize=True)

in     0.805272
out    0.194728
Name: Target, dtype: float64

In [68]:
d0_test.describe()

Unnamed: 0,0,1
count,1859.0,1859.0
mean,0.527644,0.472356
std,0.017052,0.017052
min,0.45271,0.397819
25%,0.513901,0.460158
50%,0.525872,0.474128
75%,0.539842,0.486099
max,0.602181,0.54729


Testset for the attack model trained to recognise the label 1

In [69]:
# Test split of the label-1 attack data (idx 23), with the in/out label attached as `Target`.
d1_test = pd.DataFrame(data_1['x_test']).assign(Target=data_1['y_test'])
# Count how often each distinct (features, Target) row occurs.
d1_test.value_counts()

0         1         Target
0.431444  0.568556  in        3
0.494761  0.505239  in        2
0.465780  0.534220  in        2
0.472626  0.527374  in        2
0.428533  0.571467  in        2
                             ..
0.474503  0.525497  in        1
0.474395  0.525605  in        1
0.474381  0.525619  in        1
0.474367  0.525633  in        1
0.513478  0.486522  out       1
Length: 1551, dtype: int64

The out class is not represented enough

In [70]:
d1_test.Target.value_counts(normalize=True)

in     0.939873
out    0.060127
Name: Target, dtype: float64

In [71]:
d1_test.describe()

Unnamed: 0,0,1
count,1580.0,1580.0
mean,0.474775,0.525225
std,0.021819,0.021819
min,0.371469,0.486522
25%,0.467264,0.509981
50%,0.482592,0.517408
75%,0.490019,0.532736
max,0.513478,0.628531


In [73]:
# Summary statistics restricted to the rows labelled 'in'. `d0_train` is whatever the
# most recently executed cell assigned (the idx 23 label-0 train set when run top to bottom).
d0_train.loc[d0_train['Target'].eq('in')].describe()

Unnamed: 0,0,1
count,4432.0,4432.0
mean,0.52739,0.47261
std,0.016617,0.016617
min,0.500006,0.392056
25%,0.513962,0.460214
50%,0.524829,0.475171
75%,0.539786,0.486038
max,0.607944,0.499994
