In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import itertools as it
from collections import defaultdict
import seaborn as sns
pd.set_option('display.max_columns', None)

In [2]:
path_common_source_trans = '/home/user/Desktop/courseProject/data/train_transaction.csv'
path_common_source_idn = '/home/user/Desktop/courseProject/data/train_identity.csv'
path_to_data_folder = '/home/user/Desktop/courseProject/data/'

In [3]:
train_trs = pd.read_csv(path_common_source_trans)
train_idn = pd.read_csv(path_common_source_idn )

In [4]:
data_merged = pd.merge(train_trs, train_idn, left_on='TransactionID', right_on='TransactionID', how='left')

In [5]:
def get_share_of_NaN(df):
    result = pd.DataFrame(columns=['Name', 'Number_of_NaN', 'Share_of_NaN'])
    colcount = df.count()
    length = len(df)
    for col_name in colcount.keys():
        result.loc[len(result)] = [col_name, length-colcount[col_name], (length-colcount[col_name])/length]
    return result

def remove_columns_with_many_NaN(df, max_nan_rate):
    '''
    Параметры:
    df - DataFrame
    max_nan_rate - максимальная допустимая доля NaN в колонках датафрейма
    Функция возвращает:
    1) новый датафрейм, в котором удалены колонки, в которых доля NaN болше, чем max_nan_rate
    2) список удалённых колонок
    '''
    df_copy = df.copy()
    removed_columns = []
    nan_stat = get_share_of_NaN(df)
    for i in range(len(nan_stat)):
        column = nan_stat.loc[i]
        if(column['Share_of_NaN'] > max_nan_rate):
            removed_columns.append(column['Name'])
            
    df_copy.drop(columns=removed_columns, inplace=True)
    return df_copy, removed_columns

def remove_columns_with_big_correlation(df, max_corr):
    '''
    Параметры:
    df - DataFrame
    max_corr - максимальная допустимая корреляция между колонками
    Функция возвращает:
    1) новый датафрейм, в котором удалены колонки, в которых корреляция болше, чем max_corr
    2) множество удалённых колонок
    '''
    df_copy = df.copy()
    removed_columns = set()
    corrs = df.corr()
    cols = corrs.columns
    for i in range(len(cols)):
        col_name_1 = cols[i]
        if col_name_1 in {'TransactionID', 'isFraud', 'TransactionDT'} or col_name_1 in removed_columns:
            continue
        
        for j in range(i+1, len(cols)):
            col_name_2 = cols[j]
            if abs(corrs[col_name_1][col_name_2]) > max_corr:
                removed_columns.add(col_name_2)

    df_copy.drop(columns=removed_columns, inplace=True)
    return df_copy, removed_columns


def add_categorical_int_inplace(df, df_describe, name):
    counts = df.groupby(name)[name].count()
    top_elem = counts.idxmax()
    top_elem_freq = counts[top_elem]
    s = df[name]
    df_describe.loc[:, name] = [s.count(), len(counts), top_elem, top_elem_freq]
    
# remove all nones
def proc_cat_col(df, cat_features_mask):
    cat_cols = df.columns[cat_features_mask]
    #print(cat_cols)
    for col in cat_cols:
        dc = df.loc[:,col]
        dc = dc.fillna(dc.mode()[0], inplace=True)


def proc_val_col(df, cat_features_mask, sample_size):
    val_cols = df.columns[~cat_features_mask]
    for col in val_cols:
        dc = df.loc[:,col]
        dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
        
def proc_nans(df, cat_features_mask, sample_size):
    proc_cat_col(df, cat_features_mask)
    proc_val_col(df, cat_features_mask, sample_size)


In [6]:
%%time
data_merged, removed_nan_cols = remove_columns_with_many_NaN(data_merged, 0.85)
data_merged_rm_cols, removed_corr_cols = remove_columns_with_big_correlation(data_merged, 0.9)

CPU times: user 1min 21s, sys: 708 ms, total: 1min 22s
Wall time: 1min 22s


In [7]:
cat_features_mask = (data_merged_rm_cols.dtypes == "object").values
#val_features_mask = (data_merged_rm_cols.dtypes != "object").values
#for i in range(len(data_merged_rm_cols.columns)):
#    if data_merged_rm_cols.iloc[:,i].name in ['card1', 'card2', 'card3', 'card5', 'addr1', 'addr2']:
#        cat_features_mask[i] = True
#        val_features_mask[i] = False

#cat_cols = data_merged_rm_cols[data_merged_rm_cols.columns[cat_features_mask]]
#var_cols = data_merged_rm_cols[data_merged_rm_cols.columns[~cat_features_mask]]
#print(cat_cols.describe(include='all'))

In [10]:
# train_trs_categor_descr = data_merged_rm_cols.describe(include='object')
# print(train_trs_categor_descr)
# print("----")
# for name in ['card1', 'card2', 'card3', 'card5', 'addr1', 'addr2']:
#     add_categorical_int_inplace(data_merged_rm_cols, train_trs_categor_descr, name)

# print(train_trs_categor_descr)

In [9]:
#data_merged.describe()

In [None]:
#data_test.to_csv('/home/user/Desktop/courseProject/data/clear_and_filled.csv')

In [8]:
%%time
data_f0 = data_merged_rm_cols[data_merged_rm_cols['isFraud'] == 0]
data_f1 = data_merged_rm_cols[data_merged_rm_cols['isFraud'] == 1]
proc_cat_col(data_f0, cat_features_mask)
proc_val_col(data_f0, cat_features_mask,10000)
proc_cat_col(data_f1, cat_features_mask)
proc_val_col(data_f1, cat_features_mask,10000)
clear_and_nan = pd.concat([data_f0, data_f1])
clear_and_nan.describe()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.mode()[0], inplace=Tr

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc = dc.fillna(dc.sample(n=sample_size).dropna().mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

CPU times: user 7.95 s, sys: 427 ms, total: 8.37 s
Wall time: 8.37 s


Unnamed: 0,TransactionID,isFraud,TransactionDT,TransactionAmt,card1,card2,card3,card5,addr1,addr2,dist1,C1,C3,C5,C13,D1,D3,D4,D5,D10,D11,D15,V1,V2,V3,V4,V6,V7,V8,V9,V10,V12,V14,V15,V17,V19,V23,V24,V25,V26,V27,V29,V32,V35,V37,V38,V39,V41,V42,V44,V46,V47,V49,V52,V53,V55,V56,V60,V61,V62,V65,V66,V67,V68,V70,V74,V75,V77,V78,V81,V82,V83,V86,V87,V95,V98,V99,V100,V104,V105,V107,V108,V109,V110,V111,V112,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V124,V125,V129,V130,V131,V135,V136,V169,V170,V171,V172,V173,V174,V175,V176,V180,V181,V184,V185,V186,V187,V188,V189,V191,V194,V195,V199,V200,V204,V205,V207,V208,V209,V210,V214,V215,V217,V220,V221,V223,V224,V226,V227,V228,V229,V230,V234,V238,V240,V241,V242,V246,V247,V248,V250,V252,V255,V258,V260,V261,V262,V264,V267,V268,V270,V274,V281,V282,V283,V284,V285,V286,V287,V288,V289,V290,V291,V300,V303,V305,V310,V311,V312,V313,V314,V319,V320,id_01,id_02,id_05,id_06,id_11,id_13,id_17,id_19,id_20
count,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0,590540.0
mean,3282270.0,0.03499,7372311.0,135.027176,9898.734658,362.087335,153.186458,199.471611,290.408235,86.822813,48.409366,14.092458,0.005644,5.571526,32.539918,94.144827,15.726349,99.955209,20.123384,108.021541,77.279051,139.035379,0.999971,1.023826,1.04115,0.919072,1.02408,1.038407,1.014602,1.021888,0.244513,0.609847,0.999565,0.106582,0.116773,0.840026,1.03031,1.050613,0.980538,0.989581,0.000676,0.337879,0.124071,0.663147,1.077145,1.115933,0.118558,0.999478,0.111449,1.059888,1.015909,1.027495,0.283925,0.130421,0.625097,1.058836,1.105185,0.123929,0.852007,0.884853,0.999707,0.983513,0.998366,0.000464,0.354669,0.132284,0.605845,1.073773,1.12265,0.129641,0.868072,0.899787,1.055089,1.08444,1.037467,0.061952,0.89451,0.273358,0.085388,0.280995,0.99958,1.004611,1.014808,1.007735,1.002562,1.005353,1.009293,1.032433,1.01573,1.000391,1.001473,1.000728,1.000874,1.004274,1.001758,1.031104,1.092925,1.050388,8.764282,92.116843,31.116748,17.24096,38.800554,0.039789,1.103414,1.163582,0.031214,0.013059,0.030257,0.050345,1.089135,0.2188,0.059899,0.031339,0.041174,1.035183,1.199502,1.003493,1.009071,1.013947,0.987161,0.989162,1.064018,1.028406,162.428427,4.270319,17.091308,2.10441,8.287206,3.407478,9.088416,31.49659,0.232929,0.040583,1.065833,0.020574,0.087186,0.054689,0.035435,1.077729,1.141069,1.102808,0.501172,0.030641,1.00022,1.000052,1.02506,1.040578,1.0056,1.015349,0.949373,1.007078,0.9537,1.07587,0.992143,1.023668,1.002933,44.53924,8.06714,4.155235,1.846968,23.666116,0.087594,0.817564,0.991134,0.088541,1.167636,0.031492,0.358572,0.183954,0.235468,1.103009,1.659798,0.045409,0.283134,1.000007,118.193256,4.20209,39.173114,21.305592,43.226087,18.372102,42.072278,-6.262839,42161.506155,0.374432,-1.55251,99.939202,51.149045,172.495408,286.554955,482.682895
std,170474.4,0.183755,4617224.0,239.162522,4901.170153,156.750049,11.322604,41.15761,96.461039,2.5373,243.144405,133.569018,0.150536,25.786976,129.364844,157.551484,48.557362,173.421595,64.900349,175.440514,153.627465,195.785107,0.005365,0.175789,0.236202,0.328544,0.175282,0.224238,0.135791,0.165623,0.443847,0.500312,0.020857,0.312968,0.343124,0.401896,0.231471,0.285794,0.173064,0.195398,0.026692,0.494019,0.347035,0.486097,0.585511,0.729412,0.389169,0.022832,0.331118,0.541348,0.141222,0.196675,0.49247,0.379996,0.499761,0.365635,0.61781,0.39292,0.411139,0.45332,0.017113,0.201728,0.229299,0.022155,0.534992,0.370271,0.502783,0.492062,0.72192,0.420326,0.393281,0.435818,0.387515,0.472448,21.028724,0.284923,2.721906,0.946945,0.648548,3.373057,0.020489,0.081124,0.127736,0.097264,0.070812,0.084595,0.11015,0.190335,0.136734,0.035229,0.041,0.036382,0.041673,0.067079,0.048623,0.228075,0.37401,0.279965,113.80274,315.883618,161.120004,293.7697,451.689165,0.446088,0.872072,1.225294,0.45322,0.130709,0.192606,0.42675,0.903873,3.000135,0.616787,0.270472,0.348411,0.598299,4.819697,0.326658,0.385975,0.330404,0.149783,0.18419,0.821053,0.612232,2955.801256,129.836333,451.162582,28.978721,122.939233,42.543196,278.537148,509.083198,4.508172,0.495862,1.34049,0.192943,1.626489,1.299492,1.004851,0.67841,2.0456,1.016142,5.612497,0.286464,0.023277,0.010651,0.313777,0.494835,0.132761,0.418481,0.255457,0.183554,0.466913,0.958012,0.179114,0.623455,0.293992,1077.035834,303.446977,146.497901,32.224789,593.22491,0.512213,0.920928,1.557055,0.33842,3.282425,0.190937,1.078985,0.43061,0.598687,0.768889,16.252373,0.289269,0.623603,0.002603,352.979909,102.3739,172.126681,95.804974,173.443984,332.301482,473.494534,7.430643,107632.921066,2.617689,8.427247,0.561537,5.70321,19.027392,77.877999,85.883911
min,2987000.0,0.0,86400.0,0.251,1000.0,100.0,100.0,100.0,100.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-122.0,0.0,0.0,-53.0,-83.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,1.0,-72.0,-100.0,90.0,10.0,100.0,100.0,100.0
25%,3134635.0,0.0,3027058.0,43.321,6019.0,215.0,150.0,166.0,204.0,87.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.0,637.0,0.0,0.0,100.0,52.0,166.0,266.0,507.0
50%,3282270.0,0.0,7306528.0,68.769,9678.0,360.0,150.0,226.0,299.0,87.0,1.0,1.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,13.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.0,637.0,0.0,0.0,100.0,52.0,166.0,266.0,507.0
75%,3429904.0,0.0,11246620.0,125.0,14184.0,512.0,150.0,226.0,327.0,87.0,5.0,3.0,0.0,1.0,12.0,121.0,11.0,123.0,8.0,150.0,57.0,251.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,59.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,107.949997,0.0,0.0,0.0,0.0,0.0,0.0,-5.0,637.0,0.0,0.0,100.0,52.0,166.0,266.0,507.0
max,3577539.0,1.0,15811130.0,31937.391,18396.0,600.0,231.0,237.0,540.0,102.0,10286.0,4685.0,26.0,349.0,2918.0,640.0,819.0,869.0,819.0,876.0,670.0,879.0,1.0,8.0,9.0,6.0,9.0,9.0,8.0,8.0,4.0,3.0,1.0,7.0,15.0,7.0,13.0,13.0,7.0,13.0,4.0,5.0,15.0,3.0,54.0,54.0,15.0,1.0,8.0,48.0,6.0,12.0,5.0,12.0,5.0,17.0,51.0,16.0,6.0,10.0,1.0,7.0,8.0,2.0,6.0,8.0,4.0,30.0,31.0,19.0,7.0,7.0,30.0,30.0,880.0,12.0,88.0,28.0,15.0,99.0,1.0,7.0,7.0,7.0,9.0,9.0,6.0,6.0,6.0,3.0,3.0,3.0,3.0,3.0,3.0,13.0,13.0,13.0,55125.0,55125.0,55125.0,90750.0,90750.0,19.0,48.0,61.0,31.0,7.0,8.0,14.0,48.0,83.0,24.0,16.0,31.0,38.0,218.0,30.0,30.0,21.0,7.0,16.0,45.0,45.0,104060.0,55125.0,55125.0,3300.0,8050.0,3300.0,104060.0,104060.0,303.0,25.0,384.0,16.0,144.0,242.0,360.0,54.0,176.0,65.0,121.0,23.0,7.0,5.0,20.0,45.0,18.0,36.0,18.0,24.0,87.0,66.0,8.0,49.0,20.0,153600.0,55125.0,55125.0,4000.0,66000.0,22.0,32.0,68.0,12.0,95.0,8.0,31.0,10.0,12.0,67.0,1055.0,11.0,20.0,2.0,55125.0,55125.0,55125.0,4817.470215,7519.870117,104060.0,104060.0,0.0,999595.0,52.0,0.0,100.0,64.0,229.0,671.0,661.0


In [11]:
clear_and_nan.to_csv(path_to_data_folder + '/clear_and_filled.csv', index=False)