# Raiffeisen - Data Cleaning

In [1]:
import pandas as pd

pd.options.display.max_columns = 400

In [2]:
# Create table for missing data analysis
def draw_missing_data_table(df):
    total = df.isnull().sum().sort_values(ascending=False)
    percent = (df.isnull().sum()/df.isnull().count()).sort_values(ascending=False)
    missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    return missing_data

## Load original dataset

In [3]:
df = pd.read_csv('Datathon_sample_final1.csv', encoding='iso-8859-1')

### Fix encoding issue

In [4]:
df.CITY = df.CITY.str.replace('®', 'Ž')
df.CITY = df.CITY.str.replace('È', 'Č')
df.CITY = df.CITY.str.replace('©', 'Š')
df.CITY = df.CITY.str.replace('Æ', 'Ć')
df.CITY = df.CITY.str.lower()

In [5]:
df.groupby('CITY').agg({'month_od':'count'}).sort_values(by='month_od', ascending=False).head()

Unnamed: 0_level_0,month_od
CITY,Unnamed: 1_level_1
novi sad,41475
beograd-novi beograd,37756
beograd-zvezdara,32474
beograd-zemun,29126
beograd-voždovac,27052


## Fix missing data

In [6]:
draw_missing_data_table(df).head(30)

Unnamed: 0,Total,Percent
SLA_MATURITY_MTH,1083919,1.0
CC_MATURITY_MTH,1083919,1.0
SA_MATURITY_MTH,1083919,1.0
LM_MATURITY_MTH,1082179,0.998395
LM_MTH_SINCE_LAST_OPEN,1082179,0.998395
TD_MATURITY_MTH,1079560,0.995978
TD_MTH_SINCE_LAST_OPEN,1079461,0.995887
OD_MATURITY,1059976,0.977911
LEN_MATURITY_MTH,1042077,0.961397
LEN_MTH_SINCE_LAST_OPEN,1042077,0.961397


### Remove empty columns

Also add missing value flag column

In [7]:
df.drop(['SLA_MATURITY_MTH', 'CC_MATURITY_MTH', 'SA_MATURITY_MTH'], axis=1, inplace=True)

df['LM_MATURITY_MTH_MISSING'] = df.LM_MATURITY_MTH.isna().astype(int)
df.LM_MATURITY_MTH.fillna(-1, inplace=True)

df['LM_MTH_SINCE_LAST_OPEN_MISSING'] = df.LM_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.LM_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df.loc[df['TD_MATURITY_MTH'] < 0, 'TD_MATURITY_MTH'] = 0
df['TD_MATURITY_MTH_MISSING'] = df.TD_MATURITY_MTH.isna().astype(int)
df.TD_MATURITY_MTH.fillna(-1, inplace=True)

df['TD_MTH_SINCE_LAST_OPEN_MISSING'] = df.TD_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.TD_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df['OD_MATURITY_MISSING'] = df.OD_MATURITY.isna().astype(int)
df.OD_MATURITY.fillna(-1, inplace=True)

df['LEN_MTH_SINCE_LAST_OPEN_MISSING'] = df.LEN_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.LEN_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df.loc[df['LEN_MATURITY_MTH'] < 0, 'LEN_MATURITY_MTH'] = 0
df['LEN_MATURITY_MTH_MISSING'] = df.LEN_MATURITY_MTH.isna().astype(int)
df.LEN_MATURITY_MTH.fillna(-1, inplace=True)

df['CL_MTH_SINCE_LAST_OPEN_MISSING'] = df.CL_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.CL_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df.loc[df['CL_MATURITY_MTH'] < 0, 'CL_MATURITY_MTH'] = 0
df['CL_MATURITY_MTH_MISSING'] = df.CL_MATURITY_MTH.isna().astype(int)
df.CL_MATURITY_MTH.fillna(-1, inplace=True)

df['DAYS_SINCE_LAST_ACCOUNT_CLOSED_MISSING'] = df.DAYS_SINCE_LAST_ACCOUNT_CLOSED.isna().astype(int)
df.DAYS_SINCE_LAST_ACCOUNT_CLOSED.fillna(-1, inplace=True)

df.loc[df['MATURITY_MTH'] < 0, 'MATURITY_MTH'] = 0
df['MATURITY_MTH_MISSING'] = df.MATURITY_MTH.isna().astype(int)
df.MATURITY_MTH.fillna(-1, inplace=True)

df['BLACK_LIST_LEVEL_m1_MISSING'] = df.BLACK_LIST_LEVEL_m1.isna().astype(int)
df.BLACK_LIST_LEVEL_m1.fillna(-1, inplace=True)

df['PCK_MTH_SINCE_LAST_OPEN_MISSING'] = df.PCK_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.PCK_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df['SA_MTH_SINCE_LAST_OPEN_MISSING'] = df.SA_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.SA_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df['CC_MIX_MISSING'] = df.CC_MIX.isna().astype(int)
df.CC_MIX.fillna('Unknown', inplace=True)

df['DC_MIX_MISSING'] = df.DC_MIX.isna().astype(int)
df.DC_MIX.fillna('Unknown', inplace=True)

df['CC_MTH_SINCE_LAST_OPEN_MISSING'] = df.CC_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.CC_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df['SLA_MTH_SINCE_LAST_OPEN_MISSING'] = df.SLA_MTH_SINCE_LAST_OPEN.isna().astype(int)
df.SLA_MTH_SINCE_LAST_OPEN.fillna(-1, inplace=True)

df['MARITAL_STATUS_MISSING'] = df.MARITAL_STATUS.isna().astype(int)
df.MARITAL_STATUS.fillna('Unknown', inplace=True)

df['REGION_CD_MISSING'] = df.REGION_CD.isna().astype(int)
df.REGION_CD.fillna('Unknown', inplace=True)

df['MOB_ACTIVE_MISSING'] = df.MOB_ACTIVE.isna().astype(int)
df.MOB_ACTIVE.fillna(-1, inplace=True)

df['GENDER_CD_MISSING'] = df.GENDER_CD.isna().astype(int)
df.GENDER_CD.fillna('Unknown', inplace=True)

df['AGE_AT_ANALYSIS_MTH_CNT_MISSING'] = df.AGE_AT_ANALYSIS_MTH_CNT.isna().astype(int)
df.AGE_AT_ANALYSIS_MTH_CNT.fillna(-1, inplace=True)

In [8]:
draw_missing_data_table(df).head()

Unnamed: 0,Total,Percent
AGE_AT_ANALYSIS_MTH_CNT_MISSING,0,0.0
RUBR_CI_TXN_CNT_m1,0,0.0
MRCH_RNTC_TXN_AMT_m1,0,0.0
MRCH_SRV_TXN_CNT_m1,0,0.0
MRCH_SRV_TXN_AMT_m1,0,0.0


### Remove date column
Redudant with 'month' column

In [10]:
df.drop(['INFORMATION_DT'], axis=1, inplace=True)

In [15]:
df.head()

Unnamed: 0,PD_MIX_ACTIVE_m1,BLACK_LIST_FLG_m1,BLACK_LIST_LEVEL_m1,LEASING_FLG_m1,RISK_BLOCKED_FLG_m1,CUSTOMER_RK,CC_DB_TXN_CNT_m1,CC_DB_TXN_AMT_m1,CC_CR_TXN_CNT_m1,CC_CR_TXN_AMT_m1,CC_EOM_BAL_AMT_m1,CC_AVG_BAL_AMT_m1,CC_ACC_USED_CNT_m1,CL_DB_TXN_CNT_m1,CL_DB_TXN_AMT_m1,CL_CR_TXN_CNT_m1,CL_CR_TXN_AMT_m1,CL_EOM_BAL_AMT_m1,CL_AVG_BAL_AMT_m1,CL_ACC_USED_CNT_m1,LEN_DB_TXN_CNT_m1,LEN_DB_TXN_AMT_m1,LEN_CR_TXN_CNT_m1,LEN_CR_TXN_AMT_m1,LEN_EOM_BAL_AMT_m1,LEN_AVG_BAL_AMT_m1,LEN_ACC_USED_CNT_m1,LM_DB_TXN_CNT_m1,LM_DB_TXN_AMT_m1,LM_CR_TXN_CNT_m1,LM_CR_TXN_AMT_m1,LM_EOM_BAL_AMT_m1,LM_AVG_BAL_AMT_m1,LM_ACC_USED_CNT_m1,SA_DB_TXN_CNT_m1,SA_DB_TXN_AMT_m1,SA_CR_TXN_CNT_m1,SA_CR_TXN_AMT_m1,SA_EOM_BAL_AMT_m1,SA_AVG_BAL_AMT_m1,SA_ACC_USED_CNT_m1,SLA_DB_TXN_CNT_m1,SLA_DB_TXN_AMT_m1,SLA_CR_TXN_CNT_m1,SLA_CR_TXN_AMT_m1,SLA_EOM_BAL_AMT_m1,SLA_AVG_BAL_AMT_m1,SLA_ACC_USED_CNT_m1,TD_DB_TXN_CNT_m1,TD_DB_TXN_AMT_m1,TD_CR_TXN_CNT_m1,TD_CR_TXN_AMT_m1,TD_EOM_BAL_AMT_m1,TD_AVG_BAL_AMT_m1,TD_ACC_USED_CNT_m1,SSP_FLG_m1,PAST_DUE_AMT_m1,OD_LIMIT_m1,DAYS_PAST_DUE_m1,INCOME_AMT_m1,SLA_INCOME_AMT_m1,DEBIT_CARD_CNT_m1,NOSTRO_TXN_AMT_m1,LORO_TXN_AMT_m1,CHEQUE_TXN_CNT_m1,CHEQUE_TXN_AMT_m1,MRCH_AIR_TXN_CNT_m1,MRCH_AIR_TXN_AMT_m1,MRCH_CASH_TXN_CNT_m1,MRCH_CASH_TXN_AMT_m1,MRCH_CHILD_TXN_CNT_m1,MRCH_CHILD_TXN_AMT_m1,MRCH_CLTH_TXN_CNT_m1,MRCH_CLTH_TXN_AMT_m1,MRCH_DEPST_TXN_CNT_m1,MRCH_DEPST_TXN_AMT_m1,MRCH_DIY_TXN_CNT_m1,MRCH_DIY_TXN_AMT_m1,MRCH_DRCTM_TXN_CNT_m1,MRCH_DRCTM_TXN_AMT_m1,MRCH_ENT_TXN_CNT_m1,MRCH_ENT_TXN_AMT_m1,MRCH_FOOD_TXN_CNT_m1,MRCH_FOOD_TXN_AMT_m1,MRCH_HTL_TXN_CNT_m1,MRCH_HTL_TXN_AMT_m1,MRCH_LGS_TXN_CNT_m1,MRCH_LGS_TXN_AMT_m1,MRCH_MED_TXN_CNT_m1,MRCH_MED_TXN_AMT_m1,MRCH_MOT_TXN_CNT_m1,MRCH_MOT_TXN_AMT_m1,MRCH_MSCS_TXN_CNT_m1,MRCH_MSCS_TXN_AMT_m1,MRCH_PET_TXN_CNT_m1,MRCH_PET_TXN_AMT_m1,MRCH_RNTC_TXN_CNT_m1,MRCH_RNTC_TXN_AMT_m1,MRCH_SRV_TXN_CNT_m1,MRCH_SRV_TXN_AMT_m1,MRCH_TRV_TXN_CNT_m1,MRCH_TRV_TXN_AMT_m1,RUBR_CD_TXN_CNT_m1,RUBR_CD_TXN_AMT_m1,RUBR_CI_TXN_CNT_m1,RUBR_CI_TXN_AMT_m1,RUBR_CU_TXN_CNT_m1,RUBR_CU_TXN_AMT_m1,RUBR_PD_TXN_CNT_m1,RUBR_PD_TXN_AMT_m1,RUBR_PI_TXN_CNT_m1,RUBR_PI_TXN_AMT_m1,RUBR_PU_TXN_CNT_m1,RUBR_PU_TXN_AMT_m1,CH_CALLC_TXN_CNT_m1,CH_CALLC_TXN_AMT_m1,CH_ATM_TXN_CNT_m1,CH_ATM_TXN_AMT_m1,CH_INT_TXN_CNT_m1,CH_INT_TXN_AMT_m1,CH_SMS_TXN_CNT_m1,CH_SMS_TXN_AMT_m1,CH_BRN_TXN_CNT_m1,CH_BRN_TXN_AMT_m1,CH_DIRM_TXN_CNT_m1,CH_DIRM_TXN_AMT_m1,CH_POS_TXN_CNT_m1,CH_POS_TXN_AMT_m1,CC_TOT_TXN_CNT_m1,CC_TOT_TXN_AMT_m1,CL_TOT_TXN_CNT_m1,CL_TOT_TXN_AMT_m1,LEN_TOT_TXN_CNT_m1,LEN_TOT_TXN_AMT_m1,LM_TOT_TXN_CNT_m1,LM_TOT_TXN_AMT_m1,SA_TOT_TXN_CNT_m1,SA_TOT_TXN_AMT_m1,SLA_TOT_TXN_CNT_m1,SLA_TOT_TXN_AMT_m1,TD_TOT_TXN_CNT_m1,TD_TOT_TXN_AMT_m1,TOT_DB_TXN_CNT_m1,TOT_DB_TXN_AMT_m1,TOT_CR_TXN_CNT_m1,TOT_CR_TXN_AMT_m1,FIRST_OPEN_PRODUCT,FIRST_CLOSED_PRODUCT_6MTH,LAST_OPEN_PRODUCT,LAST_CLOSED_PRODUCT,ACCT_OPENED_CNT,ACCT_CLOSED_CNT,EARLY_REPAYMENT_LTD_FLG,ACCT_DEP_LTD_CNT,ACCT_CRE_LTD_CNT,CCREV_FLG_EVER,CCREV_FLG,CCWEB_FLG_EVER,CCWEB_FLG,DC_FLG_EVER,DC_FLG,OD_FLG_EVER,OD_FLG,ROL_FLG_EVER,SLA_FLG_EVER,SLA_FLG,CC_ACCT_OPENED_CNT,CC_ACCT_CLOSED_CNT,CC_MAX_TENURE_MTH,CC_MIN_TENURE_MTH,CC_MTH_SINCE_LAST_OPEN,CC_FLG_EVER,CC_FLG_m1,CC_ACCT_CNT_m1,CC_ACCT_OPEN_CNT_m1,CC_ACCT_CLOSE_CNT_m1,CL_ACCT_OPENED_CNT,CL_ACCT_CLOSED_CNT,CL_MATURITY_MTH,CL_MAX_TENURE_MTH,CL_MIN_TENURE_MTH,CL_MTH_SINCE_LAST_OPEN,CL_FLG_EVER,CL_FLG_m1,CL_ACCT_CNT_m1,CL_ACCT_OPEN_CNT_m1,CL_ACCT_CLOSE_CNT_m1,LEN_ACCT_OPENED_CNT,LEN_ACCT_CLOSED_CNT,LEN_MATURITY_MTH,LEN_MAX_TENURE_MTH,LEN_MIN_TENURE_MTH,LEN_MTH_SINCE_LAST_OPEN,LEN_FLG_EVER,LEN_FLG_m1,LEN_ACCT_CNT_m1,LEN_ACCT_OPEN_CNT_m1,LEN_ACCT_CLOSE_CNT_m1,LM_ACCT_OPENED_CNT,LM_ACCT_CLOSED_CNT,LM_MATURITY_MTH,LM_MAX_TENURE_MTH,LM_MIN_TENURE_MTH,LM_MTH_SINCE_LAST_OPEN,LM_FLG_EVER,LM_FLG_m1,LM_ACCT_CNT_m1,LM_ACCT_OPEN_CNT_m1,LM_ACCT_CLOSE_CNT_m1,SA_ACCT_OPENED_CNT,SA_ACCT_CLOSED_CNT,SA_MAX_TENURE_MTH,SA_MIN_TENURE_MTH,SA_MTH_SINCE_LAST_OPEN,SA_FLG_EVER,SA_FLG_m1,SA_ACCT_CNT_m1,SA_ACCT_OPEN_CNT_m1,SA_ACCT_CLOSE_CNT_m1,SLA_ACCT_OPENED_CNT,SLA_ACCT_CLOSED_CNT,SLA_MAX_TENURE_MTH,SLA_MIN_TENURE_MTH,SLA_MTH_SINCE_LAST_OPEN,SLA_FLG_m1,SLA_ACCT_CNT_m1,SLA_ACCT_OPEN_CNT_m1,SLA_ACCT_CLOSE_CNT_m1,TD_ACCT_OPENED_CNT,TD_ACCT_CLOSED_CNT,TD_MATURITY_MTH,TD_MAX_TENURE_MTH,TD_MIN_TENURE_MTH,TD_MTH_SINCE_LAST_OPEN,TD_FLG_EVER,TD_FLG_m1,TD_ACCT_CNT_m1,TD_ACCT_OPEN_CNT_m1,TD_ACCT_CLOSE_CNT_m1,PCK_ACCT_OPENED_CNT,PCK_ACCT_CLOSED_CNT,PCK_MAX_TENURE_MTH,PCK_MIN_TENURE_MTH,PCK_MTH_SINCE_LAST_OPEN,PCK_FLG_EVER,PCK_FLG_m1,PCK_ACCT_CNT_m1,PCK_ACCT_OPEN_CNT_m1,PCK_ACCT_CLOSE_CNT_m1,ACCT_CNT_m1,PD_MIX_EVER,OD_MATURITY,PROD_CNT_m1,DAYS_SINCE_LAST_ACCOUNT_OPEN,DAYS_SINCE_LAST_ACCOUNT_CLOSED,DAYS_SINCE_FIRST_ACCOUNT_OPEN,MATURITY_MTH,TENURE_MTH,MTH_SINCE_LAST_OPEN,CITY,MOBILE_FLG,EMAIL_FLG,IS_SEGMENT_CD,BRANCH_CD,REGION_CD,GEO_REGION_CD,AGE_AT_ANALYSIS_MTH_CNT,GENDER_CD,DC_MIX,CC_MIX,ROL_flg_m1,MOB_ACTIVE,BizSeg,CC_LIMIT_m1,month,month_od,month_do,mBanking_logs_m1,eBanking_logs_m1,MARITAL_STATUS,Education_level,RESIDENCE_STATUS,EMPLOYMENT_TYPE,LM_MATURITY_MTH_MISSING,LM_MTH_SINCE_LAST_OPEN_MISSING,TD_MATURITY_MTH_MISSING,TD_MTH_SINCE_LAST_OPEN_MISSING,OD_MATURITY_MISSING,LEN_MTH_SINCE_LAST_OPEN_MISSING,LEN_MATURITY_MTH_MISSING,CL_MTH_SINCE_LAST_OPEN_MISSING,CL_MATURITY_MTH_MISSING,DAYS_SINCE_LAST_ACCOUNT_CLOSED_MISSING,MATURITY_MTH_MISSING,BLACK_LIST_LEVEL_m1_MISSING,PCK_MTH_SINCE_LAST_OPEN_MISSING,SA_MTH_SINCE_LAST_OPEN_MISSING,CC_MIX_MISSING,DC_MIX_MISSING,CC_MTH_SINCE_LAST_OPEN_MISSING,SLA_MTH_SINCE_LAST_OPEN_MISSING,MARITAL_STATUS_MISSING,REGION_CD_MISSING,MOB_ACTIVE_MISSING,GENDER_CD_MISSING,AGE_AT_ANALYSIS_MTH_CNT_MISSING
0,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,1,12266.06,2,24532.12,-196248.36,196248.3603,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12281.125686,0.0,6,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,3,36798.18,0,0,0,0.0,0,0.0,0,0.0,3,36798.18,0,0.0,0,0.0,0,0.0,0,0.0,1,12266.06,2,24532.12,LEN,XXX,LEN,XXX,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,1,0,16.0,4,4,4.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,116,-1.0,116,16.0,4,4,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,62.0,F,Unknown,Unknown,0,4.0,Webovci,0,201506,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
1,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,1,12544.54,2,25089.08,-37609.29,37204.9844,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12536.01999,0.0,4,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,3,37633.62,0,0,0,0.0,0,0.0,0,0.0,3,37633.62,0,0.0,0,0.0,0,0.0,0,0.0,1,12544.54,2,25089.08,LEN,XXX,LEN,XXX,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,3.0,17,17,17.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,513,-1.0,513,3.0,17,17,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,63.0,F,Unknown,Unknown,0,17.0,Webovci,0,201607,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
2,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,1,12269.68,2,24539.36,-208443.62,208443.6137,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,3,36809.04,0,0,0,0.0,0,0.0,0,0.0,3,36809.04,0,0.0,0,0.0,0,0.0,0,0.0,1,12269.68,2,24539.36,LEN,XXX,LEN,XXX,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,1,0,17.0,3,3,3.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,86,-1.0,86,17.0,3,3,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,62.0,F,Unknown,Unknown,0,3.0,Webovci,0,201505,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
3,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,2,12242.45,4,24484.9,-135635.25,135638.8889,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12327.060486,0.0,3,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,6,36727.35,0,0,0,0.0,0,0.0,0,0.0,6,36727.35,0,0.0,0,0.0,0,0.0,0,0.0,2,12242.45,4,24484.9,LEN,XXX,LEN,XXX,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,11.0,9,9,9.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,269,-1.0,269,11.0,9,9,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,63.0,F,Unknown,Unknown,0,9.0,Webovci,0,201511,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
4,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,2,12562.65,3,25125.3,-25034.91,24655.4556,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12529.15524,0.0,9,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,5,37687.95,0,0,0,0.0,0,0.0,0,0.0,5,37687.95,0,0.0,0,0.0,0,0.0,0,0.0,2,12562.65,3,25125.3,LEN,XXX,LEN,XXX,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,2.0,18,18,18.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,544,-1.0,544,2.0,18,18,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,64.0,F,Unknown,Unknown,0,18.0,Webovci,0,201608,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0


In [16]:
df.to_csv('RFB - Clean Data.csv')

In [17]:
df = pd.read_csv('RFB - Clean Data.csv', encoding='utf-8')

In [18]:
df.head()

Unnamed: 0.1,Unnamed: 0,PD_MIX_ACTIVE_m1,BLACK_LIST_FLG_m1,BLACK_LIST_LEVEL_m1,LEASING_FLG_m1,RISK_BLOCKED_FLG_m1,CUSTOMER_RK,CC_DB_TXN_CNT_m1,CC_DB_TXN_AMT_m1,CC_CR_TXN_CNT_m1,CC_CR_TXN_AMT_m1,CC_EOM_BAL_AMT_m1,CC_AVG_BAL_AMT_m1,CC_ACC_USED_CNT_m1,CL_DB_TXN_CNT_m1,CL_DB_TXN_AMT_m1,CL_CR_TXN_CNT_m1,CL_CR_TXN_AMT_m1,CL_EOM_BAL_AMT_m1,CL_AVG_BAL_AMT_m1,CL_ACC_USED_CNT_m1,LEN_DB_TXN_CNT_m1,LEN_DB_TXN_AMT_m1,LEN_CR_TXN_CNT_m1,LEN_CR_TXN_AMT_m1,LEN_EOM_BAL_AMT_m1,LEN_AVG_BAL_AMT_m1,LEN_ACC_USED_CNT_m1,LM_DB_TXN_CNT_m1,LM_DB_TXN_AMT_m1,LM_CR_TXN_CNT_m1,LM_CR_TXN_AMT_m1,LM_EOM_BAL_AMT_m1,LM_AVG_BAL_AMT_m1,LM_ACC_USED_CNT_m1,SA_DB_TXN_CNT_m1,SA_DB_TXN_AMT_m1,SA_CR_TXN_CNT_m1,SA_CR_TXN_AMT_m1,SA_EOM_BAL_AMT_m1,SA_AVG_BAL_AMT_m1,SA_ACC_USED_CNT_m1,SLA_DB_TXN_CNT_m1,SLA_DB_TXN_AMT_m1,SLA_CR_TXN_CNT_m1,SLA_CR_TXN_AMT_m1,SLA_EOM_BAL_AMT_m1,SLA_AVG_BAL_AMT_m1,SLA_ACC_USED_CNT_m1,TD_DB_TXN_CNT_m1,TD_DB_TXN_AMT_m1,TD_CR_TXN_CNT_m1,TD_CR_TXN_AMT_m1,TD_EOM_BAL_AMT_m1,TD_AVG_BAL_AMT_m1,TD_ACC_USED_CNT_m1,SSP_FLG_m1,PAST_DUE_AMT_m1,OD_LIMIT_m1,DAYS_PAST_DUE_m1,INCOME_AMT_m1,SLA_INCOME_AMT_m1,DEBIT_CARD_CNT_m1,NOSTRO_TXN_AMT_m1,LORO_TXN_AMT_m1,CHEQUE_TXN_CNT_m1,CHEQUE_TXN_AMT_m1,MRCH_AIR_TXN_CNT_m1,MRCH_AIR_TXN_AMT_m1,MRCH_CASH_TXN_CNT_m1,MRCH_CASH_TXN_AMT_m1,MRCH_CHILD_TXN_CNT_m1,MRCH_CHILD_TXN_AMT_m1,MRCH_CLTH_TXN_CNT_m1,MRCH_CLTH_TXN_AMT_m1,MRCH_DEPST_TXN_CNT_m1,MRCH_DEPST_TXN_AMT_m1,MRCH_DIY_TXN_CNT_m1,MRCH_DIY_TXN_AMT_m1,MRCH_DRCTM_TXN_CNT_m1,MRCH_DRCTM_TXN_AMT_m1,MRCH_ENT_TXN_CNT_m1,MRCH_ENT_TXN_AMT_m1,MRCH_FOOD_TXN_CNT_m1,MRCH_FOOD_TXN_AMT_m1,MRCH_HTL_TXN_CNT_m1,MRCH_HTL_TXN_AMT_m1,MRCH_LGS_TXN_CNT_m1,MRCH_LGS_TXN_AMT_m1,MRCH_MED_TXN_CNT_m1,MRCH_MED_TXN_AMT_m1,MRCH_MOT_TXN_CNT_m1,MRCH_MOT_TXN_AMT_m1,MRCH_MSCS_TXN_CNT_m1,MRCH_MSCS_TXN_AMT_m1,MRCH_PET_TXN_CNT_m1,MRCH_PET_TXN_AMT_m1,MRCH_RNTC_TXN_CNT_m1,MRCH_RNTC_TXN_AMT_m1,MRCH_SRV_TXN_CNT_m1,MRCH_SRV_TXN_AMT_m1,MRCH_TRV_TXN_CNT_m1,MRCH_TRV_TXN_AMT_m1,RUBR_CD_TXN_CNT_m1,RUBR_CD_TXN_AMT_m1,RUBR_CI_TXN_CNT_m1,RUBR_CI_TXN_AMT_m1,RUBR_CU_TXN_CNT_m1,RUBR_CU_TXN_AMT_m1,RUBR_PD_TXN_CNT_m1,RUBR_PD_TXN_AMT_m1,RUBR_PI_TXN_CNT_m1,RUBR_PI_TXN_AMT_m1,RUBR_PU_TXN_CNT_m1,RUBR_PU_TXN_AMT_m1,CH_CALLC_TXN_CNT_m1,CH_CALLC_TXN_AMT_m1,CH_ATM_TXN_CNT_m1,CH_ATM_TXN_AMT_m1,CH_INT_TXN_CNT_m1,CH_INT_TXN_AMT_m1,CH_SMS_TXN_CNT_m1,CH_SMS_TXN_AMT_m1,CH_BRN_TXN_CNT_m1,CH_BRN_TXN_AMT_m1,CH_DIRM_TXN_CNT_m1,CH_DIRM_TXN_AMT_m1,CH_POS_TXN_CNT_m1,CH_POS_TXN_AMT_m1,CC_TOT_TXN_CNT_m1,CC_TOT_TXN_AMT_m1,CL_TOT_TXN_CNT_m1,CL_TOT_TXN_AMT_m1,LEN_TOT_TXN_CNT_m1,LEN_TOT_TXN_AMT_m1,LM_TOT_TXN_CNT_m1,LM_TOT_TXN_AMT_m1,SA_TOT_TXN_CNT_m1,SA_TOT_TXN_AMT_m1,SLA_TOT_TXN_CNT_m1,SLA_TOT_TXN_AMT_m1,TD_TOT_TXN_CNT_m1,TD_TOT_TXN_AMT_m1,TOT_DB_TXN_CNT_m1,TOT_DB_TXN_AMT_m1,TOT_CR_TXN_CNT_m1,TOT_CR_TXN_AMT_m1,FIRST_OPEN_PRODUCT,FIRST_CLOSED_PRODUCT_6MTH,LAST_OPEN_PRODUCT,LAST_CLOSED_PRODUCT,ACCT_OPENED_CNT,ACCT_CLOSED_CNT,EARLY_REPAYMENT_LTD_FLG,ACCT_DEP_LTD_CNT,ACCT_CRE_LTD_CNT,CCREV_FLG_EVER,CCREV_FLG,CCWEB_FLG_EVER,CCWEB_FLG,DC_FLG_EVER,DC_FLG,OD_FLG_EVER,OD_FLG,ROL_FLG_EVER,SLA_FLG_EVER,SLA_FLG,CC_ACCT_OPENED_CNT,CC_ACCT_CLOSED_CNT,CC_MAX_TENURE_MTH,CC_MIN_TENURE_MTH,CC_MTH_SINCE_LAST_OPEN,CC_FLG_EVER,CC_FLG_m1,CC_ACCT_CNT_m1,CC_ACCT_OPEN_CNT_m1,CC_ACCT_CLOSE_CNT_m1,CL_ACCT_OPENED_CNT,CL_ACCT_CLOSED_CNT,CL_MATURITY_MTH,CL_MAX_TENURE_MTH,CL_MIN_TENURE_MTH,CL_MTH_SINCE_LAST_OPEN,CL_FLG_EVER,CL_FLG_m1,CL_ACCT_CNT_m1,CL_ACCT_OPEN_CNT_m1,CL_ACCT_CLOSE_CNT_m1,LEN_ACCT_OPENED_CNT,LEN_ACCT_CLOSED_CNT,LEN_MATURITY_MTH,LEN_MAX_TENURE_MTH,LEN_MIN_TENURE_MTH,LEN_MTH_SINCE_LAST_OPEN,LEN_FLG_EVER,LEN_FLG_m1,LEN_ACCT_CNT_m1,LEN_ACCT_OPEN_CNT_m1,LEN_ACCT_CLOSE_CNT_m1,LM_ACCT_OPENED_CNT,LM_ACCT_CLOSED_CNT,LM_MATURITY_MTH,LM_MAX_TENURE_MTH,LM_MIN_TENURE_MTH,LM_MTH_SINCE_LAST_OPEN,LM_FLG_EVER,LM_FLG_m1,LM_ACCT_CNT_m1,LM_ACCT_OPEN_CNT_m1,LM_ACCT_CLOSE_CNT_m1,SA_ACCT_OPENED_CNT,SA_ACCT_CLOSED_CNT,SA_MAX_TENURE_MTH,SA_MIN_TENURE_MTH,SA_MTH_SINCE_LAST_OPEN,SA_FLG_EVER,SA_FLG_m1,SA_ACCT_CNT_m1,SA_ACCT_OPEN_CNT_m1,SA_ACCT_CLOSE_CNT_m1,SLA_ACCT_OPENED_CNT,SLA_ACCT_CLOSED_CNT,SLA_MAX_TENURE_MTH,SLA_MIN_TENURE_MTH,SLA_MTH_SINCE_LAST_OPEN,SLA_FLG_m1,SLA_ACCT_CNT_m1,SLA_ACCT_OPEN_CNT_m1,SLA_ACCT_CLOSE_CNT_m1,TD_ACCT_OPENED_CNT,TD_ACCT_CLOSED_CNT,TD_MATURITY_MTH,TD_MAX_TENURE_MTH,TD_MIN_TENURE_MTH,TD_MTH_SINCE_LAST_OPEN,TD_FLG_EVER,TD_FLG_m1,TD_ACCT_CNT_m1,TD_ACCT_OPEN_CNT_m1,TD_ACCT_CLOSE_CNT_m1,PCK_ACCT_OPENED_CNT,PCK_ACCT_CLOSED_CNT,PCK_MAX_TENURE_MTH,PCK_MIN_TENURE_MTH,PCK_MTH_SINCE_LAST_OPEN,PCK_FLG_EVER,PCK_FLG_m1,PCK_ACCT_CNT_m1,PCK_ACCT_OPEN_CNT_m1,PCK_ACCT_CLOSE_CNT_m1,ACCT_CNT_m1,PD_MIX_EVER,OD_MATURITY,PROD_CNT_m1,DAYS_SINCE_LAST_ACCOUNT_OPEN,DAYS_SINCE_LAST_ACCOUNT_CLOSED,DAYS_SINCE_FIRST_ACCOUNT_OPEN,MATURITY_MTH,TENURE_MTH,MTH_SINCE_LAST_OPEN,CITY,MOBILE_FLG,EMAIL_FLG,IS_SEGMENT_CD,BRANCH_CD,REGION_CD,GEO_REGION_CD,AGE_AT_ANALYSIS_MTH_CNT,GENDER_CD,DC_MIX,CC_MIX,ROL_flg_m1,MOB_ACTIVE,BizSeg,CC_LIMIT_m1,month,month_od,month_do,mBanking_logs_m1,eBanking_logs_m1,MARITAL_STATUS,Education_level,RESIDENCE_STATUS,EMPLOYMENT_TYPE,LM_MATURITY_MTH_MISSING,LM_MTH_SINCE_LAST_OPEN_MISSING,TD_MATURITY_MTH_MISSING,TD_MTH_SINCE_LAST_OPEN_MISSING,OD_MATURITY_MISSING,LEN_MTH_SINCE_LAST_OPEN_MISSING,LEN_MATURITY_MTH_MISSING,CL_MTH_SINCE_LAST_OPEN_MISSING,CL_MATURITY_MTH_MISSING,DAYS_SINCE_LAST_ACCOUNT_CLOSED_MISSING,MATURITY_MTH_MISSING,BLACK_LIST_LEVEL_m1_MISSING,PCK_MTH_SINCE_LAST_OPEN_MISSING,SA_MTH_SINCE_LAST_OPEN_MISSING,CC_MIX_MISSING,DC_MIX_MISSING,CC_MTH_SINCE_LAST_OPEN_MISSING,SLA_MTH_SINCE_LAST_OPEN_MISSING,MARITAL_STATUS_MISSING,REGION_CD_MISSING,MOB_ACTIVE_MISSING,GENDER_CD_MISSING,AGE_AT_ANALYSIS_MTH_CNT_MISSING
0,0,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,1,12266.06,2,24532.12,-196248.36,196248.3603,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12281.125686,0.0,6,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,3,36798.18,0,0,0,0.0,0,0.0,0,0.0,3,36798.18,0,0.0,0,0.0,0,0.0,0,0.0,1,12266.06,2,24532.12,LEN,XXX,LEN,XXX,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,1,0,16.0,4,4,4.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,116,-1.0,116,16.0,4,4,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,62.0,F,Unknown,Unknown,0,4.0,Webovci,0,201506,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
1,1,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,1,12544.54,2,25089.08,-37609.29,37204.9844,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12536.01999,0.0,4,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,3,37633.62,0,0,0,0.0,0,0.0,0,0.0,3,37633.62,0,0.0,0,0.0,0,0.0,0,0.0,1,12544.54,2,25089.08,LEN,XXX,LEN,XXX,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,3.0,17,17,17.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,513,-1.0,513,3.0,17,17,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,63.0,F,Unknown,Unknown,0,17.0,Webovci,0,201607,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
2,2,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,1,12269.68,2,24539.36,-208443.62,208443.6137,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,3,36809.04,0,0,0,0.0,0,0.0,0,0.0,3,36809.04,0,0.0,0,0.0,0,0.0,0,0.0,1,12269.68,2,24539.36,LEN,XXX,LEN,XXX,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,1,0,17.0,3,3,3.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,86,-1.0,86,17.0,3,3,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,62.0,F,Unknown,Unknown,0,3.0,Webovci,0,201505,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
3,3,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,2,12242.45,4,24484.9,-135635.25,135638.8889,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12327.060486,0.0,3,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,6,36727.35,0,0,0,0.0,0,0.0,0,0.0,6,36727.35,0,0.0,0,0.0,0,0.0,0,0.0,2,12242.45,4,24484.9,LEN,XXX,LEN,XXX,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,11.0,9,9,9.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,269,-1.0,269,11.0,9,9,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,63.0,F,Unknown,Unknown,0,9.0,Webovci,0,201511,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0
4,4,LEN,0,-1.0,0,0,427759,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,2,12562.65,3,25125.3,-25034.91,24655.4556,1,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,0.0,0,0.0,0.0,0.0,0,0,12529.15524,0.0,9,0.0,0.0,0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0.0,0,0,0,0.0,0,0.0,0,0.0,5,37687.95,0,0,0,0.0,0,0.0,0,0.0,5,37687.95,0,0.0,0,0.0,0,0.0,0,0.0,2,12562.65,3,25125.3,LEN,XXX,LEN,XXX,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,2.0,18,18,18.0,1,1,1,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,0,-1.0,0,0,-1.0,0,0,0,0,0,0,0,0,0,-1.0,0,0,0,0,0,1,LEN,-1.0,1,544,-1.0,544,2.0,18,18,beograd-novi beograd,0,0,INDV,161,Grad,BEOGRAD 1,64.0,F,Unknown,Unknown,0,18.0,Webovci,0,201608,201503,201609,0,0,Married,2. Elementary School,Ownership,Unemployed,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,1,1,1,0,0,0,0,0


In [19]:
df.shape

(1083919, 307)