# Create combined training data

In [1]:
import pandas as pd
import numpy as np
import os
import os.path

In [2]:
DATA_DIR = '/opt/project/data'

In [3]:
#Based on this great kernel https://www.kaggle.com/arjanso/reducing-dataframe-memory-size-by-65
def reduce_mem_usage(df):
    start_mem_usg = df.memory_usage().sum() / 1024**2 
    print("Memory usage of properties dataframe is :",start_mem_usg," MB")
    NAlist = [] # Keeps track of columns that have missing values filled in. 
    for col in df.columns:
        if df[col].dtype != object:  # Exclude strings            
            # Print current column type
            print("******************************")
            print("Column: ",col)
            print("dtype before: ",df[col].dtype)            
            # make variables for Int, max and min
            IsInt = False
            mx = df[col].max()
            mn = df[col].min()            
            # Integer does not support NA, therefore, NA needs to be filled
            if not np.isfinite(df[col]).all(): 
                NAlist.append(col)
                df[col].fillna(mn-1,inplace=True)  
                   
            # test if column can be converted to an integer
            asint = df[col].fillna(0).astype(np.int64)
            result = (df[col] - asint)
            result = result.sum()
            if result > -0.01 and result < 0.01:
                IsInt = True            
            # Make Integer/unsigned Integer datatypes
            if IsInt:
                if mn >= 0:
                    if mx < 255:
                        df[col] = df[col].astype(np.uint8)
                    elif mx < 65535:
                        df[col] = df[col].astype(np.uint16)
                    elif mx < 4294967295:
                        df[col] = df[col].astype(np.uint32)
                    else:
                        df[col] = df[col].astype(np.uint64)
                else:
                    if mn > np.iinfo(np.int8).min and mx < np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif mn > np.iinfo(np.int16).min and mx < np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif mn > np.iinfo(np.int32).min and mx < np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif mn > np.iinfo(np.int64).min and mx < np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)    
            # Make float datatypes 32 bit
            else:
                df[col] = df[col].astype(np.float32)
            
            # Print new column type
            print("dtype after: ",df[col].dtype)
            print("******************************")
    # Print final result
    print("___MEMORY USAGE AFTER COMPLETION:___")
    mem_usg = df.memory_usage().sum() / 1024**2 
    print("Memory usage is: ",mem_usg," MB")
    print("This is ",100*mem_usg/start_mem_usg,"% of the initial size")
    return df, NAlist

In [4]:
train_id = pd.read_csv(os.path.join(DATA_DIR,'raw','train_identity.csv'))
print(train_id.shape)
train_id.head()

(144233, 41)


Unnamed: 0,TransactionID,id_01,id_02,id_03,id_04,id_05,id_06,id_07,id_08,id_09,...,id_31,id_32,id_33,id_34,id_35,id_36,id_37,id_38,DeviceType,DeviceInfo
0,2987004,0.0,70787.0,,,,,,,,...,samsung browser 6.2,32.0,2220x1080,match_status:2,T,F,T,T,mobile,SAMSUNG SM-G892A Build/NRD90M
1,2987008,-5.0,98945.0,,,0.0,-5.0,,,,...,mobile safari 11.0,32.0,1334x750,match_status:1,T,F,F,T,mobile,iOS Device
2,2987010,-5.0,191631.0,0.0,0.0,0.0,0.0,,,0.0,...,chrome 62.0,,,,F,F,T,T,desktop,Windows
3,2987011,-5.0,221832.0,,,0.0,-6.0,,,,...,chrome 62.0,,,,F,F,T,T,desktop,
4,2987016,0.0,7460.0,0.0,0.0,1.0,0.0,,,0.0,...,chrome 62.0,24.0,1280x800,match_status:2,T,F,T,T,desktop,MacOS


In [5]:
train_id, _  = reduce_mem_usage(train_id)
print(train_id.shape)
train_id.head()

Memory usage of properties dataframe is : 45.116905212402344  MB
******************************
Column:  TransactionID
dtype before:  int64
dtype after:  uint32
******************************
******************************
Column:  id_01
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_02
dtype before:  float64
dtype after:  uint32
******************************
******************************
Column:  id_03
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_04
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_05
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_06
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_07
dtype before:  float64
dtype after:  int8
****

Unnamed: 0,TransactionID,id_01,id_02,id_03,id_04,id_05,id_06,id_07,id_08,id_09,...,id_31,id_32,id_33,id_34,id_35,id_36,id_37,id_38,DeviceType,DeviceInfo
0,2987004,0,70787,-14,-29,-73,-101,-47,-101,-37,...,samsung browser 6.2,32,2220x1080,match_status:2,T,F,T,T,mobile,SAMSUNG SM-G892A Build/NRD90M
1,2987008,-5,98945,-14,-29,0,-5,-47,-101,-37,...,mobile safari 11.0,32,1334x750,match_status:1,T,F,F,T,mobile,iOS Device
2,2987010,-5,191631,0,0,0,0,-47,-101,0,...,chrome 62.0,255,,,F,F,T,T,desktop,Windows
3,2987011,-5,221832,-14,-29,0,-6,-47,-101,-37,...,chrome 62.0,255,,,F,F,T,T,desktop,
4,2987016,0,7460,0,0,1,0,-47,-101,0,...,chrome 62.0,24,1280x800,match_status:2,T,F,T,T,desktop,MacOS


In [6]:
train_id.columns

Index(['TransactionID', 'id_01', 'id_02', 'id_03', 'id_04', 'id_05', 'id_06',
       'id_07', 'id_08', 'id_09', 'id_10', 'id_11', 'id_12', 'id_13', 'id_14',
       'id_15', 'id_16', 'id_17', 'id_18', 'id_19', 'id_20', 'id_21', 'id_22',
       'id_23', 'id_24', 'id_25', 'id_26', 'id_27', 'id_28', 'id_29', 'id_30',
       'id_31', 'id_32', 'id_33', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38',
       'DeviceType', 'DeviceInfo'],
      dtype='object')

In [7]:
train_data = pd.read_csv(os.path.join(DATA_DIR,'raw','train_transaction.csv'))
train_data, _ = reduce_mem_usage(train_data)
print(train_data.shape)
train_data.head()

Memory usage of properties dataframe is : 1775.152359008789  MB
******************************
Column:  TransactionID
dtype before:  int64
dtype after:  uint32
******************************
******************************
Column:  isFraud
dtype before:  int64
dtype after:  uint8
******************************
******************************
Column:  TransactionDT
dtype before:  int64
dtype after:  uint32
******************************
******************************
Column:  TransactionAmt
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  card1
dtype before:  int64
dtype after:  uint16
******************************
******************************
Column:  card2
dtype before:  float64
dtype after:  uint16
******************************
******************************
Column:  card3
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  card5
dtype before:  float64
dt

dtype after:  uint8
******************************
******************************
Column:  V29
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V30
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V31
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V32
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V33
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V34
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V35
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V36
dtype before:  float64
dtype after:  uint8
******************************
*****

dtype after:  uint8
******************************
******************************
Column:  V99
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V100
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V101
dtype before:  float64
dtype after:  uint16
******************************
******************************
Column:  V102
dtype before:  float64
dtype after:  uint16
******************************
******************************
Column:  V103
dtype before:  float64
dtype after:  uint16
******************************
******************************
Column:  V104
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V105
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V106
dtype before:  float64
dtype after:  uint8
**************************

dtype after:  uint16
******************************
******************************
Column:  V168
dtype before:  float64
dtype after:  uint16
******************************
******************************
Column:  V169
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V170
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V171
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V172
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V173
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V174
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V175
dtype before:  float64
dtype after:  uint8
**************************

dtype after:  uint8
******************************
******************************
Column:  V239
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V240
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V241
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V242
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V243
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V244
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V245
dtype before:  float64
dtype after:  uint16
******************************
******************************
Column:  V246
dtype before:  float64
dtype after:  uint8
***************************

dtype after:  float32
******************************
******************************
Column:  V311
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V312
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V313
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V314
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V315
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V316
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V317
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V318
dtype before:  float64
dtype after:  float32
**********

Unnamed: 0,TransactionID,isFraud,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,...,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339
0,2987000,0,86400,68.5,W,13926,99,150,discover,142,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,2987001,0,86401,29.0,W,2755,404,150,mastercard,102,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,2987002,0,86469,59.0,W,4663,490,150,visa,166,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,2987003,0,86499,50.0,W,18132,567,150,mastercard,117,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,2987004,0,86506,50.0,H,4497,514,150,mastercard,102,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
train_data.columns

Index(['TransactionID', 'isFraud', 'TransactionDT', 'TransactionAmt',
       'ProductCD', 'card1', 'card2', 'card3', 'card4', 'card5',
       ...
       'V330', 'V331', 'V332', 'V333', 'V334', 'V335', 'V336', 'V337', 'V338',
       'V339'],
      dtype='object', length=394)

In [9]:
train_data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
TransactionID,590540.0,3.282270e+06,1.704744e+05,2987000.000,3.134635e+06,3.282270e+06,3.429904e+06,3.577539e+06
isFraud,590540.0,3.499001e-02,1.837546e-01,0.000,0.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00
TransactionDT,590540.0,7.372311e+06,4.617224e+06,86400.000,3.027058e+06,7.306528e+06,1.124662e+07,1.581113e+07
TransactionAmt,590540.0,1.348496e+02,2.391627e+02,0.251,4.332100e+01,6.876900e+01,1.250000e+02,3.193739e+04
card1,590540.0,9.898735e+03,4.901170e+03,1000.000,6.019000e+03,9.678000e+03,1.418400e+04,1.839600e+04
card2,590540.0,3.585687e+02,1.598653e+02,99.000,2.040000e+02,3.600000e+02,5.120000e+02,6.000000e+02
card3,590540.0,1.530513e+02,1.165922e+01,99.000,1.500000e+02,1.500000e+02,1.500000e+02,2.310000e+02
card5,590540.0,1.985557e+02,4.196233e+01,99.000,1.660000e+02,2.260000e+02,2.260000e+02,2.370000e+02
addr1,590540.0,2.694007e+02,1.132904e+02,99.000,1.840000e+02,2.720000e+02,3.270000e+02,5.400000e+02
addr2,590540.0,7.814420e+01,2.459628e+01,9.000,8.700000e+01,8.700000e+01,8.700000e+01,1.020000e+02


In [10]:
train_raw= pd.merge(train_data,train_id, how='left', on='TransactionID')
train_raw.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
TransactionID,590540.0,3.282270e+06,1.704744e+05,2987000.000,3.134635e+06,3.282270e+06,3429904.25,3.577539e+06
isFraud,590540.0,3.499001e-02,1.837546e-01,0.000,0.000000e+00,0.000000e+00,0.00,1.000000e+00
TransactionDT,590540.0,7.372311e+06,4.617224e+06,86400.000,3.027058e+06,7.306528e+06,11246620.00,1.581113e+07
TransactionAmt,590540.0,1.348496e+02,2.391627e+02,0.251,4.332100e+01,6.876900e+01,125.00,3.193739e+04
card1,590540.0,9.898735e+03,4.901170e+03,1000.000,6.019000e+03,9.678000e+03,14184.00,1.839600e+04
card2,590540.0,3.585687e+02,1.598653e+02,99.000,2.040000e+02,3.600000e+02,512.00,6.000000e+02
card3,590540.0,1.530513e+02,1.165922e+01,99.000,1.500000e+02,1.500000e+02,150.00,2.310000e+02
card5,590540.0,1.985557e+02,4.196233e+01,99.000,1.660000e+02,2.260000e+02,226.00,2.370000e+02
addr1,590540.0,2.694007e+02,1.132904e+02,99.000,1.840000e+02,2.720000e+02,327.00,5.400000e+02
addr2,590540.0,7.814420e+01,2.459628e+01,9.000,8.700000e+01,8.700000e+01,87.00,1.020000e+02


In [11]:
train_raw.to_pickle(os.path.join(DATA_DIR,'interim','train_reduced.pkl'))

In [12]:
test_id = pd.read_csv(os.path.join(DATA_DIR,'raw','test_identity.csv'))
test_id, _ = reduce_mem_usage(test_id)
print(test_id.shape)
test_id.head()

Memory usage of properties dataframe is : 44.389320373535156  MB
******************************
Column:  TransactionID
dtype before:  int64
dtype after:  uint32
******************************
******************************
Column:  id_01
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_02
dtype before:  float64
dtype after:  uint32
******************************
******************************
Column:  id_03
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_04
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_05
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_06
dtype before:  float64
dtype after:  int8
******************************
******************************
Column:  id_07
dtype before:  float64
dtype after:  int8
****

Unnamed: 0,TransactionID,id_01,id_02,id_03,id_04,id_05,id_06,id_07,id_08,id_09,...,id_31,id_32,id_33,id_34,id_35,id_36,id_37,id_38,DeviceType,DeviceInfo
0,3663586,-45,280290,-13,-20,0,0,-42,-101,-33,...,chrome 67.0 for android,7,,,F,F,T,F,mobile,MYA-L13 Build/HUAWEIMYA-L13
1,3663588,0,3579,0,0,0,0,-42,-101,0,...,chrome 67.0 for android,24,1280x720,match_status:2,T,F,T,T,mobile,LGLS676 Build/MXB48T
2,3663597,-5,185210,-13,-20,1,0,-42,-101,-33,...,ie 11.0 for tablet,7,,,F,T,T,F,desktop,Trident/7.0
3,3663601,-45,252944,0,0,0,0,-42,-101,0,...,chrome 67.0 for android,7,,,F,F,T,F,mobile,MYA-L13 Build/HUAWEIMYA-L13
4,3663602,-95,328680,-13,-20,7,-33,-42,-101,-33,...,chrome 67.0 for android,7,,,F,F,T,F,mobile,SM-G9650 Build/R16NW


In [13]:
test_data = pd.read_csv(os.path.join(DATA_DIR,'raw','test_transaction.csv'))
test_data, _ = reduce_mem_usage(test_data)
print(test_data.shape)
test_data.head()

Memory usage of properties dataframe is : 1519.2380752563477  MB
******************************
Column:  TransactionID
dtype before:  int64
dtype after:  uint32
******************************
******************************
Column:  TransactionDT
dtype before:  int64
dtype after:  uint32
******************************
******************************
Column:  TransactionAmt
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  card1
dtype before:  int64
dtype after:  uint16
******************************
******************************
Column:  card2
dtype before:  float64
dtype after:  uint16
******************************
******************************
Column:  card3
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  card5
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  addr1
dtype before:  float64
d

dtype after:  uint8
******************************
******************************
Column:  V30
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V31
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V32
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V33
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V34
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V35
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V36
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V37
dtype before:  float64
dtype after:  uint8
******************************
*****

dtype after:  uint8
******************************
******************************
Column:  V100
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V101
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V102
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V103
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V104
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V105
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V106
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V107
dtype before:  float64
dtype after:  uint8
****************************

dtype after:  uint16
******************************
******************************
Column:  V169
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V170
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V171
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V172
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V173
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V174
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V175
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V176
dtype before:  float64
dtype after:  uint8
***************************

dtype after:  uint8
******************************
******************************
Column:  V239
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V240
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V241
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V242
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V243
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V244
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V245
dtype before:  float64
dtype after:  uint8
******************************
******************************
Column:  V246
dtype before:  float64
dtype after:  uint8
****************************

dtype after:  float32
******************************
******************************
Column:  V311
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V312
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V313
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V314
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V315
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V316
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V317
dtype before:  float64
dtype after:  float32
******************************
******************************
Column:  V318
dtype before:  float64
dtype after:  float32
**********

Unnamed: 0,TransactionID,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,...,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339
0,3663549,18403224,31.950001,W,10409,111,150,visa,226,debit,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,3663550,18403263,49.0,W,4272,111,150,visa,226,debit,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,3663551,18403310,171.0,W,4476,574,150,visa,226,debit,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,3663552,18403310,284.950012,W,10989,360,150,visa,166,debit,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,3663553,18403317,67.949997,W,18018,452,150,mastercard,117,debit,...,255,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [14]:
test_raw= pd.merge(test_data,test_id, how='left', on='TransactionID')
test_raw.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
TransactionID,506691.0,3.916894e+06,1.462692e+05,3.663549e+06,3790221.5,3.916894e+06,4043566.5,4170239.0
TransactionDT,506691.0,2.692994e+07,4.756507e+06,1.840322e+07,22771540.5,2.720466e+07,31348560.5,34214345.0
TransactionAmt,506691.0,1.346060e+02,2.457908e+02,1.800000e-02,40.0,6.795000e+01,125.0,10270.0
card1,506691.0,9.957222e+03,4.884961e+03,1.001000e+03,6019.0,9.803000e+03,14276.0,18397.0
card2,506691.0,3.592138e+02,1.610235e+02,9.900000e+01,203.0,3.610000e+02,512.0,600.0
card3,506691.0,1.532203e+02,1.309323e+01,9.900000e+01,150.0,1.500000e+02,150.0,232.0
card5,506691.0,1.992551e+02,4.149172e+01,9.900000e+01,166.0,2.260000e+02,226.0,237.0
addr1,506691.0,2.668757e+02,1.151520e+02,9.900000e+01,184.0,2.690000e+02,327.0,540.0
addr2,506691.0,7.665938e+01,2.624302e+01,9.000000e+00,87.0,8.700000e+01,87.0,102.0
dist1,506691.0,3.770279e+04,3.235696e+04,0.000000e+00,10.0,6.553500e+04,65535.0,65535.0


In [15]:
test_raw.to_pickle(os.path.join(DATA_DIR,'interim','test_reduced.pkl'))