In [1]:
import pandas as pd
import os
import chardet
from darwinutils.log import get_task_logger
from darwinutils.mapreduce import parallel_starmap_p
import numpy as np
import time

# Logger obtained from the project's logging helper for this module.
logger = get_task_logger(__name__)

# Module-level list of DataFrame chunks. A later cell rebinds it to the result of
# CSV2DF.read_content, and the DF_CLEAN methods read and modify its elements by index.
global_df_lst = []

class CSV2DF(object):
    """Load a CSV file as a list of DataFrame chunks that are read in parallel.

    The file is cut into windows of ``csv_max_read_lines`` lines. Every window is read with
    ``pandas.read_csv`` through ``parallel_starmap_p`` and the resulting frames are returned
    in file order by :meth:`read_content`.
    """

    def __init__(self, max_byte_num_for_coding_detect=100*1024, max_thread_num=None, csv_max_read_lines = 50000):
        """Store the reader configuration.

        :param max_byte_num_for_coding_detect: number of leading bytes handed to chardet.
        :param max_thread_num: number of windows read per batch; when None it is derived
            from the CPU count (CPU count minus 4, but at least 3).
        :param csv_max_read_lines: number of lines per window.
        """
        if max_thread_num is None:
            # os.cpu_count() returns None when the count cannot be determined.
            cpu_total = os.cpu_count() or 1
            self._max_thread_num = max(cpu_total - 4, 3)
        else:
            self._max_thread_num = max_thread_num
        self._max_byte_num_for_coding_detect = max_byte_num_for_coding_detect
        self._csv_max_read_lines = csv_max_read_lines

    def detect_coder(self, file_path):
        """Guess the text encoding of ``file_path`` from its leading bytes.

        :param file_path: path of the file to inspect.
        :return: the encoding name reported by chardet ('gb2312' is widened to 'gb18030'),
            or None when chardet cannot make a guess.
        """
        with open(file_path, 'rb') as f:
            # read(n) already returns the whole file when it is shorter than n bytes.
            detector = chardet.detect(f.read(self._max_byte_num_for_coding_detect))
        encoding = detector.get('encoding')
        # There are some issues while using gb2312, so change it to gb18030.
        # chardet may report no encoding at all (e.g. for an empty file), hence the None guard.
        if encoding is not None and encoding.lower() == 'gb2312':
            encoding = 'gb18030'
            detector['encoding'] = encoding
        logger.debug("Detected Coder info is {}".format(detector))
        return encoding

    def read_csv_file(self, file_path, encoding, skiprows=None, read_rows=None, usecols=None):
        """Read one window of the CSV file.

        :param file_path: path of the CSV file.
        :param encoding: text encoding passed to ``pandas.read_csv``.
        :param skiprows: number of leading lines to skip (passed through unchanged).
        :param read_rows: maximum number of data rows to read (``nrows``).
        :param usecols: optional column selection passed to ``pandas.read_csv``.
        :return: the DataFrame, or None when the read raised (the reason is printed).
        """
        try:
            file_df = pd.read_csv(file_path, encoding=encoding, skiprows=skiprows, nrows=read_rows,
                                  low_memory=False, usecols=usecols)
        except Exception as e:
            # Normally this is caused by skiprows pointing past the end of the file.
            print("{} - {} may cause out of range for file {}. Reason({})".format(
                skiprows, read_rows, os.path.basename(file_path), str(e)))
            file_df = None
        return file_df

    def map_column_name_idx(self, column_name_lst, map_column_name_lst):
        """Translate column names into their sorted positions within ``column_name_lst``.

        :param column_name_lst: list of all column names.
        :param map_column_name_lst: names to look up.
        :return: ascending list of positions.
        :raises ValueError: when a name is not contained in ``column_name_lst``.
        """
        return sorted(column_name_lst.index(column_name) for column_name in map_column_name_lst)

    def read_content(self, file_path, usecols=None, encoding=None):
        """Read the whole CSV file as a list of DataFrame chunks.

        :param file_path: path of the CSV file.
        :param usecols: optional list of column names to keep.
        :param encoding: text encoding; detected with :meth:`detect_coder` when None.
        :return: list of DataFrames in file order, or None when the file does not exist,
            its header cannot be read, or ``usecols`` is not a valid selection.
        """
        if not os.path.exists(file_path):
            logger.error("{} does not exist".format(file_path))
            return None
        coder = self.detect_coder(file_path) if encoding is None else encoding
        skiprows = 0
        # Get the header first so column names can be validated and re-applied later.
        header = self.read_csv_file(file_path, encoding=coder, skiprows=skiprows, read_rows=2)
        if header is None:
            # read_csv_file already printed the reason (e.g. empty file or wrong encoding).
            print("Error: Cannot read header of {}".format(os.path.basename(file_path)))
            return None
        columns_name = header.columns.tolist()
        all_df = []
        print("Start Read: Coder:{}".format(coder))
        loop_num = 0
        if usecols is not None:
            if len(set(usecols).intersection(set(columns_name))) != len(usecols):
                print("Error: Wrong usecols setting: {} not in list".format(set(usecols).difference(set(usecols).intersection(set(columns_name)))))
                return None
            # Chunks after the first one take their header from a data row, so the columns
            # have to be selected by position instead of by name.
            usecols = self.map_column_name_idx(columns_name, usecols)
            columns_name = [columns_name[col_idx] for col_idx in usecols]

        while True:
            param_lst = []
            print("Start Batch Read")
            for _ in range(self._max_thread_num):
                # A window that starts at line k * csv_max_read_lines consumes that line as
                # its header; that line is the last data row of the previous window, so no
                # data row is lost or duplicated.
                param_lst.append((file_path, coder, skiprows, self._csv_max_read_lines, usecols))
                skiprows += self._csv_max_read_lines
            file_df_lst = list(parallel_starmap_p(self.read_csv_file, param_lst))
            print("Batch Read Done")
            if file_df_lst[-1] is None:
                # Read complete: the last window started beyond the end of the file.
                all_df.extend(df for df in file_df_lst if df is not None)
                break
            all_df.extend(file_df_lst)
            if file_df_lst[-1].shape[0] != self._csv_max_read_lines:
                # Read complete: the last window was only partially filled.
                break
            loop_num += 1
        print("Merge {} pieces of DF together. Total Loop: {}".format(len(all_df), loop_num))
        if len(all_df) > 1:
            # Replace the data-row "headers" of the later chunks with the real column names.
            for df in all_df:
                df.columns = columns_name
        return all_df

In [2]:
# Column names that are removed from the header before the file is loaded.
del_column_names = ['FM_MERC_CD', 'FM_TRM_NO', 'RET_RE_NO', 'TM_SMP', 'ORSC_MNO', 'CORG_TRAN_CD', 'UUID.1',
                    'TRANSACTION_ID', 'CREATE_USR', 'UPDATE_USR', 'UUID.2', 'USR_ID', 'IN_MNO.2', 'MNO.1',
                    'MEC_DIS_NM', 'BUS_OVERVIEW', 'CPR_REG_NM_CN', 'CPR_OPER_NM_CN', 'CPR_REG_NM_EN', 'CPR_OPER_NM_EN', 
                    'BUS_OVERVIEW_BEN', 'KEY_WORDS', 'MEC_TYP_DETAIL', 'UUID.3', 'USR_ID.1', 'CPR_REG_ADDR',
                    'BUS_ADDR', 'OPER_SCOPE', 'CONT_NM_CN', 'CPR_REG_CAPITAL',  'POS_INSTALL_ADDR', 'SIGN_END_DT',
                    'SALES_MAN_LABEL', 'USR_ID.2',"CORG_RP_MSG","REAL_CORG_NO","IN_MNO.1","PAY_TYPE.1","UUID.4","PAY_CHANNELS",
                    "CONSUMER_ID","PASSWORD_FLAG","CARDHOLDER_NAME", "CORG_RP_CD", "IDC", "BANK_ORDER_NO_EXTEND", "POS_SEQ_NO",
                    'IN_MNO.3', 'BIN_ID']
# Further columns to remove; presumably found to hold a single value in an earlier run
# (the name suggests so) -- TODO confirm.
same_value_columns = ['ISS_ORG_NO', 'IS_XW', 'KJ_PAYT_YPE', 'ARG_PAY_DT', 'IS_TEYOU', 'IS_OA_FREEZE', 'TRAD_MNO', 'OP_UTE', 
                      'LBNK_NO', 'RP_SUB_CODE', 'SET_ONO', 'ROUTE_FLG', 'DISCOUNT_AMT', 'VAS_FEE_AMT', 'PAYEE_BRANCH_NO', 
                      'COUPON_CODE', 'MOBILE_OPER_TYP1', 'UPDATE_TIME', 'PRIOR_PAY_FLG', 'PAY_FEE_AMT', 'ACT_TYP', 'ATV_FLG', 
                      'KJ_ORD_NO', 'WEB_SERVE_COST_AMOUNT', 'AUTO_PAY_FLG', 'USD', 'SET_MOD', 'END_DT', 'ISREF', 'PROMOT_NO',
                      'OFFST_AMT', 'SET_AMT', 'AUD_BNO', 'TM_CTE.2', 'REF_REASON', 'AGENT_ORG_NO.1', 'COUNTRY', 'AUT_PAY_MOD', 
                      'MOBILE_SOURCE', 'VAS_FEE_RATE', 'REALITY', 'AUD_STS', 'DT_CTE.2', 'DT_UTE.2', 'LBNK_NM', 'PAY_ACC_TYP', 
                      'PAY_TYP', 'PAY_MARK', 'STR_TM', 'TRAN_DT.2', 'AUD_RMK', 'BANK_PAY_PURPOSE', 'TOL_AMT', 'CLR_DT.1', 
                      'ERROR_CODE', 'BANK_ORDER_NO', 'MOBILE_OPER_TYP', 'SET_RMK', 'BUS_KIND', 'CLR_BNO', 'TRAN_TM.1', 
                      'ACC_FLG', 'ORD_NO.1', 'STR_DT', 'ACT_NO_ENC', 'PAY_REASON', 'CHANNEL_CD', 'FRZ_NO', 'CHANNEL_DETAIL_CD', 
                      'PROD_TYP', 'OP_CTE', 'TCD', 'MEC_SS_AUTH_STS', 'T0_TYP', 'SET_FEE_AMT', 'SET_STS', 'PRIOR_PAY_LEV', 
                      'BNK_CD', 'LBNK_CITY', 'MEC_CONNENT_TYPE', 'TM_UTE.2', 'ACT_NM_ENC', 'LBNK_PROV', 'PROTOCOL_FLAG', 
                      'IN_MNO.4', 'PAY_DT', 'MEC_DEGREE', 'PAY_ONO', 'PAY_TM', 'UGT_PAY', 'WK_PAY', 'PAY_BAT_TM', 'FLAG_EMP_NO', 
                      'LEG_CRD_LEFF_FLG', 'ATV_FLG.1', 'END_TM', 'FINISH_TM.1', 'CREATE_TIME', 'FINISH_DT.1']
# Merge both lists into the single list of columns to drop.
del_column_names += same_value_columns
# Only the header is needed here, so just a few rows are read.
tmp_head = pd.read_csv("/workspace/suixingfu/csv201803.csv", nrows=3, encoding='gb18030')
all_column_names = tmp_head.columns.tolist()
del_column_set = set(del_column_names)
# Report names that this file does not contain instead of failing on the first one.
unknown_del_names = sorted(del_column_set.difference(all_column_names))
if unknown_del_names:
    print("Warning: columns to delete not found in header: {}".format(unknown_del_names))
# Build a new list so that all_column_names keeps the complete, unmodified header.
useful_column_names = [name for name in all_column_names if name not in del_column_set]
print("Useful Column Number: {}".format(len(useful_column_names)))

Useful Column Number: 134


In [3]:
test_read_csv = CSV2DF()
# Alternative inputs used previously:
#global_df_lst = test_read_csv.read_content("/home/sysongyu/workspace/mytoolkits/python/data_clean/t2/t2.csv")
#global_df_lst = test_read_csv.read_content("/workspace/suixingfu/csv201711.csv", usecols=useful_column_names)
global_df_lst = test_read_csv.read_content(
    "/workspace/suixingfu/csv201803.csv",
    usecols=useful_column_names,
    encoding='gb18030',
)
# Sanity check: print the names that were requested but not loaded, then the names that
# were loaded but not requested.
requested_columns = set(useful_column_names)
loaded_columns = set(global_df_lst[0].columns.tolist())
print(requested_columns.difference(loaded_columns))
print(loaded_columns.difference(requested_columns))

Start Read: Coder:gb18030
Start Batch Read
Batch Read Done
Start Batch Read
Batch Read Done
Start Batch Read
3850000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
3650000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
4150000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
3400000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
3700000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
3350000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
3750000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
3600000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to parse from file)
2950000 - 50000 may cause out of range for file csv201803.csv. Reason(No columns to

In [4]:
# Write the UUID column of every chunk into one file; only the first chunk emits the header.
with open("suixingfu_uuid_2018.csv", "w") as f:
    for idx, df in enumerate(global_df_lst):
        is_first_chunk = idx == 0
        df["UUID"].to_csv(
            f,
            mode="w" if is_first_chunk else "a",
            index=False,
            header=is_first_chunk,
        )

In [5]:
from darwinutils.mapreduce import parallel_starmap_t
import collections
import datetime

class DF_CLEAN:
    """Cleaning helpers that operate on every DataFrame chunk held in ``global_df_lst``.

    Public methods store the columns to work on in ``self._working_columns`` and then run a
    private per-chunk worker once per chunk index through ``parallel_starmap_p`` (workers
    that only return information) or ``parallel_starmap_t`` (workers that modify the chunks).
    """

    def __init__(self, head_flag=True):
        """Capture the column names and dtype groups of the first chunk.

        :param head_flag: when True the existing column names are used; when False every
            chunk gets generated names ``c0000``, ``c0001``, ...
        """
        # Always defined, so later lookups do not fail with AttributeError when no chunk exists.
        self._columns_dtype_dict = {}
        if len(global_df_lst) > 0:
            if head_flag:
                self._columns = global_df_lst[0].columns.tolist()
            else:
                self._columns = ["c%04d" % x for x in range(len(global_df_lst[0].columns))]
                for df in global_df_lst:
                    df.columns = self._columns
            # Mapping dtype -> Index of column names having that dtype in the first chunk.
            self._columns_dtype_dict = global_df_lst[0].columns.to_series().groupby(global_df_lst[0].dtypes).groups
        else:
            self._columns = None
        self._working_columns = []  # seems pool thread does not support a very long parameter list, have to use this method
        self._missing_value_columns = []
        self._factor_unknown = "unknown"
        self._factor_map_dict = collections.defaultdict(dict)

    @staticmethod
    def _build_param_lst(df_num, *extra_args):
        """Return ``(df_num, param_lst)`` with one ``[idx, *extra_args]`` entry per chunk.

        ``df_num`` defaults to the number of chunks in ``global_df_lst`` when None.
        """
        if df_num is None:
            df_num = len(global_df_lst)
        return df_num, [[idx] + list(extra_args) for idx in range(df_num)]

    @staticmethod
    def _normalize_factor_key(value):
        """Return the dictionary key for a factor value: integral numbers become e.g. '5',
        anything that is not numeric is converted with ``str``."""
        try:
            return str(int(float(value)))
        except (ValueError, TypeError, OverflowError):
            return str(value)

    def _get_missing_value_column_name_lst(self, df_idx):
        """Return the working columns of chunk ``df_idx`` that contain at least one null."""
        chunk = global_df_lst[df_idx]
        return [name for name in self._working_columns if chunk[name].isnull().any()]

    def _reduce_lst(self, columns_lst, axis=0, method='union'):
        """Combine several lists with a ``set`` operation.

        :param columns_lst: list of lists (axis 0) or list of lists of lists (axis 1).
        :param axis: 0 combines the lists themselves; 1 combines the lists position by
            position (the i-th entries of every outer element are combined together).
        :param method: name of a ``set`` method such as 'union', 'intersection' or
            'difference'.
        :return: a list (axis 0) or a list of lists (axis 1); element order is not defined.
        :raises AttributeError: when ``method`` is not an attribute of ``set``.
        """
        # Look the operation up by name instead of evaluating a string with eval().
        combine = getattr(set, method)
        if axis == 1:
            value_c_lst = []
            columns_zip = list(zip(*columns_lst))
            print("Total {} blocks".format(len(columns_zip)))
            for column_block in columns_zip:
                merged_block = set(column_block[0])
                for merged_part in column_block[1:]:
                    merged_block = combine(merged_block, set(merged_part))
                value_c_lst.append(list(merged_block))
            return value_c_lst
        if len(columns_lst) == 0:
            # Nothing to combine.
            return []
        last_set = set(columns_lst[0])
        for columns_info in columns_lst[1:]:
            last_set = combine(last_set, set(columns_info))
        return list(last_set)

    def check_missing_value_columns(self, column_names=None, df_num=None):
        """Return the columns that contain a null value in at least one chunk.

        :param column_names: columns to check; defaults to all columns.
        :param df_num: number of leading chunks to check; defaults to all chunks.
        :return: list of column names (also stored on the instance), or None when no
            columns are known.
        """
        if column_names is None:
            column_names = self._columns
        if column_names is None:
            print("Error: DF dose not have columns")
            return None
        df_num, param_lst = self._build_param_lst(df_num)
        self._working_columns = column_names
        column_name_list = list(parallel_starmap_p(self._get_missing_value_column_name_lst, param_lst))
        assert(len(column_name_list)==df_num)
        self._missing_value_columns = self._reduce_lst(column_name_list, axis=0, method='union')
        return self._missing_value_columns

    def _get_same_value_column_name_lst(self, df_idx):
        """Return the working columns of chunk ``df_idx`` that are entirely null or hold a
        single distinct value once nulls are replaced by the unknown marker."""
        chunk = global_df_lst[df_idx]
        same_value_column_name = []
        for name in self._working_columns:
            if chunk[name].isnull().all():
                same_value_column_name.append(name)
            elif len(set(chunk[name].fillna(self._factor_unknown).values)) == 1:
                same_value_column_name.append(name)
        return same_value_column_name

    def check_same_value_columns(self, column_names=None, df_num=None):
        """Return the columns that are single-valued within every checked chunk.

        The per-chunk results are intersected, so the single value is not compared
        between chunks.

        :param column_names: columns to check; defaults to all columns.
        :param df_num: number of leading chunks to check; defaults to all chunks.
        :return: list of column names, or None when no columns are known.
        """
        if column_names is None:
            column_names = self._columns
        if column_names is None:
            print("Error: DF dose not have columns")
            return None
        df_num, param_lst = self._build_param_lst(df_num)
        self._working_columns = column_names
        column_name_list = list(parallel_starmap_p(self._get_same_value_column_name_lst, param_lst))
        assert(len(column_name_list)==df_num)
        return self._reduce_lst(column_name_list, axis=0, method='intersection')

    def _get_factor_values(self, df_idx):
        """Return, per working column, the distinct values of chunk ``df_idx`` (nulls
        replaced by the unknown marker, which is always included)."""
        chunk = global_df_lst[df_idx]
        factor_column_values = []
        for name in self._working_columns:
            distinct_values = set(chunk[name].fillna(self._factor_unknown).values)
            distinct_values.add(self._factor_unknown)
            factor_column_values.append(list(distinct_values))
        return factor_column_values

    def _change_column_type(self, df_idx, column_type):
        """Cast every working column of chunk ``df_idx`` to ``column_type``."""
        for name in self._working_columns:
            global_df_lst[df_idx][name] = global_df_lst[df_idx][name].astype(column_type)

    def get_factor_candidate_columns(self, column_names=None,df_num=None):
        """Collect the distinct values of each column over all checked chunks.

        :param column_names: columns to inspect; defaults to the object-dtype columns of
            the first chunk.
        :param df_num: number of leading chunks to inspect; defaults to all chunks.
        :return: tuple ``(column_names, values)`` where ``values[i]`` lists the distinct
            values of ``column_names[i]``.
        """
        df_num, param_lst = self._build_param_lst(df_num)
        if column_names is None:
            column_names = self._columns_dtype_dict[np.dtype(object)].values

        self._working_columns = column_names
        column_name_list = list(parallel_starmap_p(self._get_factor_values, param_lst))

        assert(len(column_name_list)==df_num)
        print("Reduce List")
        column_info_lst = self._reduce_lst(column_name_list, axis=1, method='union')
        return self._working_columns, column_info_lst

    @property
    def columns(self):
        """Column names captured at construction time (None when there were no chunks)."""
        return self._columns

    ########################################################
    def _set_bool_type_for_column(self, df_idx):
        """Replace every working column of chunk ``df_idx`` by 0 (null or equal to 0) / 1."""
        chunk = global_df_lst[df_idx]
        for name in self._working_columns:
            # Vectorized form of "null -> 0, anything different from 0 -> 1".
            chunk[name] = chunk[name].fillna(0).ne(0).astype(int)

    def set_column_to_bool(self, column_names=None,df_num=None):
        """Turn the given columns into 0/1 presence flags in every checked chunk.

        :param column_names: columns to convert (required).
        :param df_num: number of leading chunks to convert; defaults to all chunks.
        """
        if column_names is None:
            print("column_names is None")
            return None
        df_num, param_lst = self._build_param_lst(df_num)
        self._working_columns = column_names
        parallel_starmap_t(self._set_bool_type_for_column, param_lst)

    def _set_factor_value_for_column(self, df_idx, value_dict):
        """Replace the values of the working columns of chunk ``df_idx`` by their codes.

        Values without a code are reported, mapped to the code of the unknown marker and
        registered in ``value_dict`` so that they are reported only once.
        """
        for name in self._working_columns:
            column_map = value_dict[name]
            values = global_df_lst[df_idx][name].fillna(self._factor_unknown).values.tolist()
            for idx, value in enumerate(values):
                try:
                    # Numeric values are looked up by their integer text, e.g. 5.0 -> '5'.
                    lookup_key = str(int(float(value)))
                except (ValueError, TypeError, OverflowError):
                    lookup_key = value
                code = column_map.get(lookup_key)
                if code is None:
                    print("Column {} Value {} not defined".format(name, str(value)))
                    code = column_map[self._factor_unknown]
                    # Register under the key that is actually looked up; registering the raw
                    # value would never match again for numeric values.
                    column_map[lookup_key] = code
                values[idx] = code
            global_df_lst[df_idx][name] = values

    def set_column_to_factor(self, column_names=None,df_num=None, value_dict=None):
        """Encode the given columns with the codes in ``value_dict`` in every checked chunk.

        :param column_names: columns to encode (required).
        :param df_num: number of leading chunks to encode; defaults to all chunks.
        :param value_dict: mapping column name -> {value key -> code} (required); it is
            extended with previously unseen values.
        """
        if column_names is None:
            print("column_names is None")
            return None
        if value_dict is None:
            print("value_dict is None")
            return None
        df_num, param_lst = self._build_param_lst(df_num, value_dict)
        self._working_columns = column_names
        parallel_starmap_t(self._set_factor_value_for_column, param_lst)

    def _drop_columns(self, df_idx):
        """Replace chunk ``df_idx`` by a copy without the working columns."""
        global_df_lst[df_idx] = global_df_lst[df_idx].drop(self._working_columns, axis=1)

    def drop_columns(self, column_names=None,df_num=None):
        """Drop the given columns from every checked chunk.

        :param column_names: columns to drop (required).
        :param df_num: number of leading chunks to process; defaults to all chunks.
        """
        if column_names is None:
            print("column_names is None")
            return None
        df_num, param_lst = self._build_param_lst(df_num)
        self._working_columns = column_names
        parallel_starmap_t(self._drop_columns, param_lst)

    def _compare_two_columns(self, df_idx, src_column, target_column):
        """Return True when both columns of chunk ``df_idx`` are element-wise equal.

        The comparison uses ``==``, so rows where both columns are NaN count as different.
        """
        chunk = global_df_lst[df_idx]
        return (chunk[src_column] == chunk[target_column]).values.all()

    def compare_two_columns(self, src_column, target_column, df_num=None):
        """Return True when the two columns are element-wise equal in every checked chunk.

        :param src_column: first column name.
        :param target_column: second column name.
        :param df_num: number of leading chunks to compare; defaults to all chunks.
        """
        df_num, param_lst = self._build_param_lst(df_num, src_column, target_column)
        column_rst_list = list(parallel_starmap_p(self._compare_two_columns, param_lst))
        return np.array(column_rst_list).all()

    @staticmethod
    def _datetime_value_to_int(value, datetime_format, basetime, scale, prefix=''):
        """Convert one cell into the whole number of ``scale`` units elapsed since ``basetime``.

        :param value: cell content; 0, '0', the empty string and strings starting with a
            blank are treated as "no value" and give 0.
        :param datetime_format: ``strptime`` format of ``prefix`` + value.
        :param basetime: ``datetime`` that maps to 0.
        :param scale: one of "Day", "Hour", "Minute", "Second".
        :param prefix: text put in front of the value before parsing.
        :return: int offset from ``basetime``.
        :raises ValueError: when the text does not match the format or the scale is unknown.
        """
        if value == '0' or value == 0:
            return 0
        try:
            # Numeric cells such as 20180301.0 are rendered without the fractional part.
            text = str(int(value))
        except (ValueError, TypeError, OverflowError):
            text = str(value)
        if text == '' or text.startswith(' '):
            return 0
        delta = datetime.datetime.strptime(prefix + text, datetime_format) - basetime
        # timedelta keeps 0 <= seconds < 86400, so the integer divisions below round down.
        if scale == "Day":
            return delta.days
        if scale == "Hour":
            return delta.days * 24 + delta.seconds // 3600
        if scale == "Minute":
            return delta.days * 24 * 60 + delta.seconds // 60
        if scale == "Second":
            return delta.days * 24 * 3600 + delta.seconds
        raise ValueError("Unsupported scale {}".format(scale))

    def _transfer_datetime_to_int(self, df_idx, datetime_format, basetime, scale, prefix):
        """Convert the working columns of chunk ``df_idx`` from date/time text into integer
        offsets from ``basetime``; null and unparsable cells become 0."""
        if scale not in ("Day", "Hour", "Minute", "Second"):
            print("Unsupported scale {}".format(scale))
            return None
        if prefix is None:
            prefix = ''
        for name in self._working_columns:
            if(df_idx == 0):
                print("Transfer Time on: {}".format(name))
            values = global_df_lst[df_idx][name].fillna(0).values.tolist()
            for idx, value in enumerate(values):
                try:
                    values[idx] = self._datetime_value_to_int(value, datetime_format, basetime, scale, prefix)
                except Exception as e:
                    print("Error while transfer string to datatime in column {}, line {} value {}. Reason: {}".format(name, idx, value, str(e)))
                    values[idx] = 0
            if(df_idx == 0):
                print(values[:10])
            global_df_lst[df_idx][name] = values

    def transfer_datetime_to_int(self, column_names=None, datetime_format=None, basetime=None, scale=None, df_num=None, prefix=None):
        """Convert date/time columns into integer offsets from ``basetime`` in every chunk.

        :param column_names: columns to convert (required).
        :param datetime_format: ``strptime`` format of the cell text (required).
        :param basetime: ``datetime.datetime`` that maps to 0 (required).
        :param scale: "Day", "Hour", "Minute" or "Second" (required).
        :param df_num: number of leading chunks to convert; defaults to all chunks.
        :param prefix: optional text put in front of every cell before parsing.
        """
        if column_names is None:
            print("column_names is None")
            return None
        if not isinstance(basetime, datetime.datetime):
            print("basetime is None or format is not datetime")
            return None
        if datetime_format is None:
            print("datetime_format is None")
            return None
        if scale is None or scale not in ["Day", "Hour", "Minute", "Second"]:
            print("scale is None or value is not right")
            return None
        df_num, param_lst = self._build_param_lst(df_num, datetime_format, basetime, scale, prefix)
        self._working_columns = column_names
        parallel_starmap_t(self._transfer_datetime_to_int, param_lst)

    def _update_value_difference_by_columns(self, df_idx, refer_column_name, new_column_names=None):
        """Subtract the reference column from every working column of chunk ``df_idx``.

        The result overwrites the working column, or is stored under the matching entry of
        ``new_column_names`` when that list is given.
        """
        chunk = global_df_lst[df_idx]
        # Taken once up front so the original reference values are used for every column,
        # even when the reference column itself is among the working columns.
        reference = chunk[refer_column_name]
        for idx, name in enumerate(self._working_columns):
            target_name = name if new_column_names is None else new_column_names[idx]
            chunk[target_name] = chunk[name] - reference

    def update_value_difference_by_columns(self, refer_column_name, columns_names, new_column_names=None, df_num=None):
        """Replace columns by their difference to a reference column in every checked chunk.

        :param refer_column_name: name of the column that is subtracted.
        :param columns_names: columns to subtract from (required).
        :param new_column_names: optional target names, one per entry of ``columns_names``;
            when None the source columns are overwritten.
        :param df_num: number of leading chunks to process; defaults to all chunks.
        """
        if columns_names is None:
            print("column_names is None")
            return None
        if not isinstance(refer_column_name, str):
            print("refer_column_name type error")
            return None
        if new_column_names is not None and len(new_column_names) != len(columns_names):
            print("new_column_names param is not correct")
            return None

        df_num, param_lst = self._build_param_lst(df_num, refer_column_name, new_column_names)
        self._working_columns = columns_names
        parallel_starmap_t(self._update_value_difference_by_columns, param_lst)

    def _fill_value_to_nan_cell(self, df_idx, value):
        """Fill the nulls of every working column of chunk ``df_idx`` with ``value``."""
        for name in self._working_columns:
            if df_idx==0:
                print("Filling column {}".format(name))
            global_df_lst[df_idx][name] = global_df_lst[df_idx][name].fillna(value)

    def fill_value_to_nan_cell(self, column_names=None, value=0, df_num=None):
        """Fill nulls with ``value`` in the given columns of every checked chunk.

        :param column_names: columns to fill; defaults to all columns.
        :param value: replacement for null cells.
        :param df_num: number of leading chunks to process; defaults to all chunks.
        """
        if column_names is None:
            column_names = self._columns
        df_num, param_lst = self._build_param_lst(df_num, value)
        self._working_columns = column_names
        parallel_starmap_t(self._fill_value_to_nan_cell, param_lst)

    def fill_fact_map_dict(self, column_names=None, factor_value=None):
        """Create or extend the value -> code mapping of the given columns.

        A column without a mapping gets the unknown marker as code 0 and its remaining
        values numbered from 1 in sorted key order. A column that already has a mapping
        keeps its codes; unseen values are appended after the current maximum code.
        Blank values ('' and ' ') are ignored.

        :param column_names: column names, parallel to ``factor_value``.
        :param factor_value: per column, an iterable of its distinct values.
        :return: the complete mapping, or None when the parameters are invalid.
        """
        if column_names is None or factor_value is None:
            print("Wrong input parameter")
            return None
        if len(column_names) != len(factor_value):
            print("Column name list length {} not equate to factor value list length {}".format(len(column_names),len(factor_value)))
            return None
        for column_idx, column_name in enumerate(column_names):
            keys = sorted(set(
                self._normalize_factor_key(v)
                for v in factor_value[column_idx]
                if not (v == ' ' or v == '')
            ))
            # setdefault also works when _factor_map_dict was replaced by a plain dict
            # (e.g. one loaded from JSON), where a missing column would raise KeyError.
            column_map = self._factor_map_dict.setdefault(column_name, {})
            if not column_map:
                column_map[self._factor_unknown] = 0
                current_num = 1
            else:
                current_num = max(column_map.values()) + 1
            for key in keys:
                if column_map.get(key) is None:
                    column_map[key] = current_num
                    current_num += 1
        return self._factor_map_dict

In [6]:
# Create the cleaning helper; it reads the column names from the first chunk of global_df_lst.
clean_df = DF_CLEAN()

In [7]:
# Columns that contain at least one missing value in any chunk.
missing_columns = clean_df.check_missing_value_columns()
print("Missing:\n{}".format(missing_columns))


Missing:
['OUUID', 'TD_MNO', 'DT_UTE', 'ORG_COD', 'DT_UTE.1', 'PAY_TYPE', 'CRD_NO', 'POS_OPR_ID', 'REVERSE_FLAG', 'OTHER_ENC_FLG', 'RP_CD', 'SETTLE_FLAG', 'COUPON_AMT', 'OPER_LIC_EFF_EDT', 'SIGN_MNO', 'BANK_MANAGER', 'LEG_PER_CRD_NO_ENC', 'CO_RUL_NO', 'MCC_CD', 'FINISH_DT', 'BANK_TEAM_WORK_SIGN', 'TRAN_DT.1', 'MEC_FEE_RATE', 'CRD_TYP', 'UTE_STFF_NO', 'QRCODE_MNO_TYPE', 'COOPERATION_MANAGER_ID', 'SES_BAT_NO', 'COOPER_FLAG', 'MANAGER_TEAM_WORK', 'CUSTOM_CLASSIFY', 'TXN_RSV1', 'BNK_TYP', 'IDE_NO', 'FINISH_TM', 'BD_FLG', 'TM_UTE.1', 'TAX_REG_NO', 'LIMIT_CREDIT_PAY', 'CORG_RCD', 'PAY_SOURCE', 'CORG_TRAN_STS', 'OPER_LIC_EFF_STT', 'ENCRY_CRD_NO', 'MEC_BUSI_TYP', 'IS_XW_MEC', 'SECOND_CONFIRM', 'MNO', 'IN_MOD', 'WEB_SERVE_AMOUNT', 'AUT_CD', 'CTXN_DT', 'UTE_STFF_NO.1', 'SPECIAL_OFFER', 'BANK_TEAM_WORK', 'TRM_NO', 'TM_UTE', 'CORG_NO', 'AUT_SET_STS', 'CLR_DT', 'PARENT_IN_MNO', 'ORD_NO', 'NO_SIGN_SECRET', 'CORG_NM', 'OLD_TRAN_CD', 'CCY', 'DATA_COMP']


In [8]:
# Columns that are entirely null or single-valued within every chunk (per-chunk results are intersected).
same_columns = clean_df.check_same_value_columns()
print("Same:\n{}".format(same_columns))

Same:
['LIMIT_CREDIT_PAY']


In [9]:
# Columns that are encoded as factors (value -> integer code) further below.
last_fact_columns = [
    'IDE_NO', 'MEC_NORMAL_LEVEL', 'SETTLE_FLAG', 'SYS_ID', 'TRAN_STS', 'POS_OPR_ID', 'CORG_TRAN_STS', 'CORG_NM',
    'CORG_NO', 'SYS_ID.1', 'TD_MNO', 'SPECIAL_OFFER', 'QRCODE_MNO_TYPE', 'UTE_STFF_NO.1', 'TRAN_CD',
    'MCC_CD', 'CUP_CODE', 'CRD_TYP', 'OLD_TRAN_CD', 'MEC_DIST_CD', 'CPR_TYP',
    'RP_CD', 'ROOT_AGENT_ORG_NO', 'BANK_TEAM_WORK_SIGN', 'RE_BUS_CNL', 'TRAN_FLG', 'BRANCH_ORG_UUID', 'AGENT_ORG_NO',
    'ORG_COD', 'MEC_PROV_CD', 'CTE_STFF_NO.1', 'OPER_STT', 'UTE_STFF_NO', 'OPER_LIC_EFF_EDT', 'COOPERATION_MANAGER_ID',
    'IN_MOD', 'MEC_CITY_CD', 'OPER_AREA', 'BNK_TYP', 'CFM_FLG', 'AGENT_BRANCH_ORG_UUID', 'CORG_RCD', 'CTE_STFF_NO',
    'CCY', 'REVERSE_FLAG', 'SECOND_CONFIRM',
]
columns = clean_df.columns

# Sanity check: the second line printed must be an empty set (every name exists in the data).
print(len(last_fact_columns))
print(set(last_fact_columns) - set(columns))

46
set()


In [10]:
# Collect, for every factor column, the distinct values found across all chunks.
column_name_lst, factor_values = clean_df.get_factor_candidate_columns(column_names=last_fact_columns)


Reduce List
Total 46 blocks


In [11]:
import json
# Reuse the mapping saved by an earlier run when it exists; a later cell writes this file,
# so on a first run in a fresh directory there is nothing to load yet.
factor_map_path = 'column_factor_map.json'
if os.path.exists(factor_map_path):
    with open(factor_map_path, 'r') as fp:
        # Keep the defaultdict behaviour DF_CLEAN relies on when a new column is added.
        factor_map_dict = collections.defaultdict(dict, json.load(fp))
else:
    factor_map_dict = collections.defaultdict(dict)
clean_df._factor_map_dict = factor_map_dict

In [12]:
# Show which columns already have a value -> code mapping.
print(factor_map_dict.keys())

dict_keys(['IDE_NO', 'MEC_NORMAL_LEVEL', 'SETTLE_FLAG', 'SYS_ID', 'TRAN_STS', 'POS_OPR_ID', 'CORG_TRAN_STS', 'CORG_NM', 'CORG_NO', 'SYS_ID.1', 'TD_MNO', 'SPECIAL_OFFER', 'QRCODE_MNO_TYPE', 'UTE_STFF_NO.1', 'TRAN_CD', 'MCC_CD', 'CUP_CODE', 'CRD_TYP', 'OLD_TRAN_CD', 'MEC_DIST_CD', 'CPR_TYP', 'RP_CD', 'ROOT_AGENT_ORG_NO', 'BANK_TEAM_WORK_SIGN', 'RE_BUS_CNL', 'TRAN_FLG', 'BRANCH_ORG_UUID', 'AGENT_ORG_NO', 'ORG_COD', 'MEC_PROV_CD', 'CTE_STFF_NO.1', 'OPER_STT', 'UTE_STFF_NO', 'OPER_LIC_EFF_EDT', 'COOPERATION_MANAGER_ID', 'IN_MOD', 'MEC_CITY_CD', 'OPER_AREA', 'BNK_TYP', 'CFM_FLG', 'AGENT_BRANCH_ORG_UUID', 'CORG_RCD', 'CTE_STFF_NO', 'CCY', 'REVERSE_FLAG', 'SECOND_CONFIRM'])


In [13]:
# Extend the mapping with the values collected from the current data.
factor_map_dict = clean_df.fill_fact_map_dict(column_names=column_name_lst, factor_value=factor_values)

In [14]:
# Persist the factor map to column_factor_map.json, overwriting any previous file.
# NOTE(review): JSON object keys are always strings, so any non-string keys in
# factor_map_dict come back as str when the file is loaded again -- confirm the
# mapping is keyed by strings only.
import json
with open('column_factor_map.json', 'w') as fp:
    json.dump(factor_map_dict, fp)

In [15]:
# Columns that a later cell passes to set_column_to_bool.
bool_column_name = [
    "OUUID", "ORD_NO", "TRM_NO", "BAT_NO",
    "CRD_NO", "REF_AMT", "REF_FEE_AMT", "BD_FLG",
    "AUT_CD", "ENCRY_CRD_NO", "TXN_RSV1", "SES_BAT_NO",
    "COOPER_FLAG", "CO_RUL_NO", "LIMIT_CREDIT_PAY", "PARENT_IN_MNO",
    "AGRNO", "BUSINESS_EMP_NO", "CS_TEL_NO_ENC", "DATA_COMP",
    "TAX_REG_NO", "CUSTOM_CLASSIFY", "MANAGER_TEAM_WORK", "BANK_MANAGER",
    "MEC_ADMIN_TEL_ENC", "CONT_TEL_NO_ENC", "LEG_PER_CRD_NO_ENC", "LEG_PER_NM_ENC",
    "OTHER_ENC_FLG", "OPER_LIC_NO", "MNO", "SIGN_MNO",
]

# Sanity check: number of listed names, then any listed name missing from
# `columns` (an empty set means every name was found).
print(len(bool_column_name))
print(set(bool_column_name) - set(columns))

32
set()


In [16]:
# Apply DF_CLEAN.set_column_to_bool to the columns listed above.
# NOTE(review): the conversion rule lives in DF_CLEAN -- presumably a
# present/absent flag per cell; confirm before relying on it.
clean_df.set_column_to_bool(column_names=bool_column_name)

In [17]:
# Encode the factor columns using the mapping returned by fill_fact_map_dict.
# In the recorded run this reported a (seemingly blank) value as "not defined"
# for UTE_STFF_NO.1 and OPER_LIC_EFF_EDT.
clean_df.set_column_to_factor(column_names=last_fact_columns, value_dict=factor_map_dict)

Column UTE_STFF_NO.1 Value   not defined
Column OPER_LIC_EFF_EDT Value   not defined


In [20]:
# Persist every cleaned chunk into a single CSV file.
#
# The encoding must be chosen on open(): when to_csv() receives an already-open
# text handle, its own `encoding=` and `mode=` arguments have no effect, so the
# utf-8 that was requested there was never applied and the file silently used
# the platform's default encoding.  newline='' is what pandas asks for when it
# is given a file object, so line endings are not translated a second time.
# NOTE(review): whoever reads this file back must use the same encoding.
with open("csv201803_clear_1.csv", 'w', encoding='utf-8', newline='') as f:
    for idx, df in enumerate(global_df_lst):
        # Only the first chunk writes the header row; later chunks append data rows.
        df.to_csv(f, header=(idx == 0), index=False)

In [21]:
# Every column that carries a date or timestamp.
date_time_column_lst = [
    'CLR_DT', 'TRAN_DT', 'DATE_CREATED',
    'LAST_UPDATED', 'TRAN_DT.1', 'DT_CTE',
    'DT_UTE', 'OPER_LIC_EFF_STT', 'LEG_CRD_EFF_STT',
    'LEG_CRD_EFF_EDT', 'DT_CTE.1', 'DT_UTE.1',
    'SIGN_DT', 'CTXN_DT', 'FINISH_DT',
]
# Rebinds `columns`, this time to the column names of the first loaded chunk.
columns = global_df_lst[0].columns.tolist()

# Sanity check: number of listed names, then any listed name missing from the chunk.
print(len(date_time_column_lst))
print(set(date_time_column_lst) - set(columns))

15
set()


In [22]:
# Date/time columns grouped by the textual format their values use.

# Group 1: values without a year (the recorded sample shows 301); the
# conversion cell passes prefix='2018' together with "%Y%m%d" for this group.
date_time_format_1_lst = [
    'CTXN_DT',
]

# Group 2: full timestamps such as "2018-03-12 16:37:35.356000".
date_time_format_2_lst = [
    'DATE_CREATED',
    'LAST_UPDATED',
]

# Group 3: eight-digit YYYYMMDD values (shown as int or float in the preview),
# converted with "%Y%m%d".
date_time_format_3_lst = [
    'FINISH_DT',
    'TRAN_DT.1',
    'OPER_LIC_EFF_STT',
    'TRAN_DT',
    'LEG_CRD_EFF_EDT',
    'DT_CTE.1',
    'LEG_CRD_EFF_STT',
    'SIGN_DT',
    'DT_UTE.1',
    'CLR_DT',
    'DT_UTE',
    'DT_CTE',
]

In [25]:
# Reload the CSV produced by the to_csv cell above (rebinding global_df_lst)
# and start over with a fresh DF_CLEAN instance.
# NOTE(review): the cell that writes this file requests utf-8, while this read
# uses gb18030; the two only agree for pure-ASCII content -- confirm the file's
# actual encoding.
global_df_lst = test_read_csv.read_content("csv201803_clear_1.csv", encoding='gb18030')
clean_df = DF_CLEAN()

Start Read: Coder:gb18030
Start Batch Read
Batch Read Done
Start Batch Read
Batch Read Done
Start Batch Read
2950000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
3650000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
3900000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
3950000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
3150000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
3100000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
4000000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
3750000 - 50000 may cause out of range for file csv201803_clear_1.csv. Reason(No columns to parse from file)
3450000 - 50000 may

In [26]:
# Preview the raw representation of each date/time column: print the column
# name followed by its first five values taken from the chunk at index 1.
for name in date_time_column_lst:
    print(name)
    column_head = global_df_lst[1][name][:5]
    print(column_head)

CLR_DT
0    20180312.0
1    20180312.0
2    20180312.0
3    20180312.0
4    20180312.0
Name: CLR_DT, dtype: float64
TRAN_DT
0    20180312
1    20180312
2    20180312
3    20180312
4    20180312
Name: TRAN_DT, dtype: int64
DATE_CREATED
0    2018-03-12 16:37:35.356000
1    2018-03-12 16:54:39.775000
2    2018-03-12 17:28:16.003000
3    2018-03-12 16:39:59.950000
4    2018-03-12 16:45:31.264000
Name: DATE_CREATED, dtype: object
LAST_UPDATED
0    2018-03-12 16:37:58.000000
1    2018-03-12 16:54:40.000000
2    2018-03-12 17:28:49.000000
3    2018-03-12 16:39:59.000000
4    2018-03-12 16:45:31.000000
Name: LAST_UPDATED, dtype: object
TRAN_DT.1
0    20180312.0
1    20180312.0
2    20180312.0
3    20180312.0
4    20180312.0
Name: TRAN_DT.1, dtype: float64
DT_CTE
0    20171226
1    20170527
2    20151113
3    20160704
4    20160223
Name: DT_CTE, dtype: int64
DT_UTE
0    20180404.0
1    20180305.0
2    20180320.0
3    20180111.0
4    20180319.0
Name: DT_UTE, dtype: float64
OPER_LIC_EFF_STT
0    

In [27]:
# Convert the group-1 column (CTXN_DT) and print a five-row sample before and
# after.  The raw values carry no year (e.g. 301), hence prefix='2018'.
# NOTE(review): presumably the prefix is prepended before parsing with
# "%Y%m%d" and the result is the offset from basetime in the given scale; in
# the recorded run the 301 rows become 0 -- confirm in DF_CLEAN.
print(global_df_lst[0]['CTXN_DT'][:5])
clean_df.transfer_datetime_to_int(
    column_names=date_time_format_1_lst,
    datetime_format="%Y%m%d",
    basetime=datetime.datetime(2018, 3, 1),
    scale="Minute",
    prefix='2018',
)
print(global_df_lst[0]['CTXN_DT'][:5])

0    301
1    301
2    301
3    301
4    301
Name: CTXN_DT, dtype: int64
Transfer Time on: CTXN_DT
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
0    0
1    0
2    0
3    0
4    0
Name: CTXN_DT, dtype: int64


In [28]:
# Convert the group-3 (YYYYMMDD) columns and print a five-row TRAN_DT sample
# before and after.
# In the recorded run the conversion logged parse errors for placeholder values
# in OPER_LIC_EFF_STT ('00000000', '00010101', '10101') and for the impossible
# date 20190229 in LEG_CRD_EFF_EDT.
# NOTE(review): what is stored for such rows is decided inside DF_CLEAN -- confirm.
print(global_df_lst[0]['TRAN_DT'][:5])
clean_df.transfer_datetime_to_int(
    column_names=date_time_format_3_lst,
    datetime_format="%Y%m%d",
    basetime=datetime.datetime(2018, 3, 1),
    scale="Minute",
)
print(global_df_lst[0]['TRAN_DT'][:5])

0    20180301
1    20180301
2    20180301
3    20180301
4    20180301
Name: TRAN_DT, dtype: int64
Transfer Time on: FINISH_DT
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Transfer Time on: TRAN_DT.1
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 1697 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 1707 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 1840 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2135 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2141 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 4262 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 6132 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2899 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 8363 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 7283 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 8216 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 8265 value 00000000. Reason: time data '0' do

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17135 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 11233 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 12700 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 13931 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 11492 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17403 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 10680 value 10101. Reason: time

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 18094 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 16773 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 28276 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 16804 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17094 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17347 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 18344 value 00010101. Reas

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 24660 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 22720 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 27936 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30095 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 29422 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 28375 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 29637 value 00010101. Reaso

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 28907 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 688 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 20 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 32880 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 32130 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 28461 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 34934 value 00000000. Reason: time data '0

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2106 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2116 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 29297 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 33633 value 10101.0. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2144 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 14871 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2255 value 10101. Reason: time data 

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 3421 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30791 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 4401 value 10101.0. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 2204 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 4141 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 3816 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 4446 value 00010101. Reason: time 

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 5884 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 5902 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 6547 value 10101.0. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 6979 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 6340 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 33559 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 6807 value 00010101. Reason: time dat

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 8696 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 9087 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 9116 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 7836 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 8473 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 9212 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 7982 value 10101. Reason: time data '10

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 8975 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 23018 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 39767 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 37602 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 41868 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 42984 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 9499 value 10101. Reason: time data

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 10751 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 12815 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 11468 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 11290 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 41370 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 12203 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 11583 value 10101. Reason: time da

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 13401 value 10101.0. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 40327 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 13127 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 13128 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 13132 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 12982 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 13136 value 10101. Reason: time 

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 15537 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 48726 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 42834 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 41949 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 15882 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 44719 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 15934 value 10101. Reason: ti

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 15336 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 43566 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 43568 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17075 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17089 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17845 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 17858 value 10101. Reason: time d

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 18816 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 31890 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 31902 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 31906 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 19936 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 19355 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 47743 value 00000000. Re

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 19225 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 19248 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 49515 value 00000000. Reason: time data '0' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 20359 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 21171 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 47623 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 47651 value 00010101. Rea

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 23018 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 22418 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 23186 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 22978 value 10101.0. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 22598 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 23015 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 23273 value 00010101. Rea

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 24971 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 25938 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 25206 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 26875 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 25778 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 26151 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 24540 value 10101. Reason: tim

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 28956 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 27568 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 26756 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 28670 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 28733 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 27628 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 27638 value 10101. Reason: time d

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 29945 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30290 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30909 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30912 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30919 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30839 value 10101.0. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 30806 value 10101. Reason: time

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 32404 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 32405 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 33313 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 33915 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 33539 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 33177 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 33179 value 00010101. Reason: 

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 34823 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 34845 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 36543 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 37123 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 48977 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 36855 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 36891 value 10101. Reason: 

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 38785 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 38804 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 37181 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 39609 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 39611 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 38682 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 38776 value 10101. Reason: tim

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 39924 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 41639 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 43425 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 41764 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 41350 value 10101.0. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 42399 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 42409 value 00010101. Reason: t

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 44102 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 44601 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 44251 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 43890 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 43991 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 44036 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 44418 value 10101. Reason: time d

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 47018 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 47342 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 45025 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 46559 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 45170 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 47593 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 47613 value 10101. Reaso

Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 48744 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 49797 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 49350 value 00010101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 49916 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 49945 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 49831 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column OPER_LIC_EFF_STT, line 49217 value 00010101. Re

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 6548 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 5223 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 5259 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 4786 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 9449 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 5722 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 9737 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 5433 value 

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 18395 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 18485 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 13767 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 11946 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 19300 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 12905 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 19305 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 1799

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 24240 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 20712 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 27114 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 21464 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 24686 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 24694 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 20399 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 2112

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 31801 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 31743 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 69 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 823 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 26974 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 942 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 1085 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 576 value 20

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 29733 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 3692 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 3159 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 3451 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 5062 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 4704 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 2214 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 34962 valu

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 34107 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 6549 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 34002 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 6212 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 5343 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 8147 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 6372 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 6195 valu

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 42274 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 36752 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 38392 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 8144 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 9041 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 9057 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 8636 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 10034 va

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 11107 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 43696 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 40249 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 13022 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 47151 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 11773 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 14370 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 1343

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 44001 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 46840 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 15540 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 13100 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 15472 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 49904 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 46275 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 4462

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 17630 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 45502 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 46694 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 18378 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 17896 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 18246 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 16909 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 4594

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 22755 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 22025 value 20190229. Reason: day is out of range for month
[1568160, 43041600, 43041600, 43041600, 43041600, 1487520, 43041600, 1352160, 1530720, 725760]
Transfer Time on: DT_CTE.1
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 22139 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 22439 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 22481 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 23586 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 24396 value 20

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 26176 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 26524 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 27774 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 27774 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 26915 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 39972 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 28090 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 2691

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 32152 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 31274 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 31878 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 32127 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 31044 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 30598 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 32434 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 2998

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 36159 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 35816 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 36004 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 35969 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 37142 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 36167 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 49060 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 3624

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 38721 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 38248 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 39810 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 40847 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 39572 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 41330 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 40998 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 4150

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 45474 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 43961 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 45827 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 46685 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 44841 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 45563 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 45668 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 4328

Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 48628 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 46924 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 46940 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 49159 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 49415 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 49991 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 49448 value 20190229. Reason: day is out of range for month
Error while transfer string to datatime in column LEG_CRD_EFF_EDT, line 4793

Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 826 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 411 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 1815 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 912 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 29584 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 1583 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 1438 value 10101. Reason: time data '10101' does

Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 10531 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 9361 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 38773 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 22945 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 10169 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 10310 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 10441 value 10101. Reason: time data '101

Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 15325 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 16755 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 15947 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 17124 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 16331 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 17444 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 17228 value 10101. Reason: time data '10

Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 23405 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 24214 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 24220 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 23012 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 24671 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 22869 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 24570 value 10101. Reason: time data '10

Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 31184 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 32200 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 33130 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 31476 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 31840 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 34636 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 31412 value 10101. Reason: time data '10

Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 41412 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 41639 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 42167 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 41575 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 41348 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 42465 value 10101. Reason: time data '10101' does not match format '%Y%m%d'
Error while transfer string to datatime in column LEG_CRD_EFF_STT, line 42577 value 10101. Reason: time data '10

[0, 0, -1440, 0, 0, 0, 0, 0, 0, 0]
Transfer Time on: DT_UTE
[0.0, -54720, -38880, -74880, 41760, -44640, -1440, -195840, 43200, 47520]
Transfer Time on: DT_CTE
[-10080, -103680, -92160, -93600, -93600, -90720, -48960, -226080, -47520, -851040]
0    0
1    0
2    0
3    0
4    0
Name: TRAN_DT, dtype: int64


In [29]:
# Preview the first five DATE_CREATED values of the first chunk before conversion
# (the recorded output shows timestamp strings, dtype object).
print(global_df_lst[0]['DATE_CREATED'][:5])
# Convert the columns named in date_time_format_2_lst from strings in
# "%Y-%m-%d %H:%M:%S.%f" format to integers, using 2018-03-01 as base time and
# "Minute" as scale. The recorded output is consistent with whole minutes elapsed
# since the base time (00:28:11 -> 28, 01:31:25 -> 91).
# NOTE(review): date_time_format_2_lst, clean_df and the datetime import come from
# earlier cells not visible here; the recorded output lists DATE_CREATED and
# LAST_UPDATED as the processed columns -- confirm against the list definition.
# NOTE(review): the conversion changes what global_df_lst holds (see the second
# preview), so re-running this cell on already-converted data is presumably not
# safe -- confirm before re-executing.
clean_df.transfer_datetime_to_int(column_names=date_time_format_2_lst, datetime_format="%Y-%m-%d %H:%M:%S.%f", basetime=datetime.datetime(2018,3,1), scale="Minute")
# Preview the same five rows again to verify the conversion
# (the recorded output shows int64 values).
print(global_df_lst[0]['DATE_CREATED'][:5])

0    2018-03-01 00:28:11.208000
1    2018-03-01 01:31:25.869000
2    2018-03-01 02:14:41.438000
3    2018-03-01 02:08:15.836000
4    2018-03-01 08:29:56.193000
Name: DATE_CREATED, dtype: object
Transfer Time on: DATE_CREATED
[28, 91, 134, 128, 509, 551, 564, 581, 603, 639]
Transfer Time on: LAST_UPDATED
[28, 92, 134, 136, 510, 552, 564, 582, 603, 639]
0     28
1     91
2    134
3    128
4    509
Name: DATE_CREATED, dtype: int64


In [30]:
# Fill missing (NaN) cells via the project's cleaning helper, called with its
# default arguments. The recorded output logs one "Filling column <name>" line
# per column.
# NOTE(review): the fill value/strategy and whether the frames in global_df_lst
# are modified in place are defined by the helper, which is not visible in this
# cell -- confirm before relying on it.
clean_df.fill_value_to_nan_cell()

Filling column UUID
Filling column OUUID
Filling column ORD_NO
Filling column CLR_DT
Filling column RE_BUS_CNL
Filling column IN_MNO
Filling column TRM_NO
Filling column MCC_CD
Filling column BAT_NO
Filling column TRAN_DT
Filling column TRAN_TM
Filling column CTXN_DT
Filling column CORG_NO
Filling column POS_OPR_ID
Filling column TRAN_FLG
Filling column TRAN_CD
Filling column OLD_TRAN_CD
Filling column CORG_RCD
Filling column CORG_TRAN_STS
Filling column TRAN_STS
Filling column IN_MOD
Filling column CRD_NO
Filling column CCY
Filling column TRAN_AMT
Filling column REF_AMT
Filling column REF_FEE_AMT
Filling column QK_AMT
Filling column CORG_FEE_RAT
Filling column CORG_FEE_AMT
Filling column REC_FEE_AMT
Filling column BD_FLG
Filling column BNK_TYP
Filling column CRD_TYP
Filling column CRD_FLG
Filling column IC_CRD_FLG
Filling column AUT_CD
Filling column RP_CD
Filling column DATE_CREATED
Filling column LAST_UPDATED
Filling column SIGN_FLG
Filling column CFM_FLG
Filling column CORG_NM
Fill

In [31]:
# Export every cleaned chunk in global_df_lst to a single CSV file: the first
# chunk contributes the header row, all later chunks are appended as data rows.
#
# The handle is opened explicitly with encoding='utf-8' because pandas does not
# apply to_csv(encoding=...) to an already-open text handle; without it the file
# is written in the platform's locale encoding, which can corrupt or reject
# non-ASCII values. newline='' is what pandas requires for file objects and
# prevents doubled line endings on Windows. to_csv(mode=...) is likewise
# irrelevant for an open handle: each write continues at the current position.
with open("csv201803_clear_2.csv", 'w', encoding='utf-8', newline='') as f:
    for idx, df in enumerate(global_df_lst):
        # Only the first chunk writes the column names.
        df.to_csv(f, header=(idx == 0), index=False)