In [81]:
!pwd
!ls

/d/GH/GitWorkSpace/bank_model_competiton/data/v25
bak
columns.txt
model_gbdt_v25.ipynb
model_lgb_v25.ipynb
model_xgb_v25.ipynb
process_v25.ipynb
t.py
test.dat.v25
train.dat.v25


In [82]:
import pandas as pd 
import matplotlib.pyplot as plt
import statistics
import datetime
import seaborn as sns
import os
import numpy as np
import time 
from sklearn.preprocessing import OrdinalEncoder

pd.set_option('display.max_columns',200)
pd.set_option('display.max_rows',   200)


plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

suffix = os.path.split(os.getcwd())[-1]

root_dir='../../'

train_path=root_dir+'train.csv'
train_bank_path=root_dir+'train_bank_statement.csv'
output_train_path = "train.dat.%s" % suffix

test_path=root_dir+'testaa.csv'
test_bank_path=root_dir+'testaa_bank_statement.csv'
output_test_path = "test.dat.%s" % suffix

print(suffix)
print('process time : ',time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime()))

v25
process time :  2025-09-01 17:52:41


In [83]:
# debug 模式可设置较小值
NROWS = 1000000000000000000000000000000

# myl特征处理

# 1. 交易流水特征

In [84]:
import pandas as pd
import numpy as np
from functools import reduce


# 补齐日期
def expand_user_days(bank_df):
    bank_df = bank_df.copy()
    bank_df = bank_df.set_index('time').sort_index()
    all_days = pd.date_range(bank_df.index.min(), bank_df.index.max())
    bank_df = bank_df.reindex(all_days, fill_value=0)
    bank_df.index.name = 'time'
    return bank_df


def get_cash_aum(df, bank_df):
    df = df.copy()
    bank_df = bank_df.copy()

    bank_df['time'] = pd.to_datetime(bank_df['time'], unit='s')
    bank_df['amount_signed'] = bank_df['amount'] * bank_df['direction'].map({0: 1, 1: -1})

    # 每日净变动
    daily_change = bank_df.groupby(['id', 'time'])['amount_signed'].sum().reset_index()

    # 注意这里用 group_keys=False，避免多余的索引列
    daily_balance = (
        daily_change.groupby('id', group_keys=False)
        .apply(expand_user_days)
        .reset_index()
    )

    # 计算每日余额（逐日累计）
    daily_balance['balance'] = daily_balance.groupby('id')['amount_signed'].cumsum()

    # 计算月日均 AUM
    daily_balance['year_month'] = daily_balance['time'].dt.to_period('M')

    # 计算月日均 AUM
    monthly_aum = (
        daily_balance.groupby(['id', 'year_month'])['balance']
        .mean()
        .reset_index(name='aum_monthly_avg_daily')
    )

    # 每个用户的最后交易月
    last_month = monthly_aum.groupby('id')['year_month'].max().reset_index(name='last_month')

    # 拼接，计算相对月差
    merged = monthly_aum.merge(last_month, on='id')
    merged['month_diff'] = (merged['last_month'] - merged['year_month']).apply(lambda x: x.n)

    # 提取最近 N 个月
    months = [1, 2, 3, 4, 6]

    features = []
    for m in months:
        tmp = merged[merged['month_diff'] == m - 1][['id', 'aum_monthly_avg_daily']]
        tmp = tmp.rename(columns={'aum_monthly_avg_daily': f'cash_aum_month_avg_last_{m}m'})
        features.append(tmp)

    # 合并特征
    features_df = reduce(lambda l, r: pd.merge(l, r, on='id', how='left'), features)

    #
    df = df.merge(features_df, on='id', how='left')

    return df

def get_span_months(df, bank_df):
    # 交易流水跨度月数
    bank_df["time"] = pd.to_datetime(bank_df["time"], unit='s')
    span_df = bank_df.groupby("id")["time"].agg(["min", "max"]).reset_index()

    # --计算跨度的自然月数
    span_df["cash_span_months"] = (
            (span_df["max"].dt.year - span_df["min"].dt.year) * 12
            + (span_df["max"].dt.month - span_df["min"].dt.month)
            + 1  # 如果要算“覆盖的自然月数” → 加 1
    )

    span_df = span_df.drop(columns=['min', 'max'])

    df = df.merge(span_df, on='id', how='left')
    return df


def get_month_cash_stat(df, bank_df):
    df, bank_df = df.copy(), bank_df.copy()

    bank_df['month'] = pd.to_datetime(bank_df['time'], unit='s').dt.to_period('M').astype(str)

    prefix = 'month'
    # 总转入
    in_df = bank_df[bank_df['direction'] == 0].groupby(['id', 'month'])['amount'].agg(['sum', 'count', 'max']).rename(
        columns={
            'sum': f'{prefix}_in_amount',
            'count': f'{prefix}_in_count',
            'max': f'{prefix}_in_max'
        }
    ).reset_index()

    # 总转出
    out_df = bank_df[bank_df['direction'] == 1].groupby(['id', 'month'])['amount'].agg(['sum', 'count', 'max']).rename(
        columns={'sum': f'{prefix}_out_amount',
                 'count': f'{prefix}_out_count',
                 'max': f'{prefix}_out_max',
                 }
    ).reset_index()

    month_df = in_df.merge(out_df, on=['id', 'month'], how='right')

    # NOTE：净转入应该是 转入 - 转出，修复， 08-29 14:00
    month_df['month_net_amount'] = month_df['month_in_amount'] - month_df['month_out_amount']

    # amount -> count ?
    #month_df['month_neg_net_count'] = (month_df['month_net_amount'] < 0).astype(int)
    month_df['month_neg_net_amount'] = (month_df['month_net_amount'] < 0).astype(int)

    prefix = 'cash_month'
    month_agg_df = month_df.groupby(['id'])['month_neg_net_amount'].agg(['count', 'sum']).rename(
        columns={'count': f'{prefix}_has_trans_months',
                 'sum': f'{prefix}_neg_amount_months'}
    ).reset_index()

    #df = df.merge(month_agg_df, on='id', how='left')

    # ---- 最近 N 个月的净转入/转入/转出 ----
    month_df['month'] = pd.to_datetime(month_df['month'])
    month_df = month_df.sort_values(['id', 'month'])

    for n in [1, 2, 3, 4, 6]:
        # 转入
        month_df[f'cash_month_in_amount_last{n}m'] = (
            month_df.groupby('id')['month_in_amount']
            .transform(lambda x: x.rolling(window=n, min_periods=1).sum())
        )
        # 转出
        month_df[f'cash_month_out_amount_last{n}m'] = (
            month_df.groupby('id')['month_out_amount']
            .transform(lambda x: x.rolling(window=n, min_periods=1).sum())
        )
        # 净转入
        month_df[f'cash_month_net_amount_last{n}m'] = (
            month_df.groupby('id')['month_net_amount']
            .transform(lambda x: x.rolling(window=n, min_periods=1).sum())
        )

    # 取每个用户最后一个月的快照
    last_month_df = (
        month_df.groupby('id')
        .tail(1)
        .drop(columns=['month_neg_net_amount'])  # 这个是中间特征，可以不用留
    )

    # 合并
    final_df = df.merge(month_agg_df, on='id', how='left')
    final_df = final_df.merge(last_month_df, on='id', how='left')

    return final_df

def get_total_cash_stat(df, bank_df):
    # 用户总转入、转出、净转入
    prefix = 'cash_total'

    total_in_df = bank_df[bank_df['direction'] == 0].groupby(['id'])['amount'].agg(['sum', 'count', 'max']).rename(
        columns={
            'sum': f'{prefix}_in_amount',
            'count': f'{prefix}_in_count',
            'max': f'{prefix}_in_max'
        }
    ).reset_index()

    total_out_df = bank_df[bank_df['direction'] == 1].groupby(['id'])['amount'].agg(['sum', 'count', 'max']).rename(
        columns={
            'sum': f'{prefix}_out_amount',
            'count': f'{prefix}_out_count',
            'max': f'{prefix}_out_max'
        }
    ).reset_index()

    df = df.merge(total_in_df, on='id', how='left').merge(total_out_df, on='id', how='left')

    df['cash_total_net_amount'] = df['cash_total_in_amount'] - df['cash_total_out_amount']

    return df

def safe_div(df, a, b, c):
    df[c] = (
    df[a]                           # 分子列
    .div(df[b])                     # 除以分母列（自动处理对齐）
    .replace([np.inf, -np.inf], 0)    # 将除零导致的 inf 替换为 0
    .fillna(0)                        # 将 NaN（分子或分母为 NaN）替换为 0
    )
    return df

def generate_cashflow_features(df, bank_df):
    df = df.copy()
    bank_df = bank_df.copy()

    # 是否有交易流水
    df['cash_has_trans'] = df['id'].isin(bank_df['id']).astype(int)

    # 交易流水跨度月数
    df = get_span_months(df, bank_df)

    # 总转入、转出、净转入
    df = get_total_cash_stat(df, bank_df)

    # 按照月统计，有交易的月数、净转入为负月数
    df = get_month_cash_stat(df, bank_df)

    # 用户交易流水跨度内平均每个月总转入、转出、净转入
    df = safe_div(df, 'cash_total_out_amount', 'cash_span_months', 'cash_avg_out_amount')
    df = safe_div(df, 'cash_total_in_amount', 'cash_span_months', 'cash_avg_in_amount')
    df = safe_div(df, 'cash_total_net_amount', 'cash_span_months', 'cash_avg_net_amount')

    # 增加aum相关
    df = get_cash_aum(df, bank_df)

    return df

# 2. 贷款特征

In [85]:
# 计算已还款期数（自然月差值）
def month_diff(d1, d2):
    if pd.isna(d1) or pd.isna(d2):
        return 0
    return (d1.year - d2.year) * 12 + (d1.month - d2.month) + 1  # +1 表示覆盖自然月


def calc_num_repay(row):
    if pd.isna(row["max_txn_time"]):
        return 0  # 没流水
    if row["max_txn_time"] > row["issue_time_s"]:
        return month_diff(row["max_txn_time"], row["issue_time_s"])  # 已开始还款
    else:
        return 0  # 未开始还款

def generate_loan_features(df, bank_df):
    df, bank_df = df.copy(), bank_df.copy()

    # 本次总还款金额：total_loan = loan * (1 + interest_rate)
    df['interest_rate'] /= 100.
    df['loan_total_loan'] = df['loan'] * (1 + df['interest_rate'])

    # 每个月还款金额：month_repay = total_loan / term
    df['loan_month_repay'] = df['loan_total_loan'] / df['term']

    # 是否已经开始还款：最大交易时间 > issue_time ?
    df["issue_time_s"] = pd.to_datetime(df["issue_time"], unit='s')
    bank_df["time"] = pd.to_datetime(bank_df["time"], unit='s')

    # --取每个用户的最大交易时间
    max_txn_time = bank_df.groupby("id")["time"].max().reset_index()
    max_txn_time = max_txn_time.rename(columns={"time": "max_txn_time"})

    df = df.merge(max_txn_time, on="id", how="left")

    # -- 判断是否已经开始还款
    df["loan_has_repayment"] = (df["max_txn_time"] > df["issue_time_s"]).astype(int)

    # 还剩到期期数：remain_repay_term = term - num_repay_term
    df['max_txn_time'] = pd.to_datetime(df['max_txn_time'], unit='s')

    # 新增，账龄
    df['history_time_s'] = pd.to_datetime(df['history_time'], unit='s')
    df['time_account_day'] = (df['max_txn_time'] - df['history_time_s']).dt.days

    # 已经还款期数
    df["loan_repay_term"] = df.apply(calc_num_repay, axis=1)

    # 还剩期数
    df['loan_remain_repay_term'] = df['term'] - df["loan_repay_term"]

    # 还剩金额：remain_repay_amount = remain_repay_term * month_repay
    df['loan_remain_repay_amount'] = df['loan_remain_repay_term'] * df['loan_month_repay']

    # 总负债：total_debt = remain_repay_amount + balance
    df['loan_total_debt'] = df['loan_remain_repay_amount'] + df['balance']

    # 他行余额比例： balance_ratio = balance / balance_limit
    df['loan_balance_ratio'] = df['balance'] / df['balance_limit']

    # 贷款相比额度比例？ loan_ratio = loan / balance_limit
    df['loan_loan_ratio'] = df['loan'] / df['balance_limit']
    
    df.drop(['issue_time_s'], axis=1, inplace=True)
    return df

In [86]:
#  读入数据
df_train  = pd.read_csv(train_path,  nrows=NROWS)
df_test   = pd.read_csv(test_path,   nrows=NROWS)

df_bank_train  = pd.read_csv(train_bank_path, nrows=NROWS)
df_bank_test   = pd.read_csv(test_bank_path,  nrows=NROWS)

df_train_test      = pd.concat([df_train,    df_test],        axis=0)
df_bank_train_test = pd.concat([df_bank_train, df_bank_test], axis=0)


#cash feature
df_train_test = generate_cashflow_features(df_train_test, df_bank_train_test)
print('after cashflow feature df_train_test : ', df_train_test.shape)

cash_nume_names = []
for name in df_train_test.columns:
    if name.startswith('cash_'):
        cash_nume_names.append(name)
print('cash nume name : ',cash_nume_names)

# loan feature
df_train_test = generate_loan_features(df_train_test, df_bank_train_test)
print('after loan features df_train_test : ', df_train_test.shape)
print('process time : ',time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime()))
df_train_test

after cashflow feature df_train_test :  (73534, 61)
cash nume name :  ['cash_has_trans', 'cash_span_months', 'cash_total_in_amount', 'cash_total_in_count', 'cash_total_in_max', 'cash_total_out_amount', 'cash_total_out_count', 'cash_total_out_max', 'cash_total_net_amount', 'cash_month_has_trans_months', 'cash_month_neg_amount_months', 'cash_month_in_amount_last1m', 'cash_month_out_amount_last1m', 'cash_month_net_amount_last1m', 'cash_month_in_amount_last2m', 'cash_month_out_amount_last2m', 'cash_month_net_amount_last2m', 'cash_month_in_amount_last3m', 'cash_month_out_amount_last3m', 'cash_month_net_amount_last3m', 'cash_month_in_amount_last4m', 'cash_month_out_amount_last4m', 'cash_month_net_amount_last4m', 'cash_month_in_amount_last6m', 'cash_month_out_amount_last6m', 'cash_month_net_amount_last6m', 'cash_avg_out_amount', 'cash_avg_in_amount', 'cash_avg_net_amount', 'cash_aum_month_avg_last_1m', 'cash_aum_month_avg_last_2m', 'cash_aum_month_avg_last_3m', 'cash_aum_month_avg_last_4m', '

Unnamed: 0,id,title,career,zip_code,residence,loan,term,interest_rate,issue_time,syndicated,installment,record_time,history_time,total_accounts,balance_accounts,balance_limit,balance,level,label,cash_has_trans,cash_span_months,cash_total_in_amount,cash_total_in_count,cash_total_in_max,cash_total_out_amount,cash_total_out_count,cash_total_out_max,cash_total_net_amount,cash_month_has_trans_months,cash_month_neg_amount_months,month,month_in_amount,month_in_count,month_in_max,month_out_amount,month_out_count,month_out_max,month_net_amount,cash_month_in_amount_last1m,cash_month_out_amount_last1m,cash_month_net_amount_last1m,cash_month_in_amount_last2m,cash_month_out_amount_last2m,cash_month_net_amount_last2m,cash_month_in_amount_last3m,cash_month_out_amount_last3m,cash_month_net_amount_last3m,cash_month_in_amount_last4m,cash_month_out_amount_last4m,cash_month_net_amount_last4m,cash_month_in_amount_last6m,cash_month_out_amount_last6m,cash_month_net_amount_last6m,cash_avg_out_amount,cash_avg_in_amount,cash_avg_net_amount,cash_aum_month_avg_last_1m,cash_aum_month_avg_last_2m,cash_aum_month_avg_last_3m,cash_aum_month_avg_last_4m,cash_aum_month_avg_last_6m,loan_total_loan,loan_month_repay,max_txn_time,loan_has_repayment,history_time_s,time_account_day,loan_repay_term,loan_remain_repay_term,loan_remain_repay_amount,loan_total_debt,loan_balance_ratio,loan_loan_ratio
0,0,9,0.0,221373,1,7200,36,0.1095,1238631967,0,1,1238630622,472006661,17.0,9.0,36200.0,13856.00,A4,0.0,1,6.0,59707.50,6.0,10638.10,12079.50,42.0,1301.06,47628.00,6.0,0.0,2009-03-01,10328.40,1.0,10328.40,763.79,2.0,410.23,9564.61,10328.40,763.79,9564.61,10328.40,1954.87,9564.61,30329.70,6087.25,25433.53,40298.05,7029.39,34459.74,59707.50,12079.50,48819.08,2013.250000,9951.250000,7938.000000,47628.000000,47628.000,47628.000000,47628.000000,47628.000000,7988.40,221.900000,2009-03-28,0,1984-12-16 00:57:41,8867.0,0,36,7988.400000,21844.400000,0.382762,0.198895
1,1,8,10.0,311681,0,21300,36,0.1295,1128212052,0,0,1161907665,763779041,17.0,9.0,20400.0,13773.00,B0,1.0,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,24058.35,668.287500,NaT,0,1994-03-16 00:50:41,,0,36,24058.350000,37831.350000,0.675147,1.044118
2,2,8,7.0,271562,1,10400,60,0.2105,1249171509,0,0,1383958593,727143443,17.0,9.0,10800.0,2023.00,B4,0.0,1,7.0,6522.38,4.0,1696.54,15883.72,44.0,3394.94,-9361.34,7.0,3.0,2013-11-01,,,,3609.08,3.0,3394.94,,,3609.08,,1696.54,5655.03,-349.41,3331.49,8787.56,-1846.99,3331.49,10702.54,-1846.99,5015.84,13023.33,-1033.12,2269.102857,931.768571,-1337.334286,-9273.233333,-5538.170,-5648.978333,-3118.624286,-1375.686667,12589.20,209.820000,2013-11-09,1,1993-01-16 00:17:23,7601.0,52,8,1678.560000,3701.560000,0.187315,0.962963
3,3,7,2.0,522083,0,33050,36,0.1640,1172882234,0,1,1214353935,687660346,17.0,9.0,24700.0,21992.00,B3,0.0,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,38470.20,1068.616667,NaT,0,1991-10-17 00:45:46,,0,36,38470.200000,60462.200000,0.890364,1.338057
4,4,8,3.0,101026,1,5200,36,0.1435,1172882384,0,0,1240274527,322012875,17.0,9.0,5100.0,1669.00,B2,1.0,1,6.0,20339.90,6.0,3672.20,30823.10,87.0,3561.84,-10483.20,6.0,3.0,2009-03-01,3344.25,1.0,3344.25,12591.13,25.0,3561.84,-9246.88,3344.25,12591.13,-9246.88,6538.00,14381.29,-7843.29,10210.20,21544.17,-11333.97,13370.35,23091.21,-9720.86,20339.90,30823.10,-10483.20,5137.183333,3389.983333,-1747.200000,-6613.362917,-402.454,1568.347500,550.397778,-294.607273,5946.20,165.172222,2009-03-29,1,1980-03-16 00:01:15,10604.0,25,11,1816.894444,3485.894444,0.327255,1.019608
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73529,73529,0,8.0,601107,1,10000,12,0.1885,1130976000,0,0,1125964800,1018224000,6.0,3.0,3818.0,2224.69,A4,,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,11885.00,990.416667,NaT,0,2002-04-08 00:00:00,,0,12,11885.000000,14109.690000,0.582685,2.619172
73530,73530,0,10.0,601102,1,10000,12,0.2930,1156204800,0,0,1157068800,1054425600,6.0,6.0,5502.0,4126.71,B4,,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,12930.00,1077.500000,NaT,0,2003-06-01 00:00:00,,0,12,12930.000000,17056.710000,0.750038,1.817521
73531,73531,0,4.0,601408,1,11000,12,0.2475,1144108800,0,0,1111622400,1037404800,8.0,3.0,4844.0,2710.96,A3,,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,13722.50,1143.541667,NaT,0,2002-11-16 00:00:00,,0,12,13722.500000,16433.460000,0.559653,2.270851
73532,73532,0,3.0,601904,1,8000,12,0.2200,1163808000,0,0,1116892800,1057017600,6.0,3.0,3495.0,1834.93,A3,,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,9760.00,813.333333,NaT,0,2003-07-01 00:00:00,,0,12,9760.000000,11594.930000,0.525016,2.288984


# 郭浩特征处理 ------------------------------------- #

#  基础特征

In [87]:
# #  读入数据
# df_train  = pd.read_csv(train_path, index_col=['id'], nrows=NROWS)
# df_test   = pd.read_csv(test_path,  index_col=['id'], nrows=NROWS)

# df_bank_train  = pd.read_csv(train_tx_path, index_col=['id'], nrows=NROWS)
# df_bank_test   = pd.read_csv(test_tx_path,  index_col=['id'], nrows=NROWS)

# df_train_test    = pd.concat([df_train,    df_test],    axis=0)
# df_bank_train_test = pd.concat([df_bank_train, df_bank_test], axis=0)
# print(df_train.shape)
# print(df_test.shape)

#  特征加工
df_train_test['balance_account_avg'] = df_train_test['balance'] / df_train_test['balance_accounts']
df_train_test['loan_term_avg'] = df_train_test['loan'] / df_train_test['term']
df_train_test['balance_accounts_ratio'] = df_train_test['balance_accounts'] / df_train_test['total_accounts']

#log 处理
df_train_test['loan_log'] = df_train_test['loan'].apply(lambda x : np.log(x))
df_train_test['balance_account_avg_log'] = df_train_test['balance_account_avg'].apply(lambda x : np.log(x))
df_train_test['loan_term_avg_log'] = df_train_test['loan_term_avg'].apply(lambda x : np.log(x))
df_train_test['balance_accounts_ratio_log'] = df_train_test['balance_accounts_ratio'].apply(lambda x : np.log(x))
df_train_test['interest_rate_log'] = df_train_test['interest_rate'].apply(lambda x : np.log(x))

df_train_test['balance_log'] = df_train_test['balance'].apply(lambda x : np.log(x))
df_train_test['balance_limit_log'] = df_train_test['balance_limit'].apply(lambda x : np.log(x))
df_train_test['balance_accounts_log'] = df_train_test['balance_accounts'].apply(lambda x : np.log(x))

# zip code
df_train_test['zip_province']  = df_train_test['zip_code'].apply(lambda x : str(x)[:2])
df_train_test['zip_city']      = df_train_test['zip_code'].apply(lambda x : str(x)[:4])

#level 处理
df_train_test['level_hash'] = df_train_test['level'].apply(lambda x : hash(x) % 1000 )
encoder = OrdinalEncoder(categories=[['A0','A1','A2','A3','A4','A5','B0','B1','B2','B3','B4','B5','C0','C1','C2','C3','C4','C5','D0','D1','D2','D3','D4','D5','E0','E1','E2','E3','E4','E5']]) 
df_train_test['level_ord'] = encoder.fit_transform(df_train_test[['level']].values)

df_train_test['grade'] = df_train_test['level'].apply( lambda x : str(x)[0])  #提取ABCDE
df_train_test['grade'] = df_train_test['grade'].map({'A':1 , 'B':2, 'C':3, 'D':4, 'E':5})


# 分桶处理
bins_num = 50
tmp_labels = ['%d' % i for i in range(bins_num)]
df_train_test['interest_rate_cut'] = pd.cut(df_train_test['interest_rate'], bins=bins_num, labels = tmp_labels)

tmp_labels = ['%d' % i for i in range(bins_num)]
df_train_test['interest_rate_log_cut'] = pd.cut(df_train_test['interest_rate_log'], bins=bins_num, labels = tmp_labels)

# 等频
tmp_labels = ['%d' % i for i in range(bins_num)]
df_train_test['balance_cut'] = pd.qcut(df_train_test['balance'], q=bins_num, labels = tmp_labels, duplicates='drop')

df_train_test['loan_cut'] = pd.qcut(df_train_test['loan'], q=bins_num,  duplicates='drop')
loan_labels = np.unique(df_train_test['loan_cut'].values.codes).tolist()
df_train_test['loan_cut'] = pd.qcut(df_train_test['loan'], q=bins_num,  labels = loan_labels,  duplicates='drop')

tmp_labels = ['%d' % i for i in range(bins_num)]
df_train_test['balance_limit_cut'] = pd.qcut(df_train_test['balance_limit'], q=bins_num, labels = tmp_labels, duplicates='drop')

df_train_test['loan_term_avg_cut'] = pd.qcut(df_train_test['loan_term_avg'], q=bins_num,  duplicates='drop')
loan_labels = np.unique(df_train_test['loan_term_avg_cut'].values.codes).tolist()
df_train_test['loan_term_avg_cut'] = pd.qcut(df_train_test['loan_term_avg'], q=bins_num,  labels = loan_labels,  duplicates='drop')

tmp_labels = ['%d' % i for i in range(bins_num)]
df_train_test['balance_account_avg_cut'] = pd.qcut(df_train_test['balance_account_avg'], q=bins_num, labels = tmp_labels, duplicates='drop')

# 时间处理
df_train_test['record_time_format'] = df_train_test['record_time'].apply(lambda x: datetime.datetime.fromtimestamp(x))
df_train_test['record_time_year'] = df_train_test['record_time_format'].map(lambda x : x.year)
df_train_test['record_time_month'] = df_train_test['record_time_format'].map(lambda x : x.month)      # 探索周期性
df_train_test['record_time_week'] = df_train_test['record_time_format'].map(lambda x : x.week)      # 探索周期性
df_train_test['record_time_year_month'] = df_train_test['record_time_format'].map(lambda x : x.strftime('%Y%m'))

# 后验违约概率 
df_level_stat = pd.DataFrame()
df_level_stat['level_cnt']  = df_train_test[['level','label']].groupby('level').apply(lambda x : x['label'].count())
df_level_stat['level_default_cnt']  = df_train_test[['level','label']].groupby('level')[['label']].apply(lambda x : x[x['label'] == 1].count())
df_level_stat['level_default_ratio'] = df_level_stat['level_default_cnt'] / df_level_stat['level_cnt']
df_level_stat = df_level_stat.reset_index()
# print(df_level_stat)
df_train_test = pd.merge(df_train_test, df_level_stat[['level','level_default_ratio']], on='level', how='left',sort=False)

df_stat_tmp = pd.DataFrame()
df_stat_tmp['interest_rate_cnt']  = df_train_test[['interest_rate_cut','label']].groupby('interest_rate_cut').apply(lambda x : x['label'].count())
df_stat_tmp['interest_rate_default_cnt']  = df_train_test[['interest_rate_cut','label']].groupby('interest_rate_cut')[['label']].apply(lambda x : x[x['label'] == 1].count())
df_stat_tmp['interest_rate_default_ratio'] = df_stat_tmp['interest_rate_default_cnt'] / df_stat_tmp['interest_rate_cnt']
df_stat_tmp = df_stat_tmp.reset_index()
# print(df_stat_tmp)
df_train_test = pd.merge(df_train_test, df_stat_tmp[['interest_rate_cut','interest_rate_default_ratio']], on='interest_rate_cut', how='left',sort=False)

df_stat_tmp = pd.DataFrame()
df_stat_tmp['term_cnt']  = df_train_test[['term','label']].groupby('term').apply(lambda x : x['label'].count())
df_stat_tmp['term_default_cnt']  = df_train_test[['term','label']].groupby('term')[['label']].apply(lambda x : x[x['label'] == 1].count())
df_stat_tmp['term_default_ratio'] = df_stat_tmp['term_default_cnt'] / df_stat_tmp['term_cnt']
df_stat_tmp = df_stat_tmp.reset_index()
# print(df_stat_tmp)
df_train_test = pd.merge(df_train_test, df_stat_tmp[['term','term_default_ratio']], on='term', how='left',sort=False)

df_train_test.drop(['record_time_format'], axis=1, inplace=True)
print('process time : ',time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime()))
if 'id' in df_train_test.columns :
    df_train_test.set_index('id', inplace=True)

process time :  2025-09-01 17:55:36


#  交易特征处理

In [88]:
df_bank_train_test['time_format'] = df_bank_train_test['time'].apply(lambda x: datetime.datetime.fromtimestamp(x))
print(df_bank_train_test)

#days_diff, tm_count, total_amount, amount_1, amount_0, total_amount_day_avg, amount_1_day_avg, amount_0_day_avg
print(df_bank_train_test.shape)

df_bank_stat = pd.DataFrame()
df_bank_stat['tx_time_max'] = df_bank_train_test.groupby('id')['time_format'].agg('max')
df_bank_stat['tx_time_min'] = df_bank_train_test.groupby('id')['time_format'].agg('min')

df_bank_stat['tx_max_min_days'] = (df_bank_stat['tx_time_max'] - df_bank_stat['tx_time_min'])
df_bank_stat['tx_max_min_days'] = df_bank_stat['tx_max_min_days'].map(lambda x : x.days)

df_bank_stat['tx_count'] = df_bank_train_test.groupby('id')['amount'].agg('count')
df_bank_stat['total_amount'] = df_bank_train_test.groupby('id')['amount'].agg('sum')

df_bank_stat['1_amount'] = df_bank_train_test.groupby('id').apply( lambda x : x [ x['direction'] == 1]['amount'].sum())
df_bank_stat['0_amount'] = df_bank_train_test.groupby('id').apply( lambda x : x [ x['direction'] == 0]['amount'].sum())
#TODO :  1 - 0 amount 

df_bank_stat['total_amount_avg']  = df_bank_stat['total_amount'] / df_bank_stat['tx_max_min_days'] 
df_bank_stat['1_amount_avg']  = df_bank_stat['1_amount'] / df_bank_stat['tx_max_min_days'] 
df_bank_stat['0_amount_avg']  = df_bank_stat['0_amount'] / df_bank_stat['tx_max_min_days'] 

df_bank_stat['total_amount_avg2']  = df_bank_stat['total_amount'] / df_bank_stat['tx_count'] 
df_bank_stat['1_amount_avg2']  = df_bank_stat['1_amount'] / df_bank_stat['tx_count'] 
df_bank_stat['0_amount_avg2']  = df_bank_stat['0_amount'] / df_bank_stat['tx_count'] 

#交易活跃度： 效果次数/天数
df_bank_stat['tx_count_avg']  = df_bank_stat['tx_count'] / df_bank_stat['tx_max_min_days'] 
df_bank_stat['tx_tmstp_max'] = df_bank_train_test.groupby('id')['time'].agg('max')
df_bank_stat['tx_tmstp_min'] = df_bank_train_test.groupby('id')['time'].agg('min')
# df_bank_stat['tx_max_min_days'] = (df_bank_stat['tx_time_max'] - df_bank_stat['tx_time_min'])
# df_bank_stat['tx_max_min_days'] = df_bank_stat['tx_max_min_days'].map(lambda x : x.days)

df_bank_stat.drop([ 'tx_time_max', 'tx_time_min'], axis = 1, inplace=True)
df_bank_stat

           id        time  direction       amount         time_format
0           0  1224115200          0  8771.350000 2008-10-16 08:00:00
1           0  1224288000          1   310.650000 2008-10-18 08:00:00
2           0  1224460800          1   152.620000 2008-10-20 08:00:00
3           0  1225152000          1    20.490000 2008-10-28 08:00:00
4           0  1226793600          1   173.170000 2008-11-16 08:00:00
...       ...         ...        ...          ...                 ...
646661  71870  1160956800          1   493.403945 2006-10-16 08:00:00
646662  71870  1161043200          0     9.462382 2006-10-17 08:00:00
646663  71870  1161388800          1   222.936072 2006-10-21 08:00:00
646664  71870  1161475200          0   222.936072 2006-10-22 08:00:00
646665  71870  1161475200          1    20.908367 2006-10-22 08:00:00

[2364084 rows x 5 columns]
(2364084, 5)


Unnamed: 0_level_0,tx_max_min_days,tx_count,total_amount,1_amount,0_amount,total_amount_avg,1_amount_avg,0_amount_avg,total_amount_avg2,1_amount_avg2,0_amount_avg2,tx_count_avg,tx_tmstp_max,tx_tmstp_min
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,163,48,71787.000000,12079.500000,59707.500000,440.411043,74.107362,366.303681,1495.562500,251.656250,1243.906250,0.294479,1238198400,1224115200
2,180,48,22406.100000,15883.720000,6522.380000,124.478333,88.242889,36.235444,466.793750,330.910833,135.882917,0.266667,1383955200,1368403200
4,169,93,51163.000000,30823.100000,20339.900000,302.739645,182.385207,120.354438,550.139785,331.431183,218.708602,0.550296,1238284800,1223683200
6,179,61,41733.770000,15385.270000,26348.500000,233.149553,85.951229,147.198324,684.160164,252.217541,431.942623,0.340782,1220227200,1204761600
7,175,66,59958.010000,22642.760000,37315.250000,342.617200,129.387200,213.230000,908.454697,343.072121,565.382576,0.377143,1201996800,1186876800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73515,94,25,9477.597620,5109.004148,4368.593472,100.825507,54.351108,46.474399,379.103905,204.360166,174.743739,0.265957,1161475200,1153353600
73517,180,272,212302.562407,126112.342194,86190.220212,1179.458680,700.624123,478.834557,780.524126,463.648317,316.875810,1.511111,1161475200,1145923200
73522,180,116,45701.276358,24957.995111,20743.281247,253.895980,138.655528,115.240451,393.976520,215.155130,178.821390,0.644444,1161475200,1145923200
73525,180,226,145276.809894,92246.439896,53030.369998,807.093388,512.480222,294.613167,642.817743,408.170088,234.647655,1.255556,1161475200,1145923200


# 合并特征、处理缺失值、保存结果

In [89]:
# 合并交易特征
df_concat = pd.concat([df_train_test, df_bank_stat], axis=1)
print(df_concat.shape)
df_concat.reset_index(inplace=True)
df_concat = df_concat.rename(columns={'index':'id'})
print(df_concat.columns)

# 缺失值处理
df_concat = df_concat.replace([np.inf, -np.inf], np.nan)

col_str = 'career,balance_limit_cut'
for col_name in col_str.split(','):
    value = df_concat[col_name].mode()[0]
    print(col_name, value)
    df_concat[col_name].fillna(value, inplace=True)
    
col_str = 'balance_limit,balance_log,balance_limit_log,balance_account_avg_log,tx_max_min_days,tx_count,total_amount,1_amount,0_amount,total_amount_avg,1_amount_avg,0_amount_avg,total_amount_avg2,1_amount_avg2,0_amount_avg2,tx_count_avg,tx_tmstp_max,tx_tmstp_min' 
for col_name in col_str.split(',') : 
#     value = df_concat[col_name].mean(skipna=True)
    # 替换为0值
    value = 0
    df_concat[col_name].fillna(value,inplace=True)
print('isna \n',df_concat.isna().sum())

#TODO 离群点处理

df_result_train = df_concat.iloc[:df_train.shape[0],:]
df_result_test = df_concat.iloc[df_train.shape[0]:,:]
df_result_test = df_result_test.drop(['label'], axis=1)

#保存结果
df_result_train.to_csv(output_train_path, index=False)
df_result_test.to_csv(output_test_path, index=False)
print('train result', df_result_train.shape)
print('test result', df_result_test.shape)
print('output_path : ', output_train_path, output_test_path)
print('process time : ',time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime()))
# !ls

(73534, 116)
Index(['id', 'title', 'career', 'zip_code', 'residence', 'loan', 'term',
       'interest_rate', 'issue_time', 'syndicated',
       ...
       '0_amount', 'total_amount_avg', '1_amount_avg', '0_amount_avg',
       'total_amount_avg2', '1_amount_avg2', '0_amount_avg2', 'tx_count_avg',
       'tx_tmstp_max', 'tx_tmstp_min'],
      dtype='object', length=117)
career 10.0
balance_limit_cut 28
isna 
 id                                  0
title                               0
career                              0
zip_code                            0
residence                           0
loan                                0
term                                0
interest_rate                       0
issue_time                          0
syndicated                          0
installment                         0
record_time                         0
history_time                        0
total_accounts                      0
balance_accounts                    0
balance_limit     

In [90]:
print('done time : ',time.strftime( '%Y-%m-%d %H:%M:%S', time.localtime()))
!wc -l $output_train_path
!wc -l $output_test_path
df_result_train

done time :  2025-09-01 17:56:43
53481 train.dat.v25
20055 test.dat.v25


Unnamed: 0,id,title,career,zip_code,residence,loan,term,interest_rate,issue_time,syndicated,installment,record_time,history_time,total_accounts,balance_accounts,balance_limit,balance,level,label,cash_has_trans,cash_span_months,cash_total_in_amount,cash_total_in_count,cash_total_in_max,cash_total_out_amount,cash_total_out_count,cash_total_out_max,cash_total_net_amount,cash_month_has_trans_months,cash_month_neg_amount_months,month,month_in_amount,month_in_count,month_in_max,month_out_amount,month_out_count,month_out_max,month_net_amount,cash_month_in_amount_last1m,cash_month_out_amount_last1m,cash_month_net_amount_last1m,cash_month_in_amount_last2m,cash_month_out_amount_last2m,cash_month_net_amount_last2m,cash_month_in_amount_last3m,cash_month_out_amount_last3m,cash_month_net_amount_last3m,cash_month_in_amount_last4m,cash_month_out_amount_last4m,cash_month_net_amount_last4m,cash_month_in_amount_last6m,cash_month_out_amount_last6m,cash_month_net_amount_last6m,cash_avg_out_amount,cash_avg_in_amount,cash_avg_net_amount,cash_aum_month_avg_last_1m,cash_aum_month_avg_last_2m,cash_aum_month_avg_last_3m,cash_aum_month_avg_last_4m,cash_aum_month_avg_last_6m,loan_total_loan,loan_month_repay,max_txn_time,loan_has_repayment,history_time_s,time_account_day,loan_repay_term,loan_remain_repay_term,loan_remain_repay_amount,loan_total_debt,loan_balance_ratio,loan_loan_ratio,balance_account_avg,loan_term_avg,balance_accounts_ratio,loan_log,balance_account_avg_log,loan_term_avg_log,balance_accounts_ratio_log,interest_rate_log,balance_log,balance_limit_log,balance_accounts_log,zip_province,zip_city,level_hash,level_ord,grade,interest_rate_cut,interest_rate_log_cut,balance_cut,loan_cut,balance_limit_cut,loan_term_avg_cut,balance_account_avg_cut,record_time_year,record_time_month,record_time_week,record_time_year_month,level_default_ratio,interest_rate_default_ratio,term_default_ratio,tx_max_min_days,tx_count,total_amount,1_amount,0_amount,total_amount_avg,1_amount_avg,0_amount_avg,total_amount_avg2,1_amount_avg2,0_amount_avg2,tx_count_avg,tx_tmstp_max,tx_tmstp_min
0,0,9,0.0,221373,1,7200,36,0.1095,1238631967,0,1,1238630622,472006661,17.0,9.0,36200.0,13856.00,A4,0.0,1,6.0,59707.500000,6.0,10638.10000,12079.500000,42.0,1301.060000,47628.000000,6.0,0.0,2009-03-01,10328.400000,1.0,10328.400000,763.790000,2.0,410.230000,9564.610000,10328.400000,763.790000,9564.610000,10328.400000,1954.870000,9564.61000,30329.700000,6087.250000,25433.530000,40298.050000,7029.390000,34459.74000,59707.500000,12079.500000,48819.08000,2013.250000,9951.250000,7938.000000,47628.000000,47628.000000,47628.000000,47628.000000,47628.000000,7988.40,221.900000,2009-03-28,0,1984-12-16 00:57:41,8867.0,0,36,7988.400000,21844.400000,0.382762,0.198895,1539.555556,200.000000,0.529412,8.881836,7.339249,5.298317,-0.635989,-2.211831,9.536474,10.496814,2.197225,22,2213,629,4.0,1,10,20,40,4,43,6,42,2009,4,14,200904,0.167864,0.166035,0.148731,163.0,48.0,71787.000000,12079.500000,59707.500000,440.411043,74.107362,366.303681,1495.562500,251.656250,1243.906250,0.294479,1.238198e+09,1.224115e+09
1,1,8,10.0,311681,0,21300,36,0.1295,1128212052,0,0,1161907665,763779041,17.0,9.0,20400.0,13773.00,B0,1.0,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,24058.35,668.287500,NaT,0,1994-03-16 00:50:41,,0,36,24058.350000,37831.350000,0.675147,1.044118,1530.333333,591.666667,0.529412,9.966462,7.333241,6.382943,-0.635989,-2.044074,9.530465,9.923290,2.197225,31,3116,847,6.0,2,14,25,40,19,35,20,42,2006,10,43,200610,0.183134,0.184066,0.148731,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00
2,2,8,7.0,271562,1,10400,60,0.2105,1249171509,0,0,1383958593,727143443,17.0,9.0,10800.0,2023.00,B4,0.0,1,7.0,6522.380000,4.0,1696.54000,15883.720000,44.0,3394.940000,-9361.340000,7.0,3.0,2013-11-01,,,,3609.080000,3.0,3394.940000,,,3609.080000,,1696.540000,5655.030000,-349.41000,3331.490000,8787.560000,-1846.990000,3331.490000,10702.540000,-1846.99000,5015.840000,13023.330000,-1033.12000,2269.102857,931.768571,-1337.334286,-9273.233333,-5538.170000,-5648.978333,-3118.624286,-1375.686667,12589.20,209.820000,2013-11-09,1,1993-01-16 00:17:23,7601.0,52,8,1678.560000,3701.560000,0.187315,0.962963,224.777778,173.333333,0.529412,9.249561,5.415112,5.155217,-0.635989,-1.558270,7.612337,9.287301,2.197225,27,2715,327,10.0,2,30,38,19,10,29,4,1,2013,11,45,201311,0.238525,0.241333,0.316692,180.0,48.0,22406.100000,15883.720000,6522.380000,124.478333,88.242889,36.235444,466.793750,330.910833,135.882917,0.266667,1.383955e+09,1.368403e+09
3,3,7,2.0,522083,0,33050,36,0.1640,1172882234,0,1,1214353935,687660346,17.0,9.0,24700.0,21992.00,B3,0.0,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,38470.20,1068.616667,NaT,0,1991-10-17 00:45:46,,0,36,38470.200000,60462.200000,0.890364,1.338057,2443.555556,918.055556,0.529412,10.405777,7.801209,6.822258,-0.635989,-1.807889,9.998434,10.114559,2.197225,52,5220,414,9.0,2,21,31,45,22,38,27,46,2008,6,26,200806,0.233343,0.229479,0.148731,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00
4,4,8,3.0,101026,1,5200,36,0.1435,1172882384,0,0,1240274527,322012875,17.0,9.0,5100.0,1669.00,B2,1.0,1,6.0,20339.900000,6.0,3672.20000,30823.100000,87.0,3561.840000,-10483.200000,6.0,3.0,2009-03-01,3344.250000,1.0,3344.250000,12591.130000,25.0,3561.840000,-9246.880000,3344.250000,12591.130000,-9246.880000,6538.000000,14381.290000,-7843.29000,10210.200000,21544.170000,-11333.970000,13370.350000,23091.210000,-9720.86000,20339.900000,30823.100000,-10483.20000,5137.183333,3389.983333,-1747.200000,-6613.362917,-402.454000,1568.347500,550.397778,-294.607273,5946.20,165.172222,2009-03-29,1,1980-03-16 00:01:15,10604.0,25,11,1816.894444,3485.894444,0.327255,1.019608,185.444444,144.444444,0.529412,8.556414,5.222755,4.972895,-0.635989,-1.941420,7.419980,8.536996,2.197225,10,1010,492,8.0,2,17,28,16,1,24,1,1,2009,4,17,200904,0.215479,0.221494,0.148731,169.0,93.0,51163.000000,30823.100000,20339.900000,302.739645,182.385207,120.354438,550.139785,331.431183,218.708602,0.550296,1.238285e+09,1.223683e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53475,53475,2,2.0,603000,1,9000,12,0.2355,1172880000,0,0,1157587200,1061769600,12.0,5.0,3535.0,2595.73,A4,0.0,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,11119.50,926.625000,NaT,0,2003-08-25 00:00:00,,0,12,11119.500000,13715.230000,0.734294,2.545969,519.146000,750.000000,0.416667,9.104980,6.252185,6.620073,-0.875469,-1.446044,7.861623,8.170469,1.609438,60,6030,629,4.0,1,35,42,23,7,20,23,9,2006,9,36,200609,0.167864,0.218524,0.182842,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00
53476,53476,0,10.0,601702,1,8000,12,0.3070,1160092800,0,0,1138665600,1038268800,5.0,2.0,1965.0,1433.34,B2,0.0,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,10456.00,871.333333,NaT,0,2002-11-26 00:00:00,,0,12,10456.000000,11889.340000,0.729435,4.071247,716.670000,666.666667,0.400000,8.987197,6.574615,6.502290,-0.916291,-1.180908,7.267763,7.583248,0.693147,60,6017,492,8.0,2,48,49,13,5,12,21,18,2006,1,5,200601,0.215479,0.178876,0.182842,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00
53477,53477,2,10.0,602808,1,10000,12,0.0940,1180310400,0,0,1108771200,1087603200,12.0,5.0,7253.0,3813.79,B2,0.0,1,7.0,44160.408112,112.0,1409.02403,69300.836223,139.0,2215.151184,-25140.428112,7.0,6.0,2006-10-01,4566.674466,13.0,851.874207,6235.746855,13.0,1176.692442,-1669.072389,4566.674466,6235.746855,-1669.072389,14253.790478,19131.482308,-4877.69183,23688.804651,33813.686172,-10124.881521,31954.603178,45277.872759,-13323.26958,44160.408112,69291.373841,-25130.96573,9900.119460,6308.629730,-3591.489730,-24030.025452,-21880.937345,-17163.032750,-14052.944817,-1880.823969,10940.00,911.666667,2006-10-22,0,2004-06-19 00:00:00,855.0,0,12,10940.000000,14753.790000,0.525822,1.378740,762.758000,833.333333,0.416667,9.210340,6.636941,6.725434,-0.875469,-2.364460,8.246379,8.889170,1.609438,60,6028,492,8.0,2,7,16,27,9,26,25,21,2005,2,7,200502,0.215479,0.140234,0.182842,180.0,251.0,113461.244335,69300.836223,44160.408112,630.340246,385.004646,245.335601,452.036830,276.098949,175.937881,1.394444,1.161475e+09,1.145923e+09
53478,53478,0,10.0,602102,2,9000,12,0.2440,1176768000,0,0,1159660800,1071792000,3.0,3.0,2045.0,1006.40,A2,0.0,0,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,0.000000,0.000000,0.000000,,,,,,11196.00,933.000000,NaT,0,2003-12-19 00:00:00,,0,12,11196.000000,12202.400000,0.492127,4.400978,335.466667,750.000000,1.000000,9.104980,5.815523,6.620073,0.000000,-1.410587,6.914135,7.623153,1.098612,60,6021,138,2.0,1,36,43,10,7,12,23,3,2006,10,39,200610,0.132637,0.192063,0.182842,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00
