## Data

In [2]:
import numpy as np
import pandas as pd

In [3]:
DATA_PATH = '../data/train_data.csv'
dtype = {
    'userID': 'int16',
    'answerCode': 'int8',
    'KnowledgeTag': 'int16'
}

data = pd.read_csv(DATA_PATH, dtype=dtype, parse_dates=['Timestamp'])
data = data.sort_values(by=['userID', 'Timestamp']).reset_index(drop=True)
data

Unnamed: 0,userID,assessmentItemID,testId,answerCode,Timestamp,KnowledgeTag
0,0,A060001001,A060000001,1,2020-03-24 00:17:11,7224
1,0,A060001002,A060000001,1,2020-03-24 00:17:14,7225
2,0,A060001003,A060000001,1,2020-03-24 00:17:22,7225
3,0,A060001004,A060000001,1,2020-03-24 00:17:29,7225
4,0,A060001005,A060000001,1,2020-03-24 00:17:36,7225
...,...,...,...,...,...,...
2266581,7441,A030071005,A030000071,0,2020-06-05 06:50:21,438
2266582,7441,A040165001,A040000165,1,2020-08-21 01:06:39,8836
2266583,7441,A040165002,A040000165,1,2020-08-21 01:06:50,8836
2266584,7441,A040165003,A040000165,1,2020-08-21 01:07:36,8836


In [19]:
df = data.copy()

In [20]:
df['question'] = df['assessmentItemID'].apply(lambda x: x[1:])
df['testID'] = df['assessmentItemID'].apply(lambda x: x[1:7])
df['testID1'] = df['assessmentItemID'].apply(lambda x: x[1:4])
df['testID2'] = df['assessmentItemID'].apply(lambda x: x[4:7])
df['testNum'] = df['assessmentItemID'].apply(lambda x: x[7:])
df['KnowledgeTag'] = df['KnowledgeTag'].astype('str')
df['Time'] = (df.groupby(['userID','testID'])['Timestamp'].shift(-1) - df['Timestamp']).apply(lambda x: x.seconds)
df['Time'] = df['Time'].ffill().astype('int')
df = df[['userID','question','testID','testID1','testID2','testNum','KnowledgeTag','Timestamp','Time','answerCode']]
df

Unnamed: 0,userID,question,testID,testID1,testID2,testNum,KnowledgeTag,Timestamp,Time,answerCode
0,0,060001001,060001,060,001,001,7224,2020-03-24 00:17:11,3,1
1,0,060001002,060001,060,001,002,7225,2020-03-24 00:17:14,8,1
2,0,060001003,060001,060,001,003,7225,2020-03-24 00:17:22,7,1
3,0,060001004,060001,060,001,004,7225,2020-03-24 00:17:29,7,1
4,0,060001005,060001,060,001,005,7225,2020-03-24 00:17:36,11,1
...,...,...,...,...,...,...,...,...,...,...
2266581,7441,030071005,030071,030,071,005,438,2020-06-05 06:50:21,24,0
2266582,7441,040165001,040165,040,165,001,8836,2020-08-21 01:06:39,11,1
2266583,7441,040165002,040165,040,165,002,8836,2020-08-21 01:06:50,46,1
2266584,7441,040165003,040165,040,165,003,8836,2020-08-21 01:07:36,73,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2266586 entries, 0 to 2266585
Data columns (total 9 columns):
 #   Column        Dtype         
---  ------        -----         
 0   userID        int16         
 1   testID        object        
 2   testID1       object        
 3   testID2       object        
 4   testNum       object        
 5   KnowledgeTag  object        
 6   Timestamp     datetime64[ns]
 7   Time          int64         
 8   answerCode    int8          
dtypes: datetime64[ns](1), int16(1), int64(1), int8(1), object(5)
memory usage: 127.5+ MB


In [6]:
df.isnull().sum()

userID          0
testID          0
testID1         0
testID2         0
testNum         0
KnowledgeTag    0
Timestamp       0
Time            0
answerCode      0
dtype: int64

## Feature Engineering

### shift

In [21]:
df['Time_s1'] = df.groupby(['userID','testID'])['Time'].shift(1)
df['Time_s2'] = df.groupby(['userID','testID'])['Time'].shift(2)
df['Time_s3'] = df.groupby(['userID','testID'])['Time'].shift(3)
# df['Time_s4'] = df.groupby(['userID','testID'])['Time'].shift(4)
# df['Time_s5'] = df.groupby(['userID','testID'])['Time'].shift(5)

df['answer_s1'] = df.groupby(['userID','testID'])['answerCode'].shift(1)
df['answer_s2'] = df.groupby(['userID','testID'])['answerCode'].shift(2)
df['answer_s3'] = df.groupby(['userID','testID'])['answerCode'].shift(3)
# df['answer_s4'] = df.groupby(['userID','testID'])['answerCode'].shift(4)
# df['answer_s5'] = df.groupby(['userID','testID'])['answerCode'].shift(5)

In [10]:
print(len(df))
df.isnull().sum()

2266586


userID                0
testID                0
testID1               0
testID2               0
testNum               0
KnowledgeTag          0
Timestamp             0
Time                  0
answerCode            0
Time_s1          365164
Time_s2          730328
Time_s3         1095233
Time_s4         1458558
Time_s5         1809606
answer_s1        365164
answer_s2        730328
answer_s3       1095233
answer_s4       1458558
answer_s5       1809606
dtype: int64

### 이동 평균

In [22]:
df['rolling_Time'] = df.groupby(['userID'])['Time'].rolling(3).mean().values
df['rolling_correct'] = df.groupby(['userID'])['answerCode'].rolling(3).mean().values
df

Unnamed: 0,userID,question,testID,testID1,testID2,testNum,KnowledgeTag,Timestamp,Time,answerCode,Time_s1,Time_s2,Time_s3,answer_s1,answer_s2,answer_s3,rolling_Time,rolling_correct
0,0,060001001,060001,060,001,001,7224,2020-03-24 00:17:11,3,1,,,,,,,,
1,0,060001002,060001,060,001,002,7225,2020-03-24 00:17:14,8,1,3.0,,,1.0,,,,
2,0,060001003,060001,060,001,003,7225,2020-03-24 00:17:22,7,1,8.0,3.0,,1.0,1.0,,6.000000,1.000000
3,0,060001004,060001,060,001,004,7225,2020-03-24 00:17:29,7,1,7.0,8.0,3.0,1.0,1.0,1.0,7.333333,1.000000
4,0,060001005,060001,060,001,005,7225,2020-03-24 00:17:36,11,1,7.0,7.0,8.0,1.0,1.0,1.0,8.333333,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2266581,7441,030071005,030071,030,071,005,438,2020-06-05 06:50:21,24,0,24.0,34.0,40.0,0.0,1.0,0.0,27.333333,0.333333
2266582,7441,040165001,040165,040,165,001,8836,2020-08-21 01:06:39,11,1,,,,,,,19.666667,0.333333
2266583,7441,040165002,040165,040,165,002,8836,2020-08-21 01:06:50,46,1,11.0,,,1.0,,,27.000000,0.666667
2266584,7441,040165003,040165,040,165,003,8836,2020-08-21 01:07:36,73,1,46.0,11.0,,1.0,1.0,,43.333333,1.000000


### 누적합 : User 별 문제 수 / 정답 수 / 정답률

In [23]:
# 문제 수
df[f'user_past_count'] = df.groupby('userID').cumcount()
# 정답 수
df['shift'] = df.groupby('userID')['answerCode'].shift().fillna(0)
df[f'user_past_correct'] = df.groupby('userID')['shift'].cumsum()
df = df.drop('shift', axis=1)
# 정답률
df[f'user_past_average_correct'] = (df[f'user_past_correct'] / df[f'user_past_count']).fillna(0)
df

Unnamed: 0,userID,question,testID,testID1,testID2,testNum,KnowledgeTag,Timestamp,Time,answerCode,...,Time_s2,Time_s3,answer_s1,answer_s2,answer_s3,rolling_Time,rolling_correct,user_past_count,user_past_correct,user_past_average_correct
0,0,060001001,060001,060,001,001,7224,2020-03-24 00:17:11,3,1,...,,,,,,,,0,0.0,0.000000
1,0,060001002,060001,060,001,002,7225,2020-03-24 00:17:14,8,1,...,,,1.0,,,,,1,1.0,1.000000
2,0,060001003,060001,060,001,003,7225,2020-03-24 00:17:22,7,1,...,3.0,,1.0,1.0,,6.000000,1.000000,2,2.0,1.000000
3,0,060001004,060001,060,001,004,7225,2020-03-24 00:17:29,7,1,...,8.0,3.0,1.0,1.0,1.0,7.333333,1.000000,3,3.0,1.000000
4,0,060001005,060001,060,001,005,7225,2020-03-24 00:17:36,11,1,...,7.0,8.0,1.0,1.0,1.0,8.333333,1.000000,4,4.0,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2266581,7441,030071005,030071,030,071,005,438,2020-06-05 06:50:21,24,0,...,34.0,40.0,0.0,1.0,0.0,27.333333,0.333333,4,1.0,0.250000
2266582,7441,040165001,040165,040,165,001,8836,2020-08-21 01:06:39,11,1,...,,,,,,19.666667,0.333333,5,1.0,0.200000
2266583,7441,040165002,040165,040,165,002,8836,2020-08-21 01:06:50,46,1,...,,,1.0,,,27.000000,0.666667,6,2.0,0.333333
2266584,7441,040165003,040165,040,165,003,8836,2020-08-21 01:07:36,73,1,...,11.0,,1.0,1.0,,43.333333,1.000000,7,3.0,0.428571


### 누적합 : Feature 별 문제 수 / 정답 수 / 정답률

In [24]:
feature_list = ['question','testID','testID1','testID2','testNum','KnowledgeTag']
for feature in feature_list:
    # 문제 수
    df[f'{feature}_past_count'] = df.groupby(feature).cumcount()
    # 정답 수
    df['shift'] = df.groupby(feature)['answerCode'].shift().fillna(0)
    df[f'{feature}_past_correct'] = df.groupby(feature)['shift'].cumsum()
    df = df.drop('shift', axis=1)
    # 정답률
    df[f'{feature}_past_average_correct'] = (df[f'{feature}_past_correct'] / df[f'{feature}_past_count']).fillna(0)
df

Unnamed: 0,userID,question,testID,testID1,testID2,testNum,KnowledgeTag,Timestamp,Time,answerCode,...,testID1_past_average_correct,testID2_past_count,testID2_past_correct,testID2_past_average_correct,testNum_past_count,testNum_past_correct,testNum_past_average_correct,KnowledgeTag_past_count,KnowledgeTag_past_correct,KnowledgeTag_past_average_correct
0,0,060001001,060001,060,001,001,7224,2020-03-24 00:17:11,3,1,...,0.000000,0,0.0,0.000000,0,0.0,0.000000,0,0.0,0.000000
1,0,060001002,060001,060,001,002,7225,2020-03-24 00:17:14,8,1,...,1.000000,1,1.0,1.000000,0,0.0,0.000000,0,0.0,0.000000
2,0,060001003,060001,060,001,003,7225,2020-03-24 00:17:22,7,1,...,1.000000,2,2.0,1.000000,0,0.0,0.000000,1,1.0,1.000000
3,0,060001004,060001,060,001,004,7225,2020-03-24 00:17:29,7,1,...,1.000000,3,3.0,1.000000,0,0.0,0.000000,2,2.0,1.000000
4,0,060001005,060001,060,001,005,7225,2020-03-24 00:17:36,11,1,...,1.000000,4,4.0,1.000000,0,0.0,0.000000,3,3.0,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2266581,7441,030071005,030071,030,071,005,438,2020-06-05 06:50:21,24,0,...,0.702240,13697,8751.0,0.638899,360606,216052.0,0.599136,4079,2814.0,0.689875
2266582,7441,040165001,040165,040,165,001,8836,2020-08-21 01:06:39,11,1,...,0.684052,7542,5359.0,0.710554,371754,278784.0,0.749915,3147,2195.0,0.697490
2266583,7441,040165002,040165,040,165,002,8836,2020-08-21 01:06:50,46,1,...,0.684053,7543,5360.0,0.710593,370971,267122.0,0.720062,3148,2196.0,0.697586
2266584,7441,040165003,040165,040,165,003,8836,2020-08-21 01:07:36,73,1,...,0.684054,7544,5361.0,0.710631,371639,255603.0,0.687772,3149,2197.0,0.697682


### 누적합 : User & Feature 별 문제 수 / 정답 수 / 정답률

In [25]:
feature_list = ['question','testID','testID1','testID2','testNum','KnowledgeTag']
for feature in feature_list:
    # 문제 수
    df[f'user_{feature}_past_count'] = df.groupby(['userID',feature]).cumcount()
    # 정답 수
    df['shift'] = df.groupby(['userID',feature])['answerCode'].shift().fillna(0)
    df[f'user_{feature}_past_correct'] = df.groupby(['userID',feature])['shift'].cumsum()
    df = df.drop('shift', axis=1)
    # 정답률
    df[f'user_{feature}_past_average_correct'] = (df[f'user_{feature}_past_correct'] / df[f'user_{feature}_past_count']).fillna(0)
df

Unnamed: 0,userID,question,testID,testID1,testID2,testNum,KnowledgeTag,Timestamp,Time,answerCode,...,user_testID1_past_average_correct,user_testID2_past_count,user_testID2_past_correct,user_testID2_past_average_correct,user_testNum_past_count,user_testNum_past_correct,user_testNum_past_average_correct,user_KnowledgeTag_past_count,user_KnowledgeTag_past_correct,user_KnowledgeTag_past_average_correct
0,0,060001001,060001,060,001,001,7224,2020-03-24 00:17:11,3,1,...,0.00,0,0.0,0.00,0,0.0,0.0,0,0.0,0.00
1,0,060001002,060001,060,001,002,7225,2020-03-24 00:17:14,8,1,...,1.00,1,1.0,1.00,0,0.0,0.0,0,0.0,0.00
2,0,060001003,060001,060,001,003,7225,2020-03-24 00:17:22,7,1,...,1.00,2,2.0,1.00,0,0.0,0.0,1,1.0,1.00
3,0,060001004,060001,060,001,004,7225,2020-03-24 00:17:29,7,1,...,1.00,3,3.0,1.00,0,0.0,0.0,2,2.0,1.00
4,0,060001005,060001,060,001,005,7225,2020-03-24 00:17:36,11,1,...,1.00,4,4.0,1.00,0,0.0,0.0,3,3.0,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2266581,7441,030071005,030071,030,071,005,438,2020-06-05 06:50:21,24,0,...,0.25,4,1.0,0.25,0,0.0,0.0,4,1.0,0.25
2266582,7441,040165001,040165,040,165,001,8836,2020-08-21 01:06:39,11,1,...,0.00,0,0.0,0.00,1,0.0,0.0,0,0.0,0.00
2266583,7441,040165002,040165,040,165,002,8836,2020-08-21 01:06:50,46,1,...,1.00,1,1.0,1.00,1,0.0,0.0,1,1.0,1.00
2266584,7441,040165003,040165,040,165,003,8836,2020-08-21 01:07:36,73,1,...,1.00,2,2.0,1.00,1,1.0,1.0,2,2.0,1.00


### 상관계수

In [26]:
print(len(df.columns))
df.columns

57


Index(['userID', 'question', 'testID', 'testID1', 'testID2', 'testNum',
       'KnowledgeTag', 'Timestamp', 'Time', 'answerCode', 'Time_s1', 'Time_s2',
       'Time_s3', 'answer_s1', 'answer_s2', 'answer_s3', 'rolling_Time',
       'rolling_correct', 'user_past_count', 'user_past_correct',
       'user_past_average_correct', 'question_past_count',
       'question_past_correct', 'question_past_average_correct',
       'testID_past_count', 'testID_past_correct',
       'testID_past_average_correct', 'testID1_past_count',
       'testID1_past_correct', 'testID1_past_average_correct',
       'testID2_past_count', 'testID2_past_correct',
       'testID2_past_average_correct', 'testNum_past_count',
       'testNum_past_correct', 'testNum_past_average_correct',
       'KnowledgeTag_past_count', 'KnowledgeTag_past_correct',
       'KnowledgeTag_past_average_correct', 'user_question_past_count',
       'user_question_past_correct', 'user_question_past_average_correct',
       'user_testID_past

In [27]:
df[['answerCode', 'userID', 'question', 'testID', 'testID1', 'testID2',
    'testNum', 'KnowledgeTag', 'Timestamp', 'Time']].corr()

Unnamed: 0,answerCode,userID,question,testID,testID1,testID2,testNum,KnowledgeTag,Timestamp,Time
answerCode,1.0,-0.035235,-0.201803,-0.201803,-0.201841,0.0587,-0.165713,0.012086,-0.036482,-0.000902
userID,-0.035235,1.0,-0.065527,-0.065527,-0.065891,0.179791,-0.021852,-0.015064,0.126468,-0.016541
question,-0.201803,-0.065527,1.0,1.0,0.999998,-0.204635,0.236795,-0.150523,0.044949,0.000802
testID,-0.201803,-0.065527,1.0,1.0,0.999998,-0.204635,0.236795,-0.150523,0.044949,0.000802
testID1,-0.201841,-0.065891,0.999998,0.999998,1.0,-0.206732,0.236791,-0.150631,0.043472,0.000809
testID2,0.0587,0.179791,-0.204635,-0.204635,-0.206732,1.0,-0.046904,0.080247,0.664978,-0.003525
testNum,-0.165713,-0.021852,0.236795,0.236795,0.236791,-0.046904,1.0,0.012776,0.010575,0.048217
KnowledgeTag,0.012086,-0.015064,-0.150523,-0.150523,-0.150631,0.080247,0.012776,1.0,0.130494,3.8e-05
Timestamp,-0.036482,0.126468,0.044949,0.044949,0.043472,0.664978,0.010575,0.130494,1.0,-0.015716
Time,-0.000902,-0.016541,0.000802,0.000802,0.000809,-0.003525,0.048217,3.8e-05,-0.015716,1.0


In [29]:
df[['answerCode','Time_s1', 'Time_s2', 'Time_s3',
    'answer_s1', 'answer_s2', 'answer_s3', 'rolling_Time', 'rolling_correct',
    'user_past_count', 'user_past_correct', 'user_past_average_correct']].corr()

Unnamed: 0,answerCode,Time_s1,Time_s2,Time_s3,answer_s1,answer_s2,answer_s3,rolling_Time,rolling_correct,user_past_count,user_past_correct,user_past_average_correct
answerCode,1.0,0.013902,0.012093,0.013409,0.367876,0.343081,0.324096,0.012775,0.724547,0.012336,0.079323,0.290078
Time_s1,0.013902,1.0,0.001249,-0.002164,-0.003971,0.000916,0.004059,0.169914,0.003212,0.029408,0.030857,0.011088
Time_s2,0.012093,0.001249,1.0,-0.000135,0.012418,-0.007025,-0.000761,0.197306,0.006231,0.03245,0.034029,0.012107
Time_s3,0.013409,-0.002164,-0.000135,1.0,0.01055,0.01241,-0.009621,0.00123,0.016032,0.036474,0.038311,0.014085
answer_s1,0.367876,-0.003971,0.012418,0.01055,1.0,0.37413,0.350807,0.011397,0.751686,0.0125,0.084159,0.341927
answer_s2,0.343081,0.000916,-0.007025,0.01241,0.37413,1.0,0.379971,0.008645,0.74396,0.010222,0.085053,0.350382
answer_s3,0.324096,0.004059,-0.000761,-0.009621,0.350807,0.379971,1.0,0.01233,0.462428,0.009319,0.087166,0.358018
rolling_Time,0.012775,0.169914,0.197306,0.00123,0.011397,0.008645,0.01233,1.0,0.012375,0.003867,0.008153,0.020916
rolling_correct,0.724547,0.003212,0.006231,0.016032,0.751686,0.74396,0.462428,0.012375,1.0,0.017487,0.111642,0.439339
user_past_count,0.012336,0.029408,0.03245,0.036474,0.0125,0.010222,0.009319,0.003867,0.017487,1.0,0.952046,0.085161


In [30]:
df[['answerCode', 
    'question_past_count', 'question_past_correct', 'question_past_average_correct',
    'testID_past_count', 'testID_past_correct', 'testID_past_average_correct',
    'testID1_past_count', 'testID1_past_correct', 'testID1_past_average_correct',
    'testID2_past_count', 'testID2_past_correct', 'testID2_past_average_correct',
    'testNum_past_count', 'testNum_past_correct', 'testNum_past_average_correct',
    'KnowledgeTag_past_count', 'KnowledgeTag_past_correct', 'KnowledgeTag_past_average_correct',]].corr()

Unnamed: 0,answerCode,question_past_count,question_past_correct,question_past_average_correct,testID_past_count,testID_past_correct,testID_past_average_correct,testID1_past_count,testID1_past_correct,testID1_past_average_correct,testID2_past_count,testID2_past_correct,testID2_past_average_correct,testNum_past_count,testNum_past_correct,testNum_past_average_correct,KnowledgeTag_past_count,KnowledgeTag_past_correct,KnowledgeTag_past_average_correct
answerCode,1.0,-0.059749,0.119692,0.377333,-0.107166,-0.004687,0.294803,-0.022178,0.029185,0.22338,-0.071605,-0.057767,0.122733,0.028425,0.046287,0.170441,0.014195,0.075194,0.284245
question_past_count,-0.059749,1.0,0.86518,-0.038786,0.914466,0.904425,-0.085568,0.815037,0.76992,-0.073303,0.875508,0.883914,-0.137408,0.765014,0.733077,-0.042813,0.693722,0.635031,-0.060619
question_past_correct,0.119692,0.86518,1.0,0.367724,0.71478,0.861477,0.235334,0.780214,0.807365,0.182329,0.749638,0.778468,-0.000388,0.780073,0.771622,0.129023,0.691798,0.722385,0.246353
question_past_average_correct,0.377333,-0.038786,0.367724,1.0,-0.145777,0.08952,0.688588,0.066702,0.169209,0.48276,-0.062438,-0.028354,0.273489,0.156082,0.191154,0.369706,0.099285,0.229154,0.650315
testID_past_count,-0.107166,0.914466,0.71478,-0.145777,1.0,0.913586,-0.232492,0.725188,0.616182,-0.289278,0.821174,0.82343,-0.163301,0.591159,0.555284,-0.171949,0.566077,0.469219,-0.213881
testID_past_correct,-0.004687,0.904425,0.861477,0.08952,0.913586,1.0,0.098226,0.805959,0.768922,-0.021405,0.808019,0.834853,-0.028498,0.673883,0.639388,-0.117162,0.661681,0.648688,0.088251
testID_past_average_correct,0.294803,-0.085568,0.235334,0.688588,-0.232492,0.098226,1.0,0.061596,0.207686,0.684915,-0.118773,-0.071662,0.407681,0.092914,0.100067,0.15592,0.119589,0.298315,0.866068
testID1_past_count,-0.022178,0.815037,0.780214,0.066702,0.725188,0.805959,0.061596,1.0,0.965796,0.065856,0.779553,0.812202,0.045994,0.847025,0.811436,-0.049709,0.657778,0.634255,0.061253
testID1_past_correct,0.029185,0.76992,0.807365,0.169209,0.616182,0.768922,0.207686,0.965796,1.0,0.281469,0.716036,0.752153,0.081825,0.851107,0.820239,0.009473,0.693233,0.712152,0.218575
testID1_past_average_correct,0.22338,-0.073303,0.182329,0.48276,-0.289278,-0.021405,0.684915,0.065856,0.281469,1.0,-0.124903,-0.106353,0.177602,0.116564,0.127846,0.236643,0.188867,0.339401,0.732196


In [31]:
df[['answerCode', 
    'user_question_past_count', 'user_question_past_correct', 'user_question_past_average_correct',
    'user_testID_past_count', 'user_testID_past_correct', 'user_testID_past_average_correct',
    'user_testID1_past_count', 'user_testID1_past_correct', 'user_testID1_past_average_correct',
    'user_testID2_past_count', 'user_testID2_past_correct', 'user_testID2_past_average_correct',
    'user_testNum_past_count', 'user_testNum_past_correct', 'user_testNum_past_average_correct',
    'user_KnowledgeTag_past_count', 'user_KnowledgeTag_past_correct', 'user_KnowledgeTag_past_average_correct']].corr()

Unnamed: 0,answerCode,user_question_past_count,user_question_past_correct,user_question_past_average_correct,user_testID_past_count,user_testID_past_correct,user_testID_past_average_correct,user_testID1_past_count,user_testID1_past_correct,user_testID1_past_average_correct,user_testID2_past_count,user_testID2_past_correct,user_testID2_past_average_correct,user_testNum_past_count,user_testNum_past_correct,user_testNum_past_average_correct,user_KnowledgeTag_past_count,user_KnowledgeTag_past_correct,user_KnowledgeTag_past_average_correct
answerCode,1.0,-0.004976,0.009079,0.009796,-0.154971,0.114325,0.258937,0.028464,0.140285,0.407106,-0.12287,0.07009,0.244501,0.056689,0.119616,0.263451,-0.017581,0.131991,0.252647
user_question_past_count,-0.004976,1.0,0.820548,0.811143,0.420927,0.337216,0.028744,0.214107,0.195015,0.010644,0.278083,0.234182,0.024525,0.092403,0.085091,0.015453,0.271382,0.234789,0.037339
user_question_past_correct,0.009079,0.820548,1.0,0.993276,0.325471,0.327195,0.054267,0.179895,0.182354,0.033006,0.215089,0.229341,0.050995,0.085107,0.089485,0.038891,0.228404,0.234906,0.062484
user_question_past_average_correct,0.009796,0.811143,0.993276,1.0,0.320178,0.323722,0.054887,0.178446,0.182202,0.033822,0.211786,0.22714,0.051601,0.085326,0.090253,0.039637,0.224637,0.232805,0.063162
user_testID_past_count,-0.154971,0.420927,0.325471,0.320178,1.0,0.761468,0.259411,0.12065,0.079553,-0.065049,0.698333,0.560956,0.230887,-0.124899,-0.183577,-0.30131,0.37979,0.300574,0.183337
user_testID_past_correct,0.114325,0.337216,0.327195,0.323722,0.761468,1.0,0.647945,0.121972,0.185383,0.303248,0.51953,0.722224,0.602666,-0.065098,-0.074443,-0.059195,0.339897,0.472047,0.514894
user_testID_past_average_correct,0.258937,0.028744,0.054267,0.054887,0.259411,0.647945,1.0,0.042515,0.14751,0.461161,0.165107,0.459169,0.926923,0.01728,0.040045,0.120195,0.162661,0.36438,0.747775
user_testID1_past_count,0.028464,0.214107,0.179895,0.178446,0.12065,0.121972,0.042515,1.0,0.927721,0.081609,0.13107,0.134065,0.048906,0.721306,0.6702,0.118452,0.310366,0.295089,0.080369
user_testID1_past_correct,0.140285,0.195015,0.182354,0.182202,0.079553,0.185383,0.14751,0.927721,1.0,0.312677,0.083232,0.164678,0.143248,0.684567,0.707547,0.25768,0.312965,0.37223,0.191418
user_testID1_past_average_correct,0.407106,0.010644,0.033006,0.033822,-0.065049,0.303248,0.461161,0.081609,0.312677,1.0,-0.075799,0.194945,0.433909,0.085809,0.198236,0.503093,0.082906,0.294316,0.468288
