In [1]:
import sys
sys.path.append("..")

from dkt_dataset import Preprocess
from helper import train_base_lstm

In [2]:
import pandas as pd
from IPython.display import clear_output
from fe.feature import FEPipeline
from fe.agg import (
    AggFeBase,
    MakeCorrectCount, 
    MakeCorrectPercent, 
    MakeQuestionCount, 
    MakeTopNCorrectPercent
)

from fe.seq import (
    SplitAssessmentItemID,
    MakeFirstClass,
    MakeSecondClass,
    MakeTimeDiff,
    MakeYMD,
    ConvertTime
)

class MakeDifficultyByFirstClass(AggFeBase):
    name = "make_difficulty_by_firstclass"
    description = {
        "firstClassDifficulty" : "유저의 대분류별 정답률을 나타냅니다."
    }
    
    pre_fe = {"make_first_class"}
    agg_column = ["userID", "firstClass"]
    
    @classmethod
    def _transform(cls, df):
        grouped_df = df.groupby(cls.agg_column).answerCode.mean()
        a, b = zip(*grouped_df.index)
        
        right_df = pd.DataFrame(
            {"userID": a, "firstClass": b, "firstClassDifficulty": list(grouped_df)}
        )
        
        return right_df

In [36]:
from utils import get_args, get_root_dir
from helper import FeatureTestTrainer

In [20]:
class NewSplitPreprocess(Preprocess):
    def split_data(self):
        self.logger.info("Split based on User")
        self.logger.info(f"Original Train Dataset: {len(self.datas['train'])}")

        self.datas["valid"] = self.datas["test"][self.datas["test"].answerCode != -1]

        self.logger.info(f"Split Train Dataset: {len(self.datas['train'])}")
        self.logger.info(f"Split Valid Dataset: {len(self.datas['valid'])}")

In [21]:
args = get_args()
args.root_dir = get_root_dir("../split_test")
args.data_dir = "../../input/data/train_dataset/"

In [22]:
fe_pipeline = FEPipeline(
    args, [
        SplitAssessmentItemID,
        ConvertTime,
        MakeFirstClass,
        MakeSecondClass,
        MakeCorrectCount,
        MakeQuestionCount,
        MakeCorrectPercent,
        MakeDifficultyByFirstClass
    ]
)

columns = [
    "userID",
    "answerCode",
    "testPaper",
    "timeSec",
    "firstClass",
    "secondClass",
    "correctPer",
    "firstClassDifficulty"
]

pre_encoders = {
    "label": ["testPaper", "firstClass", "secondClass"],
    "min_max": ["correctPer"],
    "std": ["timeSec", "firstClassDifficulty"],
}

In [23]:
args.columns = columns[1:]

In [24]:
fe_pipeline.debug()
fe_pipeline.description()

[Feature Descriptions]

feature name : base_feature
feature type : seq
 - userID               : 사용자의 고유 번호입니다. 총 7,442명의 학생이 있습니다
 - assessmentItemID     : 사용자가 푼 문항의 일련 번호입니다.
 - testID               : 사용자가 푼 문항이 포함된 시험지의 일련 번호입니다.
 - answerCode           : 사용자가 푼 문항의 정답 여부를 담고 있는 이진 (0/1) 데이터입니다.
 - Timestamp            : 사용자가 문항을 푼 시간 정보입니다.
 - KnowledgeTag         : 사용자가 푼 문항의 고유 태그가 담겨져 있습니다.

feature name : split_assessmentitem_id
feature type : seq
 - testPaper            : 시험지 번호입니다.
 - testPaperCnt         : 시험지의 문항 번호입니다.

feature name : convert_time
feature type : seq
 - timeSec              : 사용자가 문항을 푼 타임스태프 정보입니다.

feature name : make_first_class
feature type : seq
 - firstClass           : 대분류에 해당합니다.

feature name : make_second_class
feature type : seq
 - secondClass          : 중분류에 해당합니다.

feature name : make_correct_count
feature type : agg
 - correctCnt           : 사용자가 맞춘 문항수를 나타냅니다.

feature name : make_question_count
feature type : agg
 - quesCnt              : 사

In [29]:
args.columns = ["userID"] + args.columns

In [66]:
preprocess = NewSplitPreprocess(args, fe_pipeline, columns)

In [67]:
preprocess.feature_engineering() # feature 
preprocess.split_data() # train => train, valid
preprocess.scaling(pre_encoders) # vector로 바꿔주고 Scaling해주고
preprocess.data_augmentation(choices=[3]) # [1, 2] 1: test_data추가, 2: user_id 기준 group_by 
clear_output()

In [68]:
args.columns

['answerCode',
 'testPaper',
 'timeSec',
 'firstClass',
 'secondClass',
 'correctPer',
 'firstClassDifficulty']

In [81]:
len(set(preprocess.get_data("train").userID.unique()).intersection(set(preprocess.get_data("test").userID.unique())))

0

In [82]:
len(set(preprocess.get_data("train").userID.unique()).intersection(set(preprocess.get_data("valid").userID.unique())))

0

In [157]:
train_data = preprocess.get_data("train_grouped")
valid_data = preprocess.get_data("valid_grouped")
test_data = preprocess.get_data("test_grouped")

In [74]:
from models.lstm.model import LSTM
trainer = FeatureTestTrainer(args, LSTM)

In [75]:
args.columns

['answerCode',
 'testPaper',
 'timeSec',
 'firstClass',
 'secondClass',
 'correctPer',
 'firstClassDifficulty']

In [76]:
train_loader, _ = trainer._get_loaders(train_data, valid_data)

In [77]:
trainer.debug(train_data, valid_data, test_data)

In [78]:
auc, acc = trainer.run_cv(train_data, valid_data, test_data, folds=5, seeds=[1, 2, 3, 4, 5])

VBox(children=(Label(value=' 5.77MB of 5.77MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6914605498313904
Training steps: 50 Loss: 0.6795990467071533
Training steps: 100 Loss: 0.66401606798172
Training steps: 150 Loss: 0.6197633743286133
Training steps: 200 Loss: 0.611476480960846
VALID AUC : 0.7661739212593249 ACC : 0.7049138804457954



Training steps: 0 Loss: 0.6078208684921265
Training steps: 50 Loss: 0.6115068197250366
Training steps: 100 Loss: 0.5551007390022278
Training steps: 150 Loss: 0.5534874796867371
Training steps: 200 Loss: 0.5410237312316895
VALID AUC : 0.7719573638634197 ACC : 0.7033941236068896



Training steps: 0 Loss: 0.5134978890419006
Training steps: 50 Loss: 0.5707082748413086
Training steps: 100 Loss: 0.5677574276924133
Training steps: 150 Loss: 0.5889707803726196
Training steps: 200 Loss: 0.5838099718093872
VALID AUC : 0.7735396845936443 ACC : 0.7082066869300911



Training steps: 0 Loss: 0.5919654369354248
Training steps: 50 Loss: 0.5907580256462097
Training steps: 100 Loss: 0.5812669992446899
Training steps: 150 Loss: 0.5119457244873047
Training steps: 200 Loss: 0.5490837097167969
VALID AUC : 0.7739058032633344 ACC : 0.7084599797365755



Training steps: 0 Loss: 0.5187664031982422
Training steps: 50 Loss: 0.5507243871688843
Training steps: 100 Loss: 0.618769109249115
Training steps: 150 Loss: 0.6025223731994629
Training steps: 200 Loss: 0.5995501279830933
VALID AUC : 0.7745371397351211 ACC : 0.707193515704154



Training steps: 0 Loss: 0.6132122278213501
Training steps: 50 Loss: 0.596794843673706
Training steps: 100 Loss: 0.5277812480926514
Training steps: 150 Loss: 0.600231409072876
Training steps: 200 Loss: 0.5254315137863159
VALID AUC : 0.7747145506753426 ACC : 0.7069402228976697



Training steps: 0 Loss: 0.5199386477470398
Training steps: 50 Loss: 0.6370387673377991
Training steps: 100 Loss: 0.5058273673057556
Training steps: 150 Loss: 0.6421599388122559
Training steps: 200 Loss: 0.5394273996353149
VALID AUC : 0.7749529386029774 ACC : 0.7082066869300911



Training steps: 0 Loss: 0.503588080406189
Training steps: 50 Loss: 0.5742260217666626
Training steps: 100 Loss: 0.5678952932357788
Training steps: 150 Loss: 0.6470595598220825
Training steps: 200 Loss: 0.6078394651412964
VALID AUC : 0.774972836356765 ACC : 0.7084599797365755



Training steps: 0 Loss: 0.5008181929588318
Training steps: 50 Loss: 0.5540046095848083
Training steps: 100 Loss: 0.5055907368659973
Training steps: 150 Loss: 0.6183093786239624
Training steps: 200 Loss: 0.5676316022872925
VALID AUC : 0.7746028665089224 ACC : 0.708966565349544



Training steps: 0 Loss: 0.5256476402282715
Training steps: 50 Loss: 0.5425374507904053
Training steps: 100 Loss: 0.5462218523025513
Training steps: 150 Loss: 0.5488754510879517
Training steps: 200 Loss: 0.586246132850647
VALID AUC : 0.7747507517499753 ACC : 0.7092198581560284



Training steps: 0 Loss: 0.5811713337898254
Training steps: 50 Loss: 0.7365767955780029
Training steps: 100 Loss: 0.5575495958328247
Training steps: 150 Loss: 0.559294581413269
Training steps: 200 Loss: 0.5686966776847839
VALID AUC : 0.7746544722961649 ACC : 0.7069402228976697



Training steps: 0 Loss: 0.6305075883865356
Training steps: 50 Loss: 0.6240187287330627
Training steps: 100 Loss: 0.5202264189720154
Training steps: 150 Loss: 0.4983265995979309
Training steps: 200 Loss: 0.5562132000923157
VALID AUC : 0.7749438241480167 ACC : 0.7077001013171226



Training steps: 0 Loss: 0.631051778793335
Training steps: 50 Loss: 0.5920586585998535
Training steps: 100 Loss: 0.5383285284042358
Training steps: 150 Loss: 0.5719915628433228
Training steps: 200 Loss: 0.598799467086792
VALID AUC : 0.7747284149166913 ACC : 0.7079533941236069



VBox(children=(Label(value=' 5.77MB of 5.77MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,12.0
train_loss,0.56464
train_auc,0.78168
train_acc,0.71382
valid_auc,0.77473
valid_acc,0.70795
_runtime,44.0
_timestamp,1623404518.0
_step,12.0


0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
train_loss,█▂▂▁▁▁▁▁▁▁▁▁▁
train_auc,▁▇███████████
train_acc,▁▆▇▇▇█▇▇█████
valid_auc,▁▆▇▇█████████
valid_acc,▃▁▇▇▆▅▇▇██▅▆▆
_runtime,▁▂▂▃▃▄▅▅▆▆▇▇█
_timestamp,▁▂▂▃▃▄▅▅▆▆▇▇█
_step,▁▂▂▃▃▄▅▅▆▆▇▇█


[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6992930173873901
Training steps: 50 Loss: 0.6928524971008301
Training steps: 100 Loss: 0.6794002056121826
Training steps: 150 Loss: 0.6639728546142578
Training steps: 200 Loss: 0.5827869772911072
VALID AUC : 0.768689276533537 ACC : 0.6963019250253293



Training steps: 0 Loss: 0.630569338798523
Training steps: 50 Loss: 0.5898756980895996
Training steps: 100 Loss: 0.5844754576683044
Training steps: 150 Loss: 0.5791062116622925
Training steps: 200 Loss: 0.5855634212493896
VALID AUC : 0.7711094890810686 ACC : 0.7011144883485309



Training steps: 0 Loss: 0.5226792693138123
Training steps: 50 Loss: 0.60425865650177
Training steps: 100 Loss: 0.5936834812164307
Training steps: 150 Loss: 0.5851699113845825
Training steps: 200 Loss: 0.5042080879211426
VALID AUC : 0.7727050841402419 ACC : 0.7001013171225937



Training steps: 0 Loss: 0.5912057161331177
Training steps: 50 Loss: 0.643416166305542
Training steps: 100 Loss: 0.539395809173584
Training steps: 150 Loss: 0.56778883934021
Training steps: 200 Loss: 0.560995876789093
VALID AUC : 0.7737015290199654 ACC : 0.7008611955420466



Training steps: 0 Loss: 0.5697289705276489
Training steps: 50 Loss: 0.6468511819839478
Training steps: 100 Loss: 0.5798032283782959
Training steps: 150 Loss: 0.5673441886901855
Training steps: 200 Loss: 0.5636029243469238
VALID AUC : 0.7745557896206503 ACC : 0.7021276595744681



Training steps: 0 Loss: 0.5490702390670776
Training steps: 50 Loss: 0.5363490581512451
Training steps: 100 Loss: 0.6179380416870117
Training steps: 150 Loss: 0.6036038994789124
Training steps: 200 Loss: 0.6198813915252686
VALID AUC : 0.7751933090234147 ACC : 0.7036474164133738



Training steps: 0 Loss: 0.5330266356468201
Training steps: 50 Loss: 0.5575742125511169
Training steps: 100 Loss: 0.5697990655899048
Training steps: 150 Loss: 0.4619908034801483
Training steps: 200 Loss: 0.5202050805091858
VALID AUC : 0.7754997495092846 ACC : 0.7041540020263425



Training steps: 0 Loss: 0.4808439314365387
Training steps: 50 Loss: 0.5269299745559692
Training steps: 100 Loss: 0.470925509929657
Training steps: 150 Loss: 0.6030152440071106
Training steps: 200 Loss: 0.594698429107666
VALID AUC : 0.7762353093355014 ACC : 0.7049138804457954



Training steps: 0 Loss: 0.5579037666320801
Training steps: 50 Loss: 0.5213374495506287
Training steps: 100 Loss: 0.6054607629776001
Training steps: 150 Loss: 0.571386456489563
Training steps: 200 Loss: 0.5140939354896545
VALID AUC : 0.7759799422639433 ACC : 0.7066869300911854



Training steps: 0 Loss: 0.5114179849624634
Training steps: 50 Loss: 0.5561589002609253
Training steps: 100 Loss: 0.5101161003112793
Training steps: 150 Loss: 0.5698599815368652
Training steps: 200 Loss: 0.5692266821861267
VALID AUC : 0.7766813675971781 ACC : 0.7079533941236069



Training steps: 0 Loss: 0.5500303506851196
Training steps: 50 Loss: 0.5155755281448364
Training steps: 100 Loss: 0.5876955986022949
Training steps: 150 Loss: 0.5181998610496521
Training steps: 200 Loss: 0.5751667022705078
VALID AUC : 0.776451408907614 ACC : 0.7077001013171226



Training steps: 0 Loss: 0.5209335684776306
Training steps: 50 Loss: 0.6294851303100586
Training steps: 100 Loss: 0.6111680865287781
Training steps: 150 Loss: 0.6056864261627197
Training steps: 200 Loss: 0.573022723197937
VALID AUC : 0.7769149193912666 ACC : 0.7079533941236069



Training steps: 0 Loss: 0.5795552134513855
Training steps: 50 Loss: 0.6034218072891235
Training steps: 100 Loss: 0.6923911571502686
Training steps: 150 Loss: 0.500023603439331
Training steps: 200 Loss: 0.6027054190635681
VALID AUC : 0.7768447255278785 ACC : 0.7082066869300911



Training steps: 0 Loss: 0.5939286947250366
Training steps: 50 Loss: 0.5581960678100586
Training steps: 100 Loss: 0.6433559656143188
Training steps: 150 Loss: 0.5799442529678345
Training steps: 200 Loss: 0.550249457359314
VALID AUC : 0.7767591326450998 ACC : 0.7087132725430598



Training steps: 0 Loss: 0.4975228011608124
Training steps: 50 Loss: 0.5340028405189514
Training steps: 100 Loss: 0.5722466707229614
Training steps: 150 Loss: 0.620250940322876
Training steps: 200 Loss: 0.5533452033996582
VALID AUC : 0.7772477948604234 ACC : 0.7087132725430598



Training steps: 0 Loss: 0.611280620098114
Training steps: 50 Loss: 0.5879604816436768
Training steps: 100 Loss: 0.5673375725746155
Training steps: 150 Loss: 0.5503016114234924
Training steps: 200 Loss: 0.5578497648239136
VALID AUC : 0.7772906554643934 ACC : 0.7099797365754813



Training steps: 0 Loss: 0.5963739156723022
Training steps: 50 Loss: 0.4995095431804657
Training steps: 100 Loss: 0.5169239044189453
Training steps: 150 Loss: 0.5662350654602051
Training steps: 200 Loss: 0.5780038833618164
VALID AUC : 0.7769979457708133 ACC : 0.7074468085106383



Training steps: 0 Loss: 0.6790558695793152
Training steps: 50 Loss: 0.4938156306743622
Training steps: 100 Loss: 0.537197470664978
Training steps: 150 Loss: 0.5694968700408936
Training steps: 200 Loss: 0.6750121116638184
VALID AUC : 0.7770717327387259 ACC : 0.7069402228976697



Training steps: 0 Loss: 0.6101818084716797
Training steps: 50 Loss: 0.6309150457382202
Training steps: 100 Loss: 0.5428738594055176
Training steps: 150 Loss: 0.5406882762908936
Training steps: 200 Loss: 0.6023023128509521
VALID AUC : 0.7771048406304153 ACC : 0.7059270516717325



Training steps: 0 Loss: 0.5856865048408508
Training steps: 50 Loss: 0.5922126770019531
Training steps: 100 Loss: 0.6060779094696045
Training steps: 150 Loss: 0.48467937111854553
Training steps: 200 Loss: 0.5457831621170044
VALID AUC : 0.7771122834897874 ACC : 0.7084599797365755



VBox(children=(Label(value=' 5.77MB of 5.77MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,19.0
train_loss,0.56418
train_auc,0.78202
train_acc,0.71338
valid_auc,0.77711
valid_acc,0.70846
_runtime,68.0
_timestamp,1623404590.0
_step,19.0


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_auc,▁▇▇▇████████████████
train_acc,▁▇▇▇████████████████
valid_auc,▁▃▄▅▆▆▇▇▇█▇█████████
valid_acc,▁▃▃▃▄▅▅▅▆▇▇▇▇▇▇█▇▆▆▇
_runtime,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇██
_timestamp,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇██
_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██


[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6977226138114929
Training steps: 50 Loss: 0.6879802942276001
Training steps: 100 Loss: 0.6727813482284546
Training steps: 150 Loss: 0.6473334431648254
Training steps: 200 Loss: 0.619423508644104
VALID AUC : 0.770963270963271 ACC : 0.7008611955420466



Training steps: 0 Loss: 0.595924437046051
Training steps: 50 Loss: 0.6539539694786072
Training steps: 100 Loss: 0.6403425931930542
Training steps: 150 Loss: 0.5642470121383667
Training steps: 200 Loss: 0.540826141834259
VALID AUC : 0.7749145299145299 ACC : 0.7026342451874367



Training steps: 0 Loss: 0.5018512010574341
Training steps: 50 Loss: 0.5620393753051758
Training steps: 100 Loss: 0.625359833240509
Training steps: 150 Loss: 0.5238800048828125
Training steps: 200 Loss: 0.5669413805007935
VALID AUC : 0.7770106003439337 ACC : 0.7059270516717325



Training steps: 0 Loss: 0.6182305812835693
Training steps: 50 Loss: 0.6075901985168457
Training steps: 100 Loss: 0.6285566091537476
Training steps: 150 Loss: 0.6435513496398926
Training steps: 200 Loss: 0.5935214757919312
VALID AUC : 0.7787133287133288 ACC : 0.7036474164133738



Training steps: 0 Loss: 0.5888941884040833
Training steps: 50 Loss: 0.6751599311828613
Training steps: 100 Loss: 0.5425357818603516
Training steps: 150 Loss: 0.5216089487075806
Training steps: 200 Loss: 0.5525755882263184
VALID AUC : 0.7793511460178127 ACC : 0.7059270516717325



Training steps: 0 Loss: 0.6257911920547485
Training steps: 50 Loss: 0.5887433290481567
Training steps: 100 Loss: 0.5855381488800049
Training steps: 150 Loss: 0.5736437439918518
Training steps: 200 Loss: 0.6787871718406677
VALID AUC : 0.7797582197582197 ACC : 0.7061803444782169



Training steps: 0 Loss: 0.6162322759628296
Training steps: 50 Loss: 0.5997665524482727
Training steps: 100 Loss: 0.5713402032852173
Training steps: 150 Loss: 0.5130209922790527
Training steps: 200 Loss: 0.5269345045089722
VALID AUC : 0.7808520058520059 ACC : 0.7069402228976697



Training steps: 0 Loss: 0.46821966767311096
Training steps: 50 Loss: 0.5647320747375488
Training steps: 100 Loss: 0.5462440252304077
Training steps: 150 Loss: 0.4780936539173126
Training steps: 200 Loss: 0.5830329060554504
VALID AUC : 0.7803790970457636 ACC : 0.707193515704154



Training steps: 0 Loss: 0.5579469203948975
Training steps: 50 Loss: 0.7009466290473938
Training steps: 100 Loss: 0.5334693193435669
Training steps: 150 Loss: 0.6028881072998047
Training steps: 200 Loss: 0.5600790977478027
VALID AUC : 0.7808572675239343 ACC : 0.7056737588652482



Training steps: 0 Loss: 0.5315427780151367
Training steps: 50 Loss: 0.5700286626815796
Training steps: 100 Loss: 0.5683374404907227
Training steps: 150 Loss: 0.5644292831420898
Training steps: 200 Loss: 0.5109124779701233
VALID AUC : 0.780929775929776 ACC : 0.7064336372847011



Training steps: 0 Loss: 0.6239681243896484
Training steps: 50 Loss: 0.498975932598114
Training steps: 100 Loss: 0.5299121141433716
Training steps: 150 Loss: 0.5797358751296997
Training steps: 200 Loss: 0.6504656672477722
VALID AUC : 0.7812376479043145 ACC : 0.7084599797365755



Training steps: 0 Loss: 0.6751903891563416
Training steps: 50 Loss: 0.5430334806442261
Training steps: 100 Loss: 0.557906985282898
Training steps: 150 Loss: 0.5701483488082886
Training steps: 200 Loss: 0.6118913888931274
VALID AUC : 0.7815577115577115 ACC : 0.7084599797365755



Training steps: 0 Loss: 0.47606104612350464
Training steps: 50 Loss: 0.532028079032898
Training steps: 100 Loss: 0.6461909413337708
Training steps: 150 Loss: 0.6396211385726929
Training steps: 200 Loss: 0.5513244867324829
VALID AUC : 0.781679628346295 ACC : 0.709726443768997



Training steps: 0 Loss: 0.5391976833343506
Training steps: 50 Loss: 0.5091882348060608
Training steps: 100 Loss: 0.5459716320037842
Training steps: 150 Loss: 0.6477074027061462
Training steps: 200 Loss: 0.5940631628036499
VALID AUC : 0.7815030415030415 ACC : 0.709726443768997



Training steps: 0 Loss: 0.5677794814109802
Training steps: 50 Loss: 0.5531822443008423
Training steps: 100 Loss: 0.5832763314247131
Training steps: 150 Loss: 0.5656651854515076
Training steps: 200 Loss: 0.4762006402015686
VALID AUC : 0.7813729113729113 ACC : 0.7069402228976697



Training steps: 0 Loss: 0.5679396390914917
Training steps: 50 Loss: 0.6185888051986694
Training steps: 100 Loss: 0.5699814558029175
Training steps: 150 Loss: 0.6075723171234131
Training steps: 200 Loss: 0.5444631576538086
VALID AUC : 0.7815008598341932 ACC : 0.7092198581560284



Training steps: 0 Loss: 0.5198460817337036
Training steps: 50 Loss: 0.5156384706497192
Training steps: 100 Loss: 0.47411543130874634
Training steps: 150 Loss: 0.5467678904533386
Training steps: 200 Loss: 0.5876615643501282
VALID AUC : 0.7818790585457253 ACC : 0.7092198581560284



Training steps: 0 Loss: 0.5343640446662903
Training steps: 50 Loss: 0.46798259019851685
Training steps: 100 Loss: 0.6384103298187256
Training steps: 150 Loss: 0.5551512837409973
Training steps: 200 Loss: 0.5439321994781494
VALID AUC : 0.7818849618849619 ACC : 0.7087132725430598



Training steps: 0 Loss: 0.539951503276825
Training steps: 50 Loss: 0.6227497458457947
Training steps: 100 Loss: 0.5046347379684448
Training steps: 150 Loss: 0.5611745119094849
Training steps: 200 Loss: 0.574172854423523
VALID AUC : 0.7817851184517851 ACC : 0.7102330293819655



Training steps: 0 Loss: 0.5218226909637451
Training steps: 50 Loss: 0.5474698543548584
Training steps: 100 Loss: 0.5759352445602417
Training steps: 150 Loss: 0.4847087562084198
Training steps: 200 Loss: 0.5448772311210632
VALID AUC : 0.781705038371705 ACC : 0.7079533941236069



VBox(children=(Label(value=' 5.77MB of 5.77MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,19.0
train_loss,0.56831
train_auc,0.7785
train_acc,0.70673
valid_auc,0.78171
valid_acc,0.70795
_runtime,68.0
_timestamp,1623404662.0
_step,19.0


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_auc,▁▇▇▇████████████████
train_acc,▁▇▇█████████████████
valid_auc,▁▄▅▆▆▇▇▇▇▇██████████
valid_acc,▁▂▅▃▅▅▆▆▅▅▇▇██▆▇▇▇█▆
_runtime,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▆▇▇██
_timestamp,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▆▇▇██
_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██


[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6893694400787354
Training steps: 50 Loss: 0.6787620782852173
Training steps: 100 Loss: 0.6629106402397156
Training steps: 150 Loss: 0.6386033296585083
Training steps: 200 Loss: 0.6168570518493652
VALID AUC : 0.7665777816559816 ACC : 0.6922492401215805



Training steps: 0 Loss: 0.6142791509628296
Training steps: 50 Loss: 0.5463283061981201
Training steps: 100 Loss: 0.5568525791168213
Training steps: 150 Loss: 0.4955759346485138
Training steps: 200 Loss: 0.5594042539596558
VALID AUC : 0.7681528172014311 ACC : 0.6937689969604863



Training steps: 0 Loss: 0.5841468572616577
Training steps: 50 Loss: 0.6159460544586182
Training steps: 100 Loss: 0.5561993718147278
Training steps: 150 Loss: 0.5238215923309326
Training steps: 200 Loss: 0.5422248244285583
VALID AUC : 0.7696574034626658 ACC : 0.6973150962512664



Training steps: 0 Loss: 0.5602443218231201
Training steps: 50 Loss: 0.6123101115226746
Training steps: 100 Loss: 0.5775495767593384
Training steps: 150 Loss: 0.6361199617385864
Training steps: 200 Loss: 0.6680476665496826
VALID AUC : 0.7701085868566531 ACC : 0.6978216818642351



Training steps: 0 Loss: 0.5249186754226685
Training steps: 50 Loss: 0.5077577829360962
Training steps: 100 Loss: 0.5360866785049438
Training steps: 150 Loss: 0.4856778383255005
Training steps: 200 Loss: 0.5146434307098389
VALID AUC : 0.7704123272131854 ACC : 0.6990881458966566



Training steps: 0 Loss: 0.5472831726074219
Training steps: 50 Loss: 0.5074766874313354
Training steps: 100 Loss: 0.5700969099998474
Training steps: 150 Loss: 0.594957172870636
Training steps: 200 Loss: 0.5766851305961609
VALID AUC : 0.7705506593231736 ACC : 0.7006079027355623



Training steps: 0 Loss: 0.566672682762146
Training steps: 50 Loss: 0.46611854434013367
Training steps: 100 Loss: 0.4785139560699463
Training steps: 150 Loss: 0.4950575828552246
Training steps: 200 Loss: 0.5836055278778076
VALID AUC : 0.7704885510288931 ACC : 0.7018743667679838



Training steps: 0 Loss: 0.5896592736244202
Training steps: 50 Loss: 0.5570715665817261
Training steps: 100 Loss: 0.4955548942089081
Training steps: 150 Loss: 0.48937761783599854
Training steps: 200 Loss: 0.48340365290641785
VALID AUC : 0.7707586707798441 ACC : 0.7013677811550152



Training steps: 0 Loss: 0.5228601098060608
Training steps: 50 Loss: 0.5152183771133423
Training steps: 100 Loss: 0.5318632125854492
Training steps: 150 Loss: 0.5099489092826843
Training steps: 200 Loss: 0.5056708455085754
VALID AUC : 0.7705067728838267 ACC : 0.6985815602836879



Training steps: 0 Loss: 0.6289745569229126
Training steps: 50 Loss: 0.5621281862258911
Training steps: 100 Loss: 0.5152347087860107
Training steps: 150 Loss: 0.5291506052017212
Training steps: 200 Loss: 0.6011649370193481
VALID AUC : 0.7704831614661662 ACC : 0.6990881458966566



Training steps: 0 Loss: 0.4860227704048157
Training steps: 50 Loss: 0.6320185661315918
Training steps: 100 Loss: 0.6126279830932617
Training steps: 150 Loss: 0.5537700653076172
Training steps: 200 Loss: 0.5232439637184143
VALID AUC : 0.7706152057529732 ACC : 0.7011144883485309



Training steps: 0 Loss: 0.6082488894462585
Training steps: 50 Loss: 0.5151845216751099
Training steps: 100 Loss: 0.5173142552375793
Training steps: 150 Loss: 0.5723379850387573
Training steps: 200 Loss: 0.5296265482902527
VALID AUC : 0.7706181571801808 ACC : 0.6993414387031408



Training steps: 0 Loss: 0.529248833656311
Training steps: 50 Loss: 0.5202993750572205
Training steps: 100 Loss: 0.5663237571716309
Training steps: 150 Loss: 0.5406597852706909
Training steps: 200 Loss: 0.5024064779281616
VALID AUC : 0.7706972041001741 ACC : 0.6988348530901722



VBox(children=(Label(value=' 5.77MB of 5.77MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,12.0
train_loss,0.56102
train_auc,0.78466
train_acc,0.71756
valid_auc,0.7707
valid_acc,0.69883
_runtime,46.0
_timestamp,1623404711.0
_step,12.0


0,1
epoch,▁▂▂▃▃▄▅▅▆▆▇▇█
train_loss,█▂▂▁▁▁▁▁▁▁▁▁▁
train_auc,▁▇███████████
train_acc,▁▇▇▇█████████
valid_auc,▁▄▆▇▇████████
valid_acc,▁▂▅▅▆▇██▆▆▇▆▆
_runtime,▁▂▂▃▃▄▅▅▆▆▇▇█
_timestamp,▁▂▂▃▃▄▅▅▆▆▇▇█
_step,▁▂▂▃▃▄▅▅▆▆▇▇█


[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.6896981596946716
Training steps: 50 Loss: 0.6900671720504761
Training steps: 100 Loss: 0.6861625909805298
Training steps: 150 Loss: 0.6746559143066406
Training steps: 200 Loss: 0.649752676486969
VALID AUC : 0.7741768650170361 ACC : 0.7036474164133738



Training steps: 0 Loss: 0.6230430603027344
Training steps: 50 Loss: 0.5846840143203735
Training steps: 100 Loss: 0.511428952217102
Training steps: 150 Loss: 0.5034840106964111
Training steps: 200 Loss: 0.5856384038925171
VALID AUC : 0.7772343794749557 ACC : 0.7082066869300911



Training steps: 0 Loss: 0.5947161316871643
Training steps: 50 Loss: 0.5388880372047424
Training steps: 100 Loss: 0.5866844654083252
Training steps: 150 Loss: 0.5690162181854248
Training steps: 200 Loss: 0.536395788192749
VALID AUC : 0.7786482567659725 ACC : 0.7069402228976697



Training steps: 0 Loss: 0.5492608547210693
Training steps: 50 Loss: 0.5798891186714172
Training steps: 100 Loss: 0.5706085562705994
Training steps: 150 Loss: 0.6030460596084595
Training steps: 200 Loss: 0.5429593324661255
VALID AUC : 0.7793213543118184 ACC : 0.7104863221884499



Training steps: 0 Loss: 0.5388408899307251
Training steps: 50 Loss: 0.5770831108093262
Training steps: 100 Loss: 0.5768698453903198
Training steps: 150 Loss: 0.5731824636459351
Training steps: 200 Loss: 0.5690631866455078
VALID AUC : 0.7794217859624293 ACC : 0.7099797365754813



Training steps: 0 Loss: 0.5436680316925049
Training steps: 50 Loss: 0.5079683065414429
Training steps: 100 Loss: 0.6003727912902832
Training steps: 150 Loss: 0.4717066287994385
Training steps: 200 Loss: 0.6844483613967896
VALID AUC : 0.7799420527356067 ACC : 0.7112462006079028



Training steps: 0 Loss: 0.5601626038551331
Training steps: 50 Loss: 0.5879883170127869
Training steps: 100 Loss: 0.556464672088623
Training steps: 150 Loss: 0.551841139793396
Training steps: 200 Loss: 0.554437518119812
VALID AUC : 0.7800734358284134 ACC : 0.7094731509625126



Training steps: 0 Loss: 0.6402342319488525
Training steps: 50 Loss: 0.5753796100616455
Training steps: 100 Loss: 0.5524563789367676
Training steps: 150 Loss: 0.5059913396835327
Training steps: 200 Loss: 0.49868282675743103
VALID AUC : 0.780032723765442 ACC : 0.708966565349544



Training steps: 0 Loss: 0.5216934680938721
Training steps: 50 Loss: 0.5644708871841431
Training steps: 100 Loss: 0.5503538846969604
Training steps: 150 Loss: 0.5940344333648682
Training steps: 200 Loss: 0.4483167231082916
VALID AUC : 0.7801219820738493 ACC : 0.7084599797365755



Training steps: 0 Loss: 0.5976600646972656
Training steps: 50 Loss: 0.5144531726837158
Training steps: 100 Loss: 0.5885078310966492
Training steps: 150 Loss: 0.7365555763244629
Training steps: 200 Loss: 0.5393996834754944
VALID AUC : 0.7800865356089278 ACC : 0.7092198581560284



Training steps: 0 Loss: 0.5996023416519165
Training steps: 50 Loss: 0.5399265289306641
Training steps: 100 Loss: 0.5960204601287842
Training steps: 150 Loss: 0.557273268699646
Training steps: 200 Loss: 0.5499653816223145
VALID AUC : 0.7799383282882055 ACC : 0.7104863221884499



Training steps: 0 Loss: 0.5733518600463867
Training steps: 50 Loss: 0.533018946647644
Training steps: 100 Loss: 0.5614207983016968
Training steps: 150 Loss: 0.6308104991912842
Training steps: 200 Loss: 0.46934792399406433
VALID AUC : 0.7801847839627862 ACC : 0.709726443768997



Training steps: 0 Loss: 0.5252739191055298
Training steps: 50 Loss: 0.5585050582885742
Training steps: 100 Loss: 0.6282821297645569
Training steps: 150 Loss: 0.5581463575363159
Training steps: 200 Loss: 0.5489150285720825
VALID AUC : 0.7802480995686063 ACC : 0.7094731509625126



Training steps: 0 Loss: 0.5586574077606201
Training steps: 50 Loss: 0.5942895412445068
Training steps: 100 Loss: 0.46654242277145386
Training steps: 150 Loss: 0.5214309096336365
Training steps: 200 Loss: 0.6107655167579651
VALID AUC : 0.780115560612813 ACC : 0.7094731509625126



Training steps: 0 Loss: 0.5862085819244385
Training steps: 50 Loss: 0.6036398410797119
Training steps: 100 Loss: 0.5881652235984802
Training steps: 150 Loss: 0.6017634272575378
Training steps: 200 Loss: 0.5775519609451294
VALID AUC : 0.7800508322855648 ACC : 0.7099797365754813



Training steps: 0 Loss: 0.5286556482315063
Training steps: 50 Loss: 0.5447274446487427
Training steps: 100 Loss: 0.5693047642707825
Training steps: 150 Loss: 0.5338605642318726
Training steps: 200 Loss: 0.673575758934021
VALID AUC : 0.7801695008855194 ACC : 0.709726443768997



Training steps: 0 Loss: 0.5401846170425415
Training steps: 50 Loss: 0.531859815120697
Training steps: 100 Loss: 0.5637170076370239
Training steps: 150 Loss: 0.5863024592399597
Training steps: 200 Loss: 0.554169774055481
VALID AUC : 0.7801730969036998 ACC : 0.7102330293819655



Training steps: 0 Loss: 0.49741095304489136
Training steps: 50 Loss: 0.49698102474212646
Training steps: 100 Loss: 0.6302911043167114
Training steps: 150 Loss: 0.5149060487747192
Training steps: 200 Loss: 0.5168042182922363
VALID AUC : 0.7801231379368361 ACC : 0.708966565349544



In [79]:
print(f"auc : {auc} acc : {acc}")
print(f"logging path : {trainer.prefix_save_path}")

auc : 0.7770310448109141 acc : 0.7075987841945289
logging path : ../split_test/LOG_[06.11_18:40]


**LB AUC : 0.7313 LB ACC : 0.6882**

In [158]:
auc, acc = trainer.run(train_data, valid_data, test_data)

VBox(children=(Label(value=' 5.77MB of 5.77MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: wandb version 0.10.32 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


Training steps: 0 Loss: 0.700603187084198
Training steps: 50 Loss: 0.6957963705062866
Training steps: 100 Loss: 0.6836996078491211
Training steps: 150 Loss: 0.6726133227348328
Training steps: 200 Loss: 0.665005624294281
Training steps: 250 Loss: 0.5939368009567261
VALID AUC : 0.77769436356305 ACC : 0.7060895822848515



Training steps: 0 Loss: 0.5544260740280151
Training steps: 50 Loss: 0.5366555452346802
Training steps: 100 Loss: 0.5577244758605957
Training steps: 150 Loss: 0.6209127306938171
Training steps: 200 Loss: 0.5739538669586182
Training steps: 250 Loss: 0.6146116256713867
VALID AUC : 0.783435254021612 ACC : 0.7101157523905385



Training steps: 0 Loss: 0.5501046776771545
Training steps: 50 Loss: 0.5619213581085205
Training steps: 100 Loss: 0.5576613545417786
Training steps: 150 Loss: 0.5835872888565063
Training steps: 200 Loss: 0.5580599308013916
Training steps: 250 Loss: 0.6445865631103516
VALID AUC : 0.7860476048638254 ACC : 0.7161550075490689



Training steps: 0 Loss: 0.5884982347488403
Training steps: 50 Loss: 0.519839882850647
Training steps: 100 Loss: 0.5290238261222839
Training steps: 150 Loss: 0.5839225053787231
Training steps: 200 Loss: 0.6324130892753601
Training steps: 250 Loss: 0.6108105182647705
VALID AUC : 0.7878165265510484 ACC : 0.7141419224962254



Training steps: 0 Loss: 0.6371676325798035
Training steps: 50 Loss: 0.5093662738800049
Training steps: 100 Loss: 0.538307785987854
Training steps: 150 Loss: 0.5686457753181458
Training steps: 200 Loss: 0.6063700914382935
Training steps: 250 Loss: 0.6229968667030334
VALID AUC : 0.7889117180423935 ACC : 0.7151484650226472



Training steps: 0 Loss: 0.5798571109771729
Training steps: 50 Loss: 0.5529870390892029
Training steps: 100 Loss: 0.5669925808906555
Training steps: 150 Loss: 0.5337899923324585
Training steps: 200 Loss: 0.49725237488746643
Training steps: 250 Loss: 0.6092047095298767
VALID AUC : 0.7896614328468292 ACC : 0.7166582788122798



Training steps: 0 Loss: 0.5343998670578003
Training steps: 50 Loss: 0.5504298806190491
Training steps: 100 Loss: 0.5947290658950806
Training steps: 150 Loss: 0.5801835656166077
Training steps: 200 Loss: 0.7258906960487366
Training steps: 250 Loss: 0.5901753306388855
VALID AUC : 0.7896472490532318 ACC : 0.7166582788122798



Training steps: 0 Loss: 0.5484820008277893
Training steps: 50 Loss: 0.6090624332427979
Training steps: 100 Loss: 0.5725345611572266
Training steps: 150 Loss: 0.46471142768859863
Training steps: 200 Loss: 0.516993522644043
Training steps: 250 Loss: 0.5926272869110107
VALID AUC : 0.7897911132454344 ACC : 0.7146451937594364



Training steps: 0 Loss: 0.5951389074325562
Training steps: 50 Loss: 0.5376636981964111
Training steps: 100 Loss: 0.587437629699707
Training steps: 150 Loss: 0.7073774337768555
Training steps: 200 Loss: 0.5266275405883789
Training steps: 250 Loss: 0.5785427093505859
VALID AUC : 0.7900676972205843 ACC : 0.7161550075490689



Training steps: 0 Loss: 0.6116013526916504
Training steps: 50 Loss: 0.6006002426147461
Training steps: 100 Loss: 0.5044102072715759
Training steps: 150 Loss: 0.65506911277771
Training steps: 200 Loss: 0.610359787940979
Training steps: 250 Loss: 0.5635261535644531
VALID AUC : 0.7904030426263523 ACC : 0.7156517362858581



Training steps: 0 Loss: 0.5454117655754089
Training steps: 50 Loss: 0.5834535956382751
Training steps: 100 Loss: 0.6092569828033447
Training steps: 150 Loss: 0.5780490040779114
Training steps: 200 Loss: 0.4889211058616638
Training steps: 250 Loss: 0.6069608330726624
VALID AUC : 0.7901669837757663 ACC : 0.7161550075490689



Training steps: 0 Loss: 0.6297566890716553
Training steps: 50 Loss: 0.5434718132019043
Training steps: 100 Loss: 0.604882001876831
Training steps: 150 Loss: 0.5360378623008728
Training steps: 200 Loss: 0.5769190788269043
Training steps: 250 Loss: 0.600072979927063
VALID AUC : 0.7906988760356702 ACC : 0.7171615500754907



Training steps: 0 Loss: 0.5546300411224365
Training steps: 50 Loss: 0.7074905633926392
Training steps: 100 Loss: 0.6230628490447998
Training steps: 150 Loss: 0.7607958316802979
Training steps: 200 Loss: 0.4773101210594177
Training steps: 250 Loss: 0.5801773071289062
VALID AUC : 0.7901902857223907 ACC : 0.7166582788122798



Training steps: 0 Loss: 0.6786515712738037
Training steps: 50 Loss: 0.5602739453315735
Training steps: 100 Loss: 0.6185828447341919
Training steps: 150 Loss: 0.4971354007720947
Training steps: 200 Loss: 0.5733383893966675
Training steps: 250 Loss: 0.5385435223579407
VALID AUC : 0.7903868325765266 ACC : 0.7161550075490689



Training steps: 0 Loss: 0.5610966682434082
Training steps: 50 Loss: 0.5270990133285522
Training steps: 100 Loss: 0.6232728958129883
Training steps: 150 Loss: 0.6399747133255005
Training steps: 200 Loss: 0.5946403741836548
Training steps: 250 Loss: 0.5840374827384949
VALID AUC : 0.7902905854056868 ACC : 0.7166582788122798



Training steps: 0 Loss: 0.5220494866371155
Training steps: 50 Loss: 0.6842190027236938
Training steps: 100 Loss: 0.5202454328536987
Training steps: 150 Loss: 0.6269791126251221
Training steps: 200 Loss: 0.5264116525650024
Training steps: 250 Loss: 0.6108989119529724
VALID AUC : 0.7905924975836894 ACC : 0.7146451937594364



Training steps: 0 Loss: 0.5571156740188599
Training steps: 50 Loss: 0.6294474601745605
Training steps: 100 Loss: 0.5501905679702759
Training steps: 150 Loss: 0.5600674748420715
Training steps: 200 Loss: 0.600213885307312
Training steps: 250 Loss: 0.5868146419525146
VALID AUC : 0.7902601915622639 ACC : 0.7141419224962254



In [156]:
auc, acc

(0.7770310448109141, 0.7075987841945289)

In [84]:
preprocess.get_data("test")

Unnamed: 0,userID,answerCode,testPaper,timeSec,firstClass,secondClass,correctPer,firstClassDifficulty
0,3,1,804,-2.357753,4,267,0.69112,-0.005758
1,3,1,804,-2.357749,4,267,0.69112,-0.005758
2,3,0,804,-2.357736,4,266,0.69112,-0.005758
3,3,0,804,-2.357735,4,266,0.69112,-0.005758
4,3,0,804,-2.357734,4,265,0.69112,-0.005758
...,...,...,...,...,...,...,...,...
260109,7439,0,713,1.088795,3,783,0.62500,-0.135739
260110,7439,1,713,1.088798,3,783,0.62500,-0.135739
260111,7439,1,713,1.088801,3,761,0.62500,-0.135739
260112,7439,1,713,1.088813,3,761,0.62500,-0.135739


In [88]:
preprocess.get_data("valid").userID.nunique(), preprocess.get_data("test").userID.nunique()

(744, 744)

In [95]:
valid_grouepd = preprocess.get_data("valid_grouped")

In [97]:
import numpy as np

In [112]:
len(valid_data[0]) // 20

8

In [118]:
len(valid_data[0])

174

In [117]:
np.split(valid_data[0], 6)

[array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1]),
 array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 0, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 0, 1, 1]),
 array([1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1]),
 array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1]),
 array([1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 0, 1, 1, 0])]

In [121]:
np.split(valid_data[0])

ValueError: array split does not result in an equal division

In [103]:
len(valid_data)

7

In [123]:
max_seq_len = 20

In [149]:
new_valid_grouped = []

for valid_data in valid_grouepd:
    
    split_idx = (len(valid_data[0]) // max_seq_len) - 1
    
    if split_idx < 2:
        new_valid_grouped.append(valid_data)
        continue
    
    print(split_idx, end=' ')
    split_end = int(max_seq_len * split_idx)
    
    for valid_d in valid_data:
        split_d = np.split(valid_d[:split_end], split_idx)

7 42 5 4 20 29 10 23 14 25 19 15 23 15 9 15 26 31 2 4 30 37 6 17 16 15 16 18 27 10 23 15 8 16 28 18 11 7 4 11 28 25 38 17 8 18 19 29 8 8 3 9 14 19 32 4 26 17 18 8 4 19 32 5 4 7 5 15 19 19 3 10 16 7 12 9 11 5 30 23 18 16 4 14 4 3 15 42 13 5 30 8 6 26 10 22 14 2 2 17 12 25 5 2 3 24 16 11 4 6 6 6 21 2 9 12 2 8 28 8 30 10 7 24 9 5 26 23 15 10 5 3 5 7 7 23 12 3 22 13 10 9 17 9 11 13 18 5 5 22 27 24 25 6 2 14 11 15 17 11 16 26 20 16 27 17 13 8 14 6 4 9 33 19 5 17 10 12 17 15 4 19 29 6 25 13 12 10 9 14 25 9 9 28 4 5 4 24 4 12 22 29 29 12 14 7 6 20 13 27 15 21 14 19 24 10 17 4 21 22 8 12 12 11 5 5 16 13 8 3 7 37 7 30 3 2 8 11 9 22 17 16 10 9 10 24 19 18 23 9 33 16 27 14 11 2 18 12 10 2 8 23 7 17 7 9 11 15 2 2 26 17 11 26 9 16 5 22 7 7 11 2 21 10 6 21 14 13 9 14 9 6 9 8 5 16 20 5 6 7 10 3 14 43 21 15 28 10 4 7 8 9 8 10 3 10 24 24 22 17 9 19 4 12 3 19 10 21 6 17 30 17 23 11 3 30 16 18 9 2 23 10 30 8 11 14 17 10 18 11 16 17 42 5 6 22 6 8 10 20 7 14 7 10 11 6 14 6 20 5 16 15 15 6 21 9 20 8 3 11 4 

In [153]:
valid_data

(array([1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1]),
 array([586, 586, 586, 586, 586, 780, 780, 780, 780, 780, 780, 713, 713,
        713, 713]),
 array([-2.29728262, -2.29728162, -2.29727634, -2.29727448, -2.29726949,
         0.41494951,  0.41495108,  0.41495879,  0.41498305,  0.41498862,
         0.41498933,  1.08879517,  1.08879774,  1.08880074,  1.08881344]),
 array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]),
 array([627, 627, 627, 627, 627, 237, 237, 237, 237, 237, 237, 783, 783,
        761, 761]),
 array([0.625, 0.625, 0.625, 0.625, 0.625, 0.625, 0.625, 0.625, 0.625,
        0.625, 0.625, 0.625, 0.625, 0.625, 0.625]),
 array([-0.13573852, -0.13573852, -0.13573852, -0.13573852, -0.13573852,
        -0.13573852, -0.13573852, -0.13573852, -0.13573852, -0.13573852,
        -0.13573852, -0.13573852, -0.13573852, -0.13573852, -0.13573852]))

In [151]:
np.array(new_valid_grouped)

  np.array(new_valid_grouped)


array([[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 0]),
        array([363, 363, 363, 363, 363, 364, 364, 364, 364, 364, 372, 372, 372,
       372, 372, 373, 373, 373, 373, 373, 373, 373, 382, 382, 382, 382,
       382, 382, 382, 371, 371, 371, 371, 371, 371, 383, 383, 383, 383,
       383, 383, 383, 383, 384, 384, 384, 384, 385, 385, 385, 385, 385]),
        array([0.70953743, 0.70953971, 0.70954228, 0.70954727, 0.7095497 ,
       0.73347028, 0.73347556, 0.73347813, 0.7334837 , 0.73348627,
       0.7738529 , 0.77385732, 0.77386845, 0.77387159, 0.77387502,
       0.79563357, 0.795642  , 0.79564642, 0.79565712, 0.7956664 ,
       0.79566697, 0.79566768, 0.79619916, 0.79620987, 0.79621572,
       0.79621615, 0.79621643, 0.796217  , 0.79621757, 0.79759822,
       0.7976172 , 0.79763162, 0.79764903, 0.79765545, 0.79766359,
       0.80706581, 0.80706981, 0.80707

In [143]:
valid_data

(array([1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1]),
 array([586, 586, 586, 586, 586, 780, 780, 780, 780, 780, 780, 713, 713,
        713, 713]),
 array([-2.29728262, -2.29728162, -2.29727634, -2.29727448, -2.29726949,
         0.41494951,  0.41495108,  0.41495879,  0.41498305,  0.41498862,
         0.41498933,  1.08879517,  1.08879774,  1.08880074,  1.08881344]),
 array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]),
 array([627, 627, 627, 627, 627, 237, 237, 237, 237, 237, 237, 783, 783,
        761, 761]),
 array([0.625, 0.625, 0.625, 0.625, 0.625, 0.625, 0.625, 0.625, 0.625,
        0.625, 0.625, 0.625, 0.625, 0.625, 0.625]),
 array([-0.13573852, -0.13573852, -0.13573852, -0.13573852, -0.13573852,
        -0.13573852, -0.13573852, -0.13573852, -0.13573852, -0.13573852,
        -0.13573852, -0.13573852, -0.13573852, -0.13573852, -0.13573852]))

In [131]:
valid_data

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
        0, 0, 0, 1, 1, 0, 0, 0]),
 array([363, 363, 363, 363, 363, 364, 364, 364, 364, 364, 372, 372, 372,
        372, 372, 373, 373, 373, 373, 373, 373, 373, 382, 382, 382, 382,
        382, 382, 382, 371, 371, 371, 371, 371, 371, 383, 383, 383, 383,
        383, 383, 383, 383, 384, 384, 384, 384, 385, 385, 385, 385, 385]),
 array([0.70953743, 0.70953971, 0.70954228, 0.70954727, 0.7095497 ,
        0.73347028, 0.73347556, 0.73347813, 0.7334837 , 0.73348627,
        0.7738529 , 0.77385732, 0.77386845, 0.77387159, 0.77387502,
        0.79563357, 0.795642  , 0.79564642, 0.79565712, 0.7956664 ,
        0.79566697, 0.79566768, 0.79619916, 0.79620987, 0.79621572,
        0.79621615, 0.79621643, 0.796217  , 0.79621757, 0.79759822,
        0.7976172 , 0.79763162, 0.79764903, 0.79765545, 0.79766359,
        0.80706581, 0.80706981, 0.80707295, 0.80

In [94]:
model = trainer._get_model()

In [89]:
preprocess.get_data("valid")

Unnamed: 0,userID,answerCode,testPaper,timeSec,firstClass,secondClass,correctPer,firstClassDifficulty
0,3,1,804,-2.357753,4,267,0.69112,-0.005758
1,3,1,804,-2.357749,4,267,0.69112,-0.005758
2,3,0,804,-2.357736,4,266,0.69112,-0.005758
3,3,0,804,-2.357735,4,266,0.69112,-0.005758
4,3,0,804,-2.357734,4,265,0.69112,-0.005758
...,...,...,...,...,...,...,...,...
260108,7439,1,780,0.414989,3,237,0.62500,-0.135739
260109,7439,0,713,1.088795,3,783,0.62500,-0.135739
260110,7439,1,713,1.088798,3,783,0.62500,-0.135739
260111,7439,1,713,1.088801,3,761,0.62500,-0.135739


In [90]:
preprocess.get_data("test")

Unnamed: 0,userID,answerCode,testPaper,timeSec,firstClass,secondClass,correctPer,firstClassDifficulty
0,3,1,804,-2.357753,4,267,0.69112,-0.005758
1,3,1,804,-2.357749,4,267,0.69112,-0.005758
2,3,0,804,-2.357736,4,266,0.69112,-0.005758
3,3,0,804,-2.357735,4,266,0.69112,-0.005758
4,3,0,804,-2.357734,4,265,0.69112,-0.005758
...,...,...,...,...,...,...,...,...
260109,7439,0,713,1.088795,3,783,0.62500,-0.135739
260110,7439,1,713,1.088798,3,783,0.62500,-0.135739
260111,7439,1,713,1.088801,3,761,0.62500,-0.135739
260112,7439,1,713,1.088813,3,761,0.62500,-0.135739
