In [64]:
import pandas as pd
import numpy as np
import collections
import xgboost as xgb
from pathlib import Path
import soundfile as sf
from tqdm.auto import tqdm
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


# path = Path('/content/drive/MyDrive/TZ/stc/input/wav_data')
path = Path('/content/drive/MyDrive/TZ/stc/input/')

In [52]:
def find_length() -> int:
    """
    we need make all waves equal length and we need find mean
    return: mean(all length waves) * 8000   
    """
    tmp = []
    for p in tqdm((path / 'wav_data').glob('*.wav')):
        d, sr = sf.read(p)
        tmp.append(d.shape[0]/sr)

    print(f'Length waves max : {np.max(tmp)}, min: {np.min(tmp)}, mean: {np.mean(tmp)}')    
    return int(sr * np.mean(tmp))


def make_data(path: Path, length: int) -> list:
    """
    path: Path, path to wave file
    length: int, set length to make eq. len waves
            if > we cut, < expand zeros
    after each waves make FFT, reshape and get mean features
    return: list    
    """
    d, _ = sf.read(p)   
    if d.shape[0] < length:
        d = np.append(d, [0]*(length-d.shape[0]), axis = 0)
    else:
        d = d[:length]
    d_f = np.fft.fft(d)[:len(d)//2]
    return d_f.reshape(352, 54).mean(axis = 1)


def make_target(name: str) ->pd.DataFrame:
    """
    name: str, f.txt
    """
    data = pd.read_csv(path / name, header = None)
    data['target'] = 0
    data.columns = ['id', 'target']
    data['id'] = data['id'].str.split('.').str[0]
    return data


length = find_length()
dct = collections.defaultdict(list)
for p in tqdm((path / 'wav_data').glob('*.wav')):
    dct[p.stem].append(make_data(p, length))
    dct[p.stem] = np.ravel(dct[p.stem])

0it [00:00, ?it/s]

Length waves max : 10.6565, min: 2.4035, mean: 4.752068014705882


0it [00:00, ?it/s]

In [69]:
pd.DataFrame.from_dict(
    dct,
    orient='index',
    columns = [f'col_{i}' for i in range(352)],
        
)
# c

Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,col_11,col_12,col_13,col_14,col_15,col_16,col_17,col_18,col_19,col_20,col_21,col_22,col_23,col_24,col_25,col_26,col_27,col_28,col_29,col_30,col_31,col_32,col_33,col_34,col_35,col_36,col_37,col_38,col_39,...,col_312,col_313,col_314,col_315,col_316,col_317,col_318,col_319,col_320,col_321,col_322,col_323,col_324,col_325,col_326,col_327,col_328,col_329,col_330,col_331,col_332,col_333,col_334,col_335,col_336,col_337,col_338,col_339,col_340,col_341,col_342,col_343,col_344,col_345,col_346,col_347,col_348,col_349,col_350,col_351
A30000A2,0.608733-0.301638j,0.211616-0.353851j,0.052127-0.277182j,0.038498-0.185488j,-0.008882-0.158700j,-0.013668-0.118373j,0.006393-0.104132j,-0.019994-0.099283j,-0.021558-0.053559j,-0.001814-0.084998j,-0.010876-0.050701j,-0.018198-0.081591j,0.000682-0.056217j,-0.016977-0.090537j,-0.021160-0.058163j,-0.028615-0.047240j,0.025883+0.024417j,0.047539-0.218598j,-0.261646+0.052031j,0.029137-0.003626j,-0.067078-0.199672j,-0.308794+0.547466j,0.432776-0.581089j,0.155520-0.067445j,-0.741885+0.471972j,0.210501-0.224296j,0.021954-0.005984j,-0.083125-0.011786j,0.034002+0.024178j,-0.043001+0.028613j,0.023855-0.013924j,-0.010990-0.023863j,0.010490+0.005513j,-0.137620-0.114977j,-0.028034-0.015419j,-0.128635+0.112940j,-0.054013-0.130178j,0.099095+0.105139j,-0.062081-0.044671j,0.108593-0.022273j,...,0.016468-0.007298j,0.009750+0.002339j,-0.011083+0.000323j,0.006570+0.003047j,0.007731+0.006294j,0.000930-0.000628j,0.007507-0.001061j,0.001487-0.005121j,0.002958+0.001231j,-0.001566+0.002394j,0.005000-0.003398j,0.002226+0.003773j,0.001790-0.001846j,-0.003101+0.000565j,0.006700-0.000099j,-0.001355+0.004428j,0.004891+0.000063j,-0.001753+0.000950j,0.004209+0.001656j,0.002078+0.001933j,0.002472-0.000622j,0.003935+0.001462j,0.000162+0.000460j,0.002193+0.002473j,0.006261+0.001958j,0.005018-0.001007j,0.000216-0.002107j,0.000548-0.000615j,-0.000327-0.000932j,0.003644+0.005288j,0.005405+0.000294j,0.004511-0.000132j,0.004950-0.001964j,-0.002255-0.002972j,0.000717+0.003885j,0.005937+0.001929j,0.003952-0.003714j,0.000701-0.001351j,0.000792-0.001152j,0.001419-0.000070j
A30000D3,0.223829-0.489295j,-0.004449+0.009514j,-0.020257+0.006756j,0.040592+0.014718j,0.021104-0.032440j,-0.040740-0.022084j,-0.005802-0.005613j,-0.021012-0.006900j,-0.022852-0.008865j,-0.016162+0.009751j,-0.015511+0.006446j,-0.018825-0.002130j,-0.024549+0.029529j,0.015027+0.020501j,-0.003884-0.001720j,0.026610-0.006195j,0.005097-0.021266j,-0.223110+0.216564j,0.404322-0.090144j,-0.122968-0.556266j,0.508957+0.424637j,0.099950-0.506191j,-1.528821+0.056254j,1.858820+0.552408j,-1.302970+0.076953j,0.121610+0.047811j,0.128881-0.027057j,0.037571-0.017267j,0.054890-0.018532j,-0.113391+0.029959j,0.107514-0.025216j,-0.051356+0.018763j,0.061173+0.003307j,0.020392-0.050842j,0.036249-0.022570j,-0.085959+0.203914j,0.093457-0.624836j,-0.320196+0.078430j,0.144801+0.483950j,0.122089+0.274091j,...,0.000417+0.001253j,-0.004497+0.006029j,0.003132-0.001300j,-0.001003-0.001918j,0.002135+0.004131j,0.002156+0.001308j,-0.001054-0.000055j,0.005195-0.000770j,0.004149-0.004854j,-0.004758-0.004057j,-0.001093+0.000514j,-0.001985-0.001158j,0.002715-0.000494j,-0.003402-0.002722j,0.001412-0.000781j,-0.001697-0.002526j,-0.000251+0.000524j,-0.005371+0.002005j,-0.002451-0.003837j,-0.002761+0.003154j,0.001001+0.004606j,0.000201+0.000756j,0.004199-0.000427j,-0.005072-0.000761j,0.004478+0.002357j,-0.000271-0.004009j,0.002013-0.003175j,-0.007799+0.001479j,0.002562+0.003882j,-0.000307-0.003414j,0.000252+0.001897j,-0.001681-0.006526j,-0.004992+0.003392j,0.001953-0.000944j,-0.000756+0.004900j,-0.000291-0.004952j,0.001325+0.002739j,-0.003386-0.001056j,0.004058-0.003895j,-0.003248+0.003511j
A30000A1,-0.015923+0.014479j,-0.007111+0.007241j,0.016331+0.009544j,0.002326+0.013186j,0.058463-0.004888j,-0.017597-0.031444j,-0.017817+0.008572j,0.017739+0.005100j,0.017942-0.011622j,-0.000013-0.019041j,-0.012193-0.008176j,-0.016390-0.007706j,-0.009737+0.006700j,0.007668+0.026936j,0.002225-0.007544j,0.046502-0.006178j,-0.159088+0.064763j,0.267481-0.085004j,-0.146186+0.349871j,-0.011652+0.098723j,-0.040167-0.405733j,0.275212-0.208941j,-0.180232+0.400482j,-0.071587-0.194540j,0.142944-0.052747j,0.078670+0.009946j,-0.068931-0.028050j,0.068759-0.071168j,-0.201813+0.056206j,0.068275-0.088687j,-0.042007+0.096999j,0.075226+0.047220j,-0.096158-0.012006j,0.165772+0.064239j,-0.071677-0.130600j,-0.069861+0.079968j,0.031927-0.431263j,-0.264425+0.500026j,0.099084+0.180515j,0.154833-1.049391j,...,-0.000375-0.004956j,-0.000867+0.002616j,-0.002974-0.000179j,0.001234-0.001580j,0.003142-0.000349j,-0.001154+0.001444j,-0.001672-0.002190j,0.001865+0.003037j,0.000432+0.000983j,0.003641-0.001645j,-0.003470+0.000019j,0.003474-0.002565j,-0.000074+0.005349j,0.001337-0.002328j,0.001783-0.003009j,0.001581+0.002247j,-0.005745-0.001551j,0.006262+0.000502j,0.003316-0.002202j,0.000395+0.001777j,-0.001784-0.004606j,-0.001050-0.000388j,-0.000615+0.002007j,0.000749-0.000252j,0.000931+0.001309j,-0.001020+0.000153j,-0.001159-0.000383j,0.000423+0.003901j,0.001099+0.001883j,0.002585-0.000673j,0.001473+0.000975j,0.003402-0.000780j,0.002369+0.000760j,0.000450+0.001359j,-0.003123-0.004880j,0.004382+0.000898j,-0.001683-0.002462j,0.001782+0.000936j,0.000166-0.003743j,0.000146+0.001258j
A30000C3,0.175443+0.407593j,-0.044406+0.057738j,-0.033658+0.018559j,-0.043898+0.022290j,0.030009-0.009461j,0.027941+0.014648j,-0.046713+0.008092j,0.003748+0.012818j,-0.023657-0.014442j,0.046384-0.013747j,-0.013095-0.013133j,0.021587-0.006357j,0.008911-0.021550j,0.044094-0.048270j,0.029605+0.017495j,-0.000518-0.105908j,0.153857-0.234660j,0.562847-0.222549j,1.118692+0.836332j,-2.607494+2.858697j,1.599095-2.345258j,-1.138622+0.442770j,0.296654-1.200721j,-0.097137+0.626235j,0.320428-0.368120j,-0.541860+0.371265j,0.083076+0.055032j,-0.468505-0.165885j,0.112439-0.270077j,-0.274769+0.052498j,0.040204-0.146661j,-0.257311-0.065205j,0.035019-0.435635j,0.138029-0.190262j,0.164149+0.045674j,-0.155004-0.297460j,0.738321-0.838297j,0.404725+0.809086j,-1.606853+0.790671j,5.037479-5.077748j,...,0.026351+0.002923j,-0.034137-0.001019j,-0.010549+0.016894j,-0.026222-0.005789j,-0.003771+0.013346j,-0.025131-0.011813j,-0.013349+0.020512j,-0.021232-0.001013j,-0.009299+0.008047j,-0.020980-0.000679j,-0.001254-0.000165j,-0.019947+0.003663j,-0.004730-0.000284j,-0.011724+0.002453j,-0.006409+0.004548j,-0.018005-0.012923j,-0.002885+0.009885j,-0.024039+0.001490j,-0.017516+0.016530j,0.002641+0.001189j,-0.040071-0.011666j,-0.015169+0.004094j,-0.002349-0.000014j,-0.031980+0.010515j,-0.003227-0.006271j,-0.015735+0.005170j,-0.009834+0.002800j,-0.014872-0.004311j,-0.015621+0.011554j,-0.016058+0.005308j,-0.014425+0.005449j,-0.004490-0.005368j,-0.023531-0.000403j,-0.005287-0.002847j,-0.018372+0.003009j,-0.022267+0.008082j,-0.000578-0.005822j,-0.020915+0.006952j,-0.011531-0.011500j,-0.015112+0.010182j
A30000L1,0.243987+0.025270j,0.022751+0.003757j,0.014529+0.021503j,-0.017870+0.031155j,-0.012017+0.014986j,-0.045830-0.055465j,0.001703+0.038018j,-0.016831+0.029059j,0.033343-0.001391j,0.024066-0.007180j,0.004597-0.010938j,0.013056+0.016434j,0.019139-0.017653j,0.027395-0.028828j,-0.051763-0.013306j,0.112559-0.074627j,-0.164154+0.112182j,0.030740-0.009726j,0.096613+0.038854j,0.173386+0.298399j,-0.096644-0.401796j,-0.179253-0.123330j,-0.837252+0.309272j,0.958825-0.313576j,-0.115018+0.369270j,0.117855-0.084245j,0.114437-0.135105j,-0.168785-0.055233j,0.023737+0.257922j,0.028901-0.191303j,-0.080174+0.048823j,-0.000564-0.190828j,0.085063+0.187177j,0.061854-0.157391j,-0.183014+0.120663j,0.013965-0.190259j,0.171683+0.359850j,-0.069560-0.395116j,-0.231829+0.435521j,0.216506-0.090978j,...,-0.002165-0.007526j,-0.001602+0.003068j,0.002368+0.002370j,-0.001981+0.004944j,-0.005146+0.001063j,0.006962+0.003174j,-0.002078-0.002189j,-0.002608+0.001704j,0.004656-0.000038j,-0.002600+0.000997j,0.005069-0.000926j,0.002904+0.003586j,-0.004428+0.003299j,-0.001707-0.003307j,-0.006277-0.003533j,-0.003018-0.001867j,0.000722+0.002227j,-0.003917-0.003532j,0.000518+0.006095j,0.000617+0.000871j,-0.001829-0.003473j,-0.006767+0.002230j,0.006508-0.000717j,-0.002572-0.001272j,-0.002928-0.006111j,-0.002175+0.002553j,-0.003765+0.004006j,0.003022-0.002387j,0.009693-0.001753j,-0.004339+0.001847j,0.009469+0.005298j,-0.000309-0.001058j,0.004705+0.004217j,0.001026-0.000355j,0.003306-0.001345j,-0.005454-0.005585j,0.001741-0.005557j,-0.002313-0.003053j,0.003034-0.000173j,0.001057+0.002508j
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
A30003I1,-0.268972+0.441171j,-0.002175+0.055914j,-0.002240+0.033797j,-0.001780+0.026387j,0.002618+0.015348j,-0.002824+0.014898j,-0.006080+0.009864j,-0.001379+0.011702j,-0.004532+0.010397j,0.001764+0.011872j,-0.002078+0.007558j,-0.005036-0.029902j,0.023552+0.091690j,-0.019524-0.028430j,-0.049743+0.013237j,0.081029-0.007362j,-0.098586+0.070061j,0.145762-0.069496j,-0.106769+0.067014j,0.095127-0.116260j,-0.112559+0.046706j,0.091135+0.036980j,-0.129797-0.006470j,0.076909+0.038745j,0.185542-0.098143j,-0.234932-0.013546j,0.109819+0.019497j,-0.015829-0.010308j,0.009601-0.148363j,-0.132467+0.097764j,-0.090234-0.023616j,0.084959+0.207632j,0.003473-0.044102j,0.035380+0.050691j,0.036931-0.107366j,-0.022786+0.074356j,0.120298-0.503975j,-0.373014+0.572291j,0.251252-0.166077j,-0.091539+0.000861j,...,-0.025305+0.100881j,0.070121-0.004062j,-0.156775+0.062273j,0.017867-0.145361j,0.131454+0.072468j,-0.073640+0.045459j,0.016540-0.005056j,-0.015477+0.025554j,-0.034472-0.041428j,-0.030754-0.064218j,0.048978-0.011576j,0.034466-0.044828j,-0.004383+0.077359j,-0.022973-0.031401j,0.057848+0.013395j,-0.046208+0.049227j,-0.032842+0.004510j,-0.002115-0.017191j,0.037026+0.009878j,-0.020375-0.018488j,0.049609+0.034015j,-0.042126+0.001041j,0.018040-0.027335j,0.012407+0.030904j,-0.037604+0.006252j,0.032601-0.041383j,0.005408+0.007688j,-0.018491+0.005654j,0.022591-0.007751j,-0.023429+0.017257j,0.003525-0.025533j,0.006949+0.008372j,-0.006086-0.000596j,-0.002967-0.001985j,0.017322+0.012692j,-0.024902-0.009360j,0.017794-0.014418j,-0.021780+0.005222j,0.024574-0.005603j,-0.015787+0.018403j
A30003C3,-0.541544-0.043584j,0.034901-0.009766j,0.041349+0.007993j,0.018765+0.015025j,0.025174+0.022106j,0.016822+0.006577j,0.032363+0.030744j,0.021048+0.013690j,0.029635+0.002742j,0.045341-0.061731j,0.373595+0.184240j,-0.575465+0.252893j,0.252498+0.125225j,-0.205643-0.227055j,-0.037835+0.468302j,-0.170200-0.055391j,-0.257199+0.220425j,-0.074616-0.005709j,-0.542190+0.098661j,-0.027199+0.227328j,-2.452218-0.722715j,0.388166-2.397658j,0.353908-0.452593j,2.165950-0.624981j,1.436626+2.138519j,-0.019311-1.470442j,0.278211+0.577640j,0.644936-0.000412j,-0.044571+0.674995j,0.819352+1.093881j,-0.056200-1.034769j,0.564424+3.743734j,-3.222213-1.599939j,1.414268+0.412432j,1.581694+1.796913j,-2.463174-4.476698j,2.494261+1.401195j,0.378127+0.985212j,-1.291130-0.071536j,0.689224+0.149185j,...,-0.163331-0.196154j,0.129370+0.017267j,-0.136991+0.141326j,-0.001484-0.329075j,0.247157+0.165235j,-0.274044+0.090244j,0.022438-0.068971j,-0.054112-0.032685j,0.172258-0.196463j,-0.018849+0.142442j,0.067335-0.037233j,-0.054703+0.119521j,0.021761-0.013626j,0.022340+0.040091j,-0.009390-0.071980j,0.082146-0.067309j,0.027338+0.100363j,-0.030122-0.096697j,0.127357+0.034766j,-0.047638-0.002404j,-0.034265+0.026651j,0.028272+0.038854j,0.017174-0.009209j,0.002065+0.020426j,0.035753-0.023894j,0.011572+0.034026j,-0.051526-0.045787j,0.074342-0.017780j,0.003906+0.018222j,-0.026775-0.009145j,0.046601-0.007986j,0.025008+0.006502j,-0.001140+0.034757j,0.015035-0.028427j,0.007074+0.013426j,-0.006159+0.012405j,0.026645-0.044310j,0.050049-0.007925j,-0.029584+0.003031j,0.035378-0.012651j
A30003S5,-0.647169-0.025829j,-0.069046-0.018855j,-0.088319+0.003079j,-0.079756-0.021250j,-0.073455-0.010036j,-0.089995-0.021579j,-0.076513-0.042713j,-0.081906-0.021349j,-0.071592-0.065812j,-0.058353-0.070366j,0.232710+0.044827j,-0.335314+0.302981j,-0.332278+0.016495j,-0.238686-0.336799j,-0.251847-0.176998j,-0.082012-0.060880j,-0.154175-0.166271j,-0.633550-0.207824j,-0.071785+0.141884j,-0.798767-1.061468j,-1.963547-2.133705j,6.397407-4.419145j,0.774490+5.963126j,-1.013279-0.192405j,-0.010235-0.719309j,-0.643919+1.269925j,0.638645-0.528687j,-0.073804+0.212035j,0.132618+0.295934j,0.007033+0.383505j,0.159273-0.259656j,-1.603023+0.349481j,3.351516-1.302981j,-1.788800+1.675897j,-0.351703-1.789110j,0.082984+0.552646j,-0.300386+0.438829j,1.131248-0.426142j,-0.589631+0.548149j,0.580639-0.713285j,...,-0.266823+0.140705j,0.035469-0.460266j,0.217528+0.343053j,-0.341853-0.055159j,0.174315+0.042843j,-0.053588-0.119220j,0.099479+0.087769j,-0.109979-0.110323j,-0.006381+0.155586j,-0.057192-0.124655j,0.012620-0.029775j,0.027475+0.086739j,-0.059275-0.019703j,0.051563+0.000044j,-0.109591+0.182904j,0.074499-0.214052j,-0.022824+0.065862j,-0.021122+0.047818j,-0.008977-0.075470j,-0.021710+0.068356j,0.056283-0.051352j,0.036916+0.095180j,-0.115318-0.027315j,0.030066-0.047685j,0.011802+0.002439j,-0.093656-0.003994j,0.015113+0.088693j,-0.055828-0.050472j,-0.020339+0.020943j,0.067043-0.035586j,-0.061625+0.042430j,0.001347+0.033873j,-0.058180-0.054181j,-0.000423+0.006033j,0.008057+0.017349j,-0.053742-0.023481j,0.002757+0.026633j,0.019650-0.025650j,0.014967-0.009494j,-0.041271+0.010727j
A30003W3,-0.265619+0.441993j,0.001794+0.052971j,-0.000765+0.034156j,0.001349+0.020623j,-0.003319+0.014896j,-0.000659+0.016547j,-0.004554+0.011045j,-0.000855+0.015469j,-0.000348+0.005168j,0.004621+0.010627j,-0.019334-0.037969j,0.001961+0.014056j,-0.004459+0.002702j,-0.052895+0.111641j,0.078563-0.093031j,0.004661+0.045382j,0.001318+0.016171j,-0.005689-0.030608j,0.005622+0.004339j,-0.022212-0.040132j,0.053255+0.047063j,-0.020744-0.305966j,-0.138175-0.046045j,0.244720+0.473459j,-0.007988-0.092602j,-0.099471-0.164599j,-0.143549+0.029223j,0.089276+0.297373j,-0.078957-0.273075j,0.074756+0.088127j,0.030160+0.011841j,-0.063425+0.008655j,0.390642-0.393423j,-0.656490+0.372465j,0.405530-0.246838j,0.104629+0.219640j,-0.143206+0.241230j,0.090459-0.334145j,-0.161476-0.058091j,-0.245329-0.147745j,...,-0.009716+0.024727j,-0.002421+0.013349j,-0.019526-0.038961j,0.020909+0.032127j,-0.024792+0.019564j,0.035081-0.055630j,0.008212-0.000098j,-0.033377-0.012455j,0.013455+0.012015j,-0.015778+0.005952j,0.008474-0.007688j,-0.015444+0.012871j,-0.018008-0.015688j,0.007230-0.004444j,0.006925+0.023898j,-0.015650-0.020667j,0.012580-0.004335j,-0.006867+0.017531j,0.011005+0.002551j,-0.010116-0.004462j,-0.005759-0.003698j,0.006687+0.002836j,-0.009070-0.008197j,-0.001691-0.014630j,0.003446+0.018322j,-0.003407-0.004499j,-0.006414-0.005807j,-0.013515+0.000646j,0.002484+0.018626j,-0.002658+0.000107j,0.008822-0.019102j,-0.016218+0.003057j,0.008486+0.014673j,0.002369-0.005781j,-0.000439-0.005693j,-0.005294+0.002578j,0.000713-0.008917j,-0.007015+0.005498j,0.001065-0.001677j,-0.005601-0.003776j


In [60]:
#make target
f = make_target('f.txt')
m = make_target('m.txt')

result = pd.DataFrame.from_dict(
    dct,
    orient='index',
    columns = [f'col_{i}' for i in range(352)]    
)
# columns
result = result.reset_index()
result.columns = ['id'] +  [f'col_{i}' for i in range(352)]  

#merge target&target by id
data = result.merge(f, on='id', how='left')
data.fillna(1, inplace= True)
#check correct
assert all(sorted(data[data.target == 0]['id'].values) == f.id.values) == True, 'merge not correct for woman'
assert all(sorted(data[data.target == 1]['id'].values) == m.id.values) == True, 'merge not correct for man'

In [62]:
result.head()

Unnamed: 0,id,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,col_11,col_12,col_13,col_14,col_15,col_16,col_17,col_18,col_19,col_20,col_21,col_22,col_23,col_24,col_25,col_26,col_27,col_28,col_29,col_30,col_31,col_32,col_33,col_34,col_35,col_36,col_37,col_38,...,col_312,col_313,col_314,col_315,col_316,col_317,col_318,col_319,col_320,col_321,col_322,col_323,col_324,col_325,col_326,col_327,col_328,col_329,col_330,col_331,col_332,col_333,col_334,col_335,col_336,col_337,col_338,col_339,col_340,col_341,col_342,col_343,col_344,col_345,col_346,col_347,col_348,col_349,col_350,col_351
0,A30000A2,0.608733-0.301638j,0.211616-0.353851j,0.052127-0.277182j,0.038498-0.185488j,-0.008882-0.158700j,-0.013668-0.118373j,0.006393-0.104132j,-0.019994-0.099283j,-0.021558-0.053559j,-0.001814-0.084998j,-0.010876-0.050701j,-0.018198-0.081591j,0.000682-0.056217j,-0.016977-0.090537j,-0.021160-0.058163j,-0.028615-0.047240j,0.025883+0.024417j,0.047539-0.218598j,-0.261646+0.052031j,0.029137-0.003626j,-0.067078-0.199672j,-0.308794+0.547466j,0.432776-0.581089j,0.155520-0.067445j,-0.741885+0.471972j,0.210501-0.224296j,0.021954-0.005984j,-0.083125-0.011786j,0.034002+0.024178j,-0.043001+0.028613j,0.023855-0.013924j,-0.010990-0.023863j,0.010490+0.005513j,-0.137620-0.114977j,-0.028034-0.015419j,-0.128635+0.112940j,-0.054013-0.130178j,0.099095+0.105139j,-0.062081-0.044671j,...,0.016468-0.007298j,0.009750+0.002339j,-0.011083+0.000323j,0.006570+0.003047j,0.007731+0.006294j,0.000930-0.000628j,0.007507-0.001061j,0.001487-0.005121j,0.002958+0.001231j,-0.001566+0.002394j,0.005000-0.003398j,0.002226+0.003773j,0.001790-0.001846j,-0.003101+0.000565j,0.006700-0.000099j,-0.001355+0.004428j,0.004891+0.000063j,-0.001753+0.000950j,0.004209+0.001656j,0.002078+0.001933j,0.002472-0.000622j,0.003935+0.001462j,0.000162+0.000460j,0.002193+0.002473j,0.006261+0.001958j,0.005018-0.001007j,0.000216-0.002107j,0.000548-0.000615j,-0.000327-0.000932j,0.003644+0.005288j,0.005405+0.000294j,0.004511-0.000132j,0.004950-0.001964j,-0.002255-0.002972j,0.000717+0.003885j,0.005937+0.001929j,0.003952-0.003714j,0.000701-0.001351j,0.000792-0.001152j,0.001419-0.000070j
1,A30000D3,0.223829-0.489295j,-0.004449+0.009514j,-0.020257+0.006756j,0.040592+0.014718j,0.021104-0.032440j,-0.040740-0.022084j,-0.005802-0.005613j,-0.021012-0.006900j,-0.022852-0.008865j,-0.016162+0.009751j,-0.015511+0.006446j,-0.018825-0.002130j,-0.024549+0.029529j,0.015027+0.020501j,-0.003884-0.001720j,0.026610-0.006195j,0.005097-0.021266j,-0.223110+0.216564j,0.404322-0.090144j,-0.122968-0.556266j,0.508957+0.424637j,0.099950-0.506191j,-1.528821+0.056254j,1.858820+0.552408j,-1.302970+0.076953j,0.121610+0.047811j,0.128881-0.027057j,0.037571-0.017267j,0.054890-0.018532j,-0.113391+0.029959j,0.107514-0.025216j,-0.051356+0.018763j,0.061173+0.003307j,0.020392-0.050842j,0.036249-0.022570j,-0.085959+0.203914j,0.093457-0.624836j,-0.320196+0.078430j,0.144801+0.483950j,...,0.000417+0.001253j,-0.004497+0.006029j,0.003132-0.001300j,-0.001003-0.001918j,0.002135+0.004131j,0.002156+0.001308j,-0.001054-0.000055j,0.005195-0.000770j,0.004149-0.004854j,-0.004758-0.004057j,-0.001093+0.000514j,-0.001985-0.001158j,0.002715-0.000494j,-0.003402-0.002722j,0.001412-0.000781j,-0.001697-0.002526j,-0.000251+0.000524j,-0.005371+0.002005j,-0.002451-0.003837j,-0.002761+0.003154j,0.001001+0.004606j,0.000201+0.000756j,0.004199-0.000427j,-0.005072-0.000761j,0.004478+0.002357j,-0.000271-0.004009j,0.002013-0.003175j,-0.007799+0.001479j,0.002562+0.003882j,-0.000307-0.003414j,0.000252+0.001897j,-0.001681-0.006526j,-0.004992+0.003392j,0.001953-0.000944j,-0.000756+0.004900j,-0.000291-0.004952j,0.001325+0.002739j,-0.003386-0.001056j,0.004058-0.003895j,-0.003248+0.003511j
2,A30000A1,-0.015923+0.014479j,-0.007111+0.007241j,0.016331+0.009544j,0.002326+0.013186j,0.058463-0.004888j,-0.017597-0.031444j,-0.017817+0.008572j,0.017739+0.005100j,0.017942-0.011622j,-0.000013-0.019041j,-0.012193-0.008176j,-0.016390-0.007706j,-0.009737+0.006700j,0.007668+0.026936j,0.002225-0.007544j,0.046502-0.006178j,-0.159088+0.064763j,0.267481-0.085004j,-0.146186+0.349871j,-0.011652+0.098723j,-0.040167-0.405733j,0.275212-0.208941j,-0.180232+0.400482j,-0.071587-0.194540j,0.142944-0.052747j,0.078670+0.009946j,-0.068931-0.028050j,0.068759-0.071168j,-0.201813+0.056206j,0.068275-0.088687j,-0.042007+0.096999j,0.075226+0.047220j,-0.096158-0.012006j,0.165772+0.064239j,-0.071677-0.130600j,-0.069861+0.079968j,0.031927-0.431263j,-0.264425+0.500026j,0.099084+0.180515j,...,-0.000375-0.004956j,-0.000867+0.002616j,-0.002974-0.000179j,0.001234-0.001580j,0.003142-0.000349j,-0.001154+0.001444j,-0.001672-0.002190j,0.001865+0.003037j,0.000432+0.000983j,0.003641-0.001645j,-0.003470+0.000019j,0.003474-0.002565j,-0.000074+0.005349j,0.001337-0.002328j,0.001783-0.003009j,0.001581+0.002247j,-0.005745-0.001551j,0.006262+0.000502j,0.003316-0.002202j,0.000395+0.001777j,-0.001784-0.004606j,-0.001050-0.000388j,-0.000615+0.002007j,0.000749-0.000252j,0.000931+0.001309j,-0.001020+0.000153j,-0.001159-0.000383j,0.000423+0.003901j,0.001099+0.001883j,0.002585-0.000673j,0.001473+0.000975j,0.003402-0.000780j,0.002369+0.000760j,0.000450+0.001359j,-0.003123-0.004880j,0.004382+0.000898j,-0.001683-0.002462j,0.001782+0.000936j,0.000166-0.003743j,0.000146+0.001258j
3,A30000C3,0.175443+0.407593j,-0.044406+0.057738j,-0.033658+0.018559j,-0.043898+0.022290j,0.030009-0.009461j,0.027941+0.014648j,-0.046713+0.008092j,0.003748+0.012818j,-0.023657-0.014442j,0.046384-0.013747j,-0.013095-0.013133j,0.021587-0.006357j,0.008911-0.021550j,0.044094-0.048270j,0.029605+0.017495j,-0.000518-0.105908j,0.153857-0.234660j,0.562847-0.222549j,1.118692+0.836332j,-2.607494+2.858697j,1.599095-2.345258j,-1.138622+0.442770j,0.296654-1.200721j,-0.097137+0.626235j,0.320428-0.368120j,-0.541860+0.371265j,0.083076+0.055032j,-0.468505-0.165885j,0.112439-0.270077j,-0.274769+0.052498j,0.040204-0.146661j,-0.257311-0.065205j,0.035019-0.435635j,0.138029-0.190262j,0.164149+0.045674j,-0.155004-0.297460j,0.738321-0.838297j,0.404725+0.809086j,-1.606853+0.790671j,...,0.026351+0.002923j,-0.034137-0.001019j,-0.010549+0.016894j,-0.026222-0.005789j,-0.003771+0.013346j,-0.025131-0.011813j,-0.013349+0.020512j,-0.021232-0.001013j,-0.009299+0.008047j,-0.020980-0.000679j,-0.001254-0.000165j,-0.019947+0.003663j,-0.004730-0.000284j,-0.011724+0.002453j,-0.006409+0.004548j,-0.018005-0.012923j,-0.002885+0.009885j,-0.024039+0.001490j,-0.017516+0.016530j,0.002641+0.001189j,-0.040071-0.011666j,-0.015169+0.004094j,-0.002349-0.000014j,-0.031980+0.010515j,-0.003227-0.006271j,-0.015735+0.005170j,-0.009834+0.002800j,-0.014872-0.004311j,-0.015621+0.011554j,-0.016058+0.005308j,-0.014425+0.005449j,-0.004490-0.005368j,-0.023531-0.000403j,-0.005287-0.002847j,-0.018372+0.003009j,-0.022267+0.008082j,-0.000578-0.005822j,-0.020915+0.006952j,-0.011531-0.011500j,-0.015112+0.010182j
4,A30000L1,0.243987+0.025270j,0.022751+0.003757j,0.014529+0.021503j,-0.017870+0.031155j,-0.012017+0.014986j,-0.045830-0.055465j,0.001703+0.038018j,-0.016831+0.029059j,0.033343-0.001391j,0.024066-0.007180j,0.004597-0.010938j,0.013056+0.016434j,0.019139-0.017653j,0.027395-0.028828j,-0.051763-0.013306j,0.112559-0.074627j,-0.164154+0.112182j,0.030740-0.009726j,0.096613+0.038854j,0.173386+0.298399j,-0.096644-0.401796j,-0.179253-0.123330j,-0.837252+0.309272j,0.958825-0.313576j,-0.115018+0.369270j,0.117855-0.084245j,0.114437-0.135105j,-0.168785-0.055233j,0.023737+0.257922j,0.028901-0.191303j,-0.080174+0.048823j,-0.000564-0.190828j,0.085063+0.187177j,0.061854-0.157391j,-0.183014+0.120663j,0.013965-0.190259j,0.171683+0.359850j,-0.069560-0.395116j,-0.231829+0.435521j,...,-0.002165-0.007526j,-0.001602+0.003068j,0.002368+0.002370j,-0.001981+0.004944j,-0.005146+0.001063j,0.006962+0.003174j,-0.002078-0.002189j,-0.002608+0.001704j,0.004656-0.000038j,-0.002600+0.000997j,0.005069-0.000926j,0.002904+0.003586j,-0.004428+0.003299j,-0.001707-0.003307j,-0.006277-0.003533j,-0.003018-0.001867j,0.000722+0.002227j,-0.003917-0.003532j,0.000518+0.006095j,0.000617+0.000871j,-0.001829-0.003473j,-0.006767+0.002230j,0.006508-0.000717j,-0.002572-0.001272j,-0.002928-0.006111j,-0.002175+0.002553j,-0.003765+0.004006j,0.003022-0.002387j,0.009693-0.001753j,-0.004339+0.001847j,0.009469+0.005298j,-0.000309-0.001058j,0.004705+0.004217j,0.001026-0.000355j,0.003306-0.001345j,-0.005454-0.005585j,0.001741-0.005557j,-0.002313-0.003053j,0.003034-0.000173j,0.001057+0.002508j


In [65]:
data = data.sample(frac=1).reset_index(drop=True)
y = data.target.values
X = data.drop(['id','target'], axis = 1).astype('float64')

  return arr.astype(dtype, copy=True)


In [67]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=13)
clf = LogisticRegression(random_state=13).fit(X_train, y_train)
y_ = clf.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, y_)

0.49870129870129876

In [68]:
model = xgb.XGBClassifier(
    n_estimators=1000,
    max_depth=4,
    learning_rate=0.05,
    verbosity=0,
    objective='binary:logistic',
    subsample=0.95,
    colsample_bytree=0.95,
    random_state=13,
    n_jobs=2,
    )
model.fit(X_train, y_train)
y_ = model.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, y_)

0.9012987012987014