In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error

# Load both splits. The test frame gets an all-NaN 'loss' column so the two
# frames share the same columns and can be stacked into a single frame.
train = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv').assign(loss=np.nan)

joined = pd.concat([train, test])

def logregobj(preds, dtrain):
    """Custom objective: return the per-row gradient and hessian.

    With residual ``x = preds - labels`` and the constant ``c = 2``:

        grad = c * x / (|x| + c)
        hess = c**2 / (|x| + c)**2

    These are the first and second derivatives of the "fair" loss
    ``c**2 * (|x|/c - log(1 + |x|/c))``; despite the function's name, no
    logistic term is involved.

    Args:
        preds: array of current predictions.
        dtrain: object exposing ``get_label()`` that returns the targets.

    Returns:
        Tuple ``(grad, hess)`` of arrays with the same shape as the residual.
    """
    fair_c = 2
    residual = preds - dtrain.get_label()
    # |x| + c is shared by both derivatives.
    denom = np.abs(residual) + fair_c
    gradient = fair_c * residual / denom
    hessian = fair_c ** 2 / denom ** 2
    return gradient, hessian

def evalerror(preds, dtrain):
    """Custom evaluation metric: MAE after exponentiating both sides.

    Args:
        preds: array of predictions.
        dtrain: object exposing ``get_label()`` that returns the targets.

    Returns:
        Tuple ``('mae', value)`` where ``value`` is the mean absolute error
        between ``exp(preds)`` and ``exp(labels)``.
    """
    targets = dtrain.get_label()
    exp_preds = np.exp(preds)
    exp_targets = np.exp(targets)
    score = mean_absolute_error(exp_preds, exp_targets)
    return 'mae', score


# Partition feature columns by name prefix: 'cat...' vs 'cont...'.
cat_feature = list(filter(lambda name: name.startswith('cat'), joined.columns))
cont_feature = list(filter(lambda name: name.startswith('cont'), joined.columns))
             
               
if __name__ == '__main__':
    # Integer-encode each categorical column over the stacked train+test
    # frame, so a given category value gets the same code in both splits.
    for column in cat_feature:
        joined[column] = pd.factorize(joined[column].values, sort=True)[0]

    # Training rows carry a loss; test rows were given a NaN placeholder.
    train = joined[joined['loss'].notnull()]
    test = joined[joined['loss'].isnull()]

    # The model is fit on log(loss + shift); exp(pred) - shift inverts it.
    shift = 200
    y = np.log(train['loss'] + shift)
    ids = test['id']
    # `axis` is passed by keyword: pandas 2.0 removed the positional form.
    X = train.drop(['loss', 'id'], axis=1)
    X_test = test.drop(['loss', 'id'], axis=1)

    n_folds = 10
    kf = KFold(n_splits=n_folds)
    prediction = np.zeros(ids.shape)

    # Out-of-fold predictions and the matching true losses, one Series per fold.
    final_fold_prediction = []
    final_fold_real = []

    # Fold-invariant setup: built once instead of on every iteration.
    RANDOM_STATE = 2016
    params = {
        'min_child_weight': 1,
        'eta': 0.001,
        'colsample_bytree': 0.5,
        'max_depth': 12,
        'subsample': 0.8,
        'alpha': 1,
        'gamma': 1,
        'silent': 1,
        'seed': RANDOM_STATE,
    }
    xgtest = xgb.DMatrix(X_test)

    # The context manager guarantees the score file is closed even if a fold
    # raises part-way through.
    with open('temp_scores.txt', 'w') as partial_evaluation:
        # Folds are numbered from 1 via enumerate; the previous `++i` was a
        # no-op (double unary plus), so the counter never advanced.
        for fold, (train_index, test_index) in enumerate(kf.split(X), start=1):
            print('\n Fold %d' % fold)
            X_train, X_val = X.iloc[train_index], X.iloc[test_index]
            y_train, y_val = y.iloc[train_index], y.iloc[test_index]

            dtrain = xgb.DMatrix(X_train, label=y_train)
            dval = xgb.DMatrix(X_val, label=y_val)
            watchlist = [(dtrain, 'train'), (dval, 'eval')]

            # `verbose_eval` is an argument of xgb.train, not a booster
            # parameter, so it is passed here rather than inside `params`.
            # NOTE(review): with early stopping, some xgboost versions predict
            # with all trained rounds rather than the best iteration -- confirm
            # for the installed version and restrict the prediction if needed.
            model = xgb.train(
                params,
                dtrain,
                num_boost_round=100000,
                evals=watchlist,
                obj=logregobj,
                feval=evalerror,
                early_stopping_rounds=300,
                verbose_eval=True,
            )

            # Accumulate test predictions on the original loss scale; they
            # are averaged over the folds after the loop.
            prediction += np.exp(model.predict(xgtest)) - shift

            # Predict the held-out fold once and reuse the result for both
            # the pooled CV score and this fold's score.
            val_pred = pd.Series(np.exp(model.predict(dval)) - shift)
            val_real = np.exp(y_val) - shift
            final_fold_prediction.append(val_pred)
            final_fold_real.append(val_real)

            temp_cv_score = mean_absolute_error(val_pred, val_real)

            # Flush after each fold so partial scores are readable while the
            # remaining folds are still training.
            partial_evaluation.write(
                'fold ' + str(fold) + ' ' + str(temp_cv_score) + '\n')
            partial_evaluation.flush()

    prediction = prediction / n_folds
    submission = pd.DataFrame()
    submission['id'] = ids
    submission['loss'] = prediction

    submission.to_csv('sub_v_5_long2.csv', index=False)

    # Pool all out-of-fold rows; ignore_index realigns predictions (fresh
    # 0..n index per fold) with the true values (original row index).
    final_fold_prediction = pd.concat(final_fold_prediction, ignore_index=True)
    final_fold_real = pd.concat(final_fold_real, ignore_index=True)

    cv_score = mean_absolute_error(final_fold_prediction, final_fold_real)
    print(cv_score)


 Fold 1
[0]	train-rmse:7.30281	eval-rmse:7.30362	train-mae:3234.8	eval-mae:3243.13
Multiple eval metrics have been passed: 'eval-mae' will be used for early stopping.

Will train until eval-mae hasn't improved in 300 rounds.
[1]	train-rmse:7.26965	eval-rmse:7.27045	train-mae:3234.74	eval-mae:3243.07
[2]	train-rmse:7.23671	eval-rmse:7.2375	train-mae:3234.68	eval-mae:3243.01
[3]	train-rmse:7.20395	eval-rmse:7.20474	train-mae:3234.62	eval-mae:3242.95
[4]	train-rmse:7.17144	eval-rmse:7.17223	train-mae:3234.56	eval-mae:3242.89
[5]	train-rmse:7.13933	eval-rmse:7.14012	train-mae:3234.5	eval-mae:3242.83
[6]	train-rmse:7.10735	eval-rmse:7.10814	train-mae:3234.43	eval-mae:3242.76
[7]	train-rmse:7.07569	eval-rmse:7.07649	train-mae:3234.36	eval-mae:3242.69
[8]	train-rmse:7.04429	eval-rmse:7.04508	train-mae:3234.3	eval-mae:3242.62
[9]	train-rmse:7.0131	eval-rmse:7.01388	train-mae:3234.23	eval-mae:3242.55
[10]	train-rmse:6.98226	eval-rmse:6.98306	train-mae:3234.16	eval-mae:3242.48
[11]	train-rmse:6

[106]	train-rmse:4.84992	eval-rmse:4.8504	train-mae:3216.22	eval-mae:3224.54
[107]	train-rmse:4.83393	eval-rmse:4.8344	train-mae:3215.89	eval-mae:3224.21
[108]	train-rmse:4.81809	eval-rmse:4.81856	train-mae:3215.55	eval-mae:3223.87
[109]	train-rmse:4.80233	eval-rmse:4.80279	train-mae:3215.21	eval-mae:3223.53
[110]	train-rmse:4.78663	eval-rmse:4.78709	train-mae:3214.87	eval-mae:3223.19
[111]	train-rmse:4.77102	eval-rmse:4.77148	train-mae:3214.52	eval-mae:3222.84
[112]	train-rmse:4.7555	eval-rmse:4.75595	train-mae:3214.17	eval-mae:3222.49
[113]	train-rmse:4.74005	eval-rmse:4.74049	train-mae:3213.81	eval-mae:3222.13
[114]	train-rmse:4.72472	eval-rmse:4.72515	train-mae:3213.46	eval-mae:3221.78
[115]	train-rmse:4.70947	eval-rmse:4.70989	train-mae:3213.09	eval-mae:3221.41
[116]	train-rmse:4.6943	eval-rmse:4.69472	train-mae:3212.73	eval-mae:3221.05
[117]	train-rmse:4.6792	eval-rmse:4.67962	train-mae:3212.36	eval-mae:3220.68
[118]	train-rmse:4.66422	eval-rmse:4.66464	train-mae:3211.99	eval-mae

[212]	train-rmse:3.55559	eval-rmse:3.55539	train-mae:3160.1	eval-mae:3168.35
[213]	train-rmse:3.54633	eval-rmse:3.54612	train-mae:3159.37	eval-mae:3167.62
[214]	train-rmse:3.5371	eval-rmse:3.53688	train-mae:3158.63	eval-mae:3166.88
[215]	train-rmse:3.52791	eval-rmse:3.52768	train-mae:3157.89	eval-mae:3166.14
[216]	train-rmse:3.51875	eval-rmse:3.51852	train-mae:3157.15	eval-mae:3165.39
[217]	train-rmse:3.50965	eval-rmse:3.50942	train-mae:3156.4	eval-mae:3164.64
[218]	train-rmse:3.50061	eval-rmse:3.50037	train-mae:3155.65	eval-mae:3163.9
[219]	train-rmse:3.49159	eval-rmse:3.49134	train-mae:3154.9	eval-mae:3163.14
[220]	train-rmse:3.4826	eval-rmse:3.48234	train-mae:3154.14	eval-mae:3162.38
[221]	train-rmse:3.47366	eval-rmse:3.4734	train-mae:3153.37	eval-mae:3161.61
[222]	train-rmse:3.46475	eval-rmse:3.46448	train-mae:3152.6	eval-mae:3160.84
[223]	train-rmse:3.45589	eval-rmse:3.45561	train-mae:3151.83	eval-mae:3160.07
[224]	train-rmse:3.44708	eval-rmse:3.4468	train-mae:3151.06	eval-mae:315

[318]	train-rmse:2.76293	eval-rmse:2.76222	train-mae:3062.97	eval-mae:3071.02
[319]	train-rmse:2.75693	eval-rmse:2.75622	train-mae:3061.88	eval-mae:3069.93
[320]	train-rmse:2.75096	eval-rmse:2.75024	train-mae:3060.79	eval-mae:3068.84
[321]	train-rmse:2.74501	eval-rmse:2.74429	train-mae:3059.7	eval-mae:3067.75
[322]	train-rmse:2.73908	eval-rmse:2.73835	train-mae:3058.6	eval-mae:3066.65
[323]	train-rmse:2.73318	eval-rmse:2.73245	train-mae:3057.51	eval-mae:3065.56
[324]	train-rmse:2.7273	eval-rmse:2.72657	train-mae:3056.41	eval-mae:3064.46
[325]	train-rmse:2.72144	eval-rmse:2.72069	train-mae:3055.31	eval-mae:3063.35
[326]	train-rmse:2.7156	eval-rmse:2.71484	train-mae:3054.21	eval-mae:3062.24
[327]	train-rmse:2.70978	eval-rmse:2.70904	train-mae:3053.1	eval-mae:3061.14
[328]	train-rmse:2.704	eval-rmse:2.70324	train-mae:3052	eval-mae:3060.03
[329]	train-rmse:2.69821	eval-rmse:2.69746	train-mae:3050.89	eval-mae:3058.92
[330]	train-rmse:2.69246	eval-rmse:2.6917	train-mae:3049.77	eval-mae:3057.

[424]	train-rmse:2.2326	eval-rmse:2.23161	train-mae:2934.6	eval-mae:2942.32
[425]	train-rmse:2.22845	eval-rmse:2.22745	train-mae:2933.28	eval-mae:2941
[426]	train-rmse:2.22431	eval-rmse:2.22331	train-mae:2931.96	eval-mae:2939.67
[427]	train-rmse:2.22018	eval-rmse:2.21918	train-mae:2930.63	eval-mae:2938.34
[428]	train-rmse:2.21607	eval-rmse:2.21507	train-mae:2929.31	eval-mae:2937.02
[429]	train-rmse:2.21197	eval-rmse:2.21096	train-mae:2927.98	eval-mae:2935.69
[430]	train-rmse:2.20788	eval-rmse:2.20688	train-mae:2926.66	eval-mae:2934.35
[431]	train-rmse:2.20381	eval-rmse:2.20279	train-mae:2925.33	eval-mae:2933.02
[432]	train-rmse:2.19974	eval-rmse:2.19873	train-mae:2924.01	eval-mae:2931.7
[433]	train-rmse:2.1957	eval-rmse:2.19468	train-mae:2922.67	eval-mae:2930.36
[434]	train-rmse:2.19167	eval-rmse:2.19065	train-mae:2921.34	eval-mae:2929.02
[435]	train-rmse:2.18765	eval-rmse:2.18662	train-mae:2920.01	eval-mae:2927.69
[436]	train-rmse:2.18363	eval-rmse:2.18261	train-mae:2918.67	eval-mae:2

[530]	train-rmse:1.8565	eval-rmse:1.85537	train-mae:2787.57	eval-mae:2794.86
[531]	train-rmse:1.85348	eval-rmse:1.85235	train-mae:2786.14	eval-mae:2793.42
[532]	train-rmse:1.85047	eval-rmse:1.84934	train-mae:2784.7	eval-mae:2791.97
[533]	train-rmse:1.84747	eval-rmse:1.84634	train-mae:2783.26	eval-mae:2790.52
[534]	train-rmse:1.84448	eval-rmse:1.84335	train-mae:2781.81	eval-mae:2789.07
[535]	train-rmse:1.84149	eval-rmse:1.84036	train-mae:2780.37	eval-mae:2787.63
[536]	train-rmse:1.83851	eval-rmse:1.83738	train-mae:2778.93	eval-mae:2786.18
[537]	train-rmse:1.83554	eval-rmse:1.83441	train-mae:2777.49	eval-mae:2784.73
[538]	train-rmse:1.83258	eval-rmse:1.83145	train-mae:2776.05	eval-mae:2783.28
[539]	train-rmse:1.82963	eval-rmse:1.82849	train-mae:2774.6	eval-mae:2781.83
[540]	train-rmse:1.82669	eval-rmse:1.82556	train-mae:2773.15	eval-mae:2780.38
[541]	train-rmse:1.82376	eval-rmse:1.82262	train-mae:2771.71	eval-mae:2778.93
[542]	train-rmse:1.82084	eval-rmse:1.8197	train-mae:2770.26	eval-ma

[636]	train-rmse:1.57844	eval-rmse:1.57733	train-mae:2632.52	eval-mae:2639.32
[637]	train-rmse:1.57616	eval-rmse:1.57506	train-mae:2631.04	eval-mae:2637.83
[638]	train-rmse:1.5739	eval-rmse:1.5728	train-mae:2629.57	eval-mae:2636.35
[639]	train-rmse:1.57164	eval-rmse:1.57054	train-mae:2628.1	eval-mae:2634.88
[640]	train-rmse:1.56939	eval-rmse:1.56828	train-mae:2626.62	eval-mae:2633.4
[641]	train-rmse:1.56714	eval-rmse:1.56604	train-mae:2625.15	eval-mae:2631.92
[642]	train-rmse:1.5649	eval-rmse:1.56379	train-mae:2623.67	eval-mae:2630.44
[643]	train-rmse:1.56266	eval-rmse:1.56155	train-mae:2622.19	eval-mae:2628.95
[644]	train-rmse:1.56043	eval-rmse:1.55933	train-mae:2620.72	eval-mae:2627.48
[645]	train-rmse:1.55821	eval-rmse:1.5571	train-mae:2619.25	eval-mae:2626
[646]	train-rmse:1.55599	eval-rmse:1.55488	train-mae:2617.77	eval-mae:2624.52
[647]	train-rmse:1.55377	eval-rmse:1.55266	train-mae:2616.29	eval-mae:2623.03
[648]	train-rmse:1.55156	eval-rmse:1.55046	train-mae:2614.81	eval-mae:262

[742]	train-rmse:1.36659	eval-rmse:1.36559	train-mae:2476.96	eval-mae:2483.28
[743]	train-rmse:1.36484	eval-rmse:1.36384	train-mae:2475.51	eval-mae:2481.82
[744]	train-rmse:1.36309	eval-rmse:1.36209	train-mae:2474.06	eval-mae:2480.37
[745]	train-rmse:1.36135	eval-rmse:1.36035	train-mae:2472.61	eval-mae:2478.92
[746]	train-rmse:1.35961	eval-rmse:1.35861	train-mae:2471.15	eval-mae:2477.47
[747]	train-rmse:1.35787	eval-rmse:1.35688	train-mae:2469.7	eval-mae:2476.01
[748]	train-rmse:1.35614	eval-rmse:1.35514	train-mae:2468.25	eval-mae:2474.56
[749]	train-rmse:1.35441	eval-rmse:1.35342	train-mae:2466.8	eval-mae:2473.1
[750]	train-rmse:1.35269	eval-rmse:1.35169	train-mae:2465.35	eval-mae:2471.65
[751]	train-rmse:1.35097	eval-rmse:1.34998	train-mae:2463.9	eval-mae:2470.2
[752]	train-rmse:1.34925	eval-rmse:1.34826	train-mae:2462.46	eval-mae:2468.76
[753]	train-rmse:1.34754	eval-rmse:1.34655	train-mae:2461.01	eval-mae:2467.31
[754]	train-rmse:1.34583	eval-rmse:1.34485	train-mae:2459.55	eval-mae

[848]	train-rmse:1.20148	eval-rmse:1.2007	train-mae:2326.11	eval-mae:2332.36
[849]	train-rmse:1.2001	eval-rmse:1.19933	train-mae:2324.72	eval-mae:2330.97
[850]	train-rmse:1.19872	eval-rmse:1.19795	train-mae:2323.33	eval-mae:2329.57
[851]	train-rmse:1.19735	eval-rmse:1.19659	train-mae:2321.94	eval-mae:2328.19
[852]	train-rmse:1.19598	eval-rmse:1.19522	train-mae:2320.56	eval-mae:2326.8
[853]	train-rmse:1.19462	eval-rmse:1.19386	train-mae:2319.17	eval-mae:2325.42
[854]	train-rmse:1.19325	eval-rmse:1.1925	train-mae:2317.78	eval-mae:2324.03
[855]	train-rmse:1.19189	eval-rmse:1.19114	train-mae:2316.4	eval-mae:2322.66
[856]	train-rmse:1.19053	eval-rmse:1.18978	train-mae:2315.02	eval-mae:2321.28
[857]	train-rmse:1.18918	eval-rmse:1.18843	train-mae:2313.64	eval-mae:2319.9
[858]	train-rmse:1.18783	eval-rmse:1.18709	train-mae:2312.26	eval-mae:2318.53
[859]	train-rmse:1.18648	eval-rmse:1.18574	train-mae:2310.87	eval-mae:2317.15
[860]	train-rmse:1.18514	eval-rmse:1.1844	train-mae:2309.5	eval-mae:23

[954]	train-rmse:1.07056	eval-rmse:1.07014	train-mae:2183.69	eval-mae:2190.43
[955]	train-rmse:1.06946	eval-rmse:1.06904	train-mae:2182.39	eval-mae:2189.13
[956]	train-rmse:1.06836	eval-rmse:1.06794	train-mae:2181.1	eval-mae:2187.85
[957]	train-rmse:1.06726	eval-rmse:1.06684	train-mae:2179.81	eval-mae:2186.56
[958]	train-rmse:1.06616	eval-rmse:1.06575	train-mae:2178.52	eval-mae:2185.27
[959]	train-rmse:1.06507	eval-rmse:1.06466	train-mae:2177.23	eval-mae:2183.99
[960]	train-rmse:1.06398	eval-rmse:1.06358	train-mae:2175.94	eval-mae:2182.7
[961]	train-rmse:1.06289	eval-rmse:1.06249	train-mae:2174.64	eval-mae:2181.42
[962]	train-rmse:1.0618	eval-rmse:1.06141	train-mae:2173.35	eval-mae:2180.13
[963]	train-rmse:1.06071	eval-rmse:1.06032	train-mae:2172.06	eval-mae:2178.84
[964]	train-rmse:1.05963	eval-rmse:1.05925	train-mae:2170.78	eval-mae:2177.57
[965]	train-rmse:1.05855	eval-rmse:1.05817	train-mae:2169.49	eval-mae:2176.28
[966]	train-rmse:1.05747	eval-rmse:1.0571	train-mae:2168.21	eval-ma

[1058]	train-rmse:0.967009	eval-rmse:0.967011	train-mae:2054.28	eval-mae:2061.57
[1059]	train-rmse:0.966113	eval-rmse:0.966119	train-mae:2053.08	eval-mae:2060.38
[1060]	train-rmse:0.965219	eval-rmse:0.965229	train-mae:2051.89	eval-mae:2059.19
[1061]	train-rmse:0.964326	eval-rmse:0.964338	train-mae:2050.7	eval-mae:2058.01
[1062]	train-rmse:0.963436	eval-rmse:0.963453	train-mae:2049.52	eval-mae:2056.83
[1063]	train-rmse:0.962546	eval-rmse:0.962569	train-mae:2048.34	eval-mae:2055.66
[1064]	train-rmse:0.96166	eval-rmse:0.961687	train-mae:2047.15	eval-mae:2054.48
[1065]	train-rmse:0.960776	eval-rmse:0.960807	train-mae:2045.97	eval-mae:2053.3
[1066]	train-rmse:0.959895	eval-rmse:0.959929	train-mae:2044.8	eval-mae:2052.13
[1067]	train-rmse:0.959012	eval-rmse:0.959053	train-mae:2043.61	eval-mae:2050.95
[1068]	train-rmse:0.958132	eval-rmse:0.958179	train-mae:2042.43	eval-mae:2049.78
[1069]	train-rmse:0.957252	eval-rmse:0.957302	train-mae:2041.25	eval-mae:2048.6
[1070]	train-rmse:0.956374	eval-r

[1160]	train-rmse:0.883919	eval-rmse:0.884419	train-mae:1938.4	eval-mae:1946.07
[1161]	train-rmse:0.883185	eval-rmse:0.883689	train-mae:1937.33	eval-mae:1945
[1162]	train-rmse:0.882445	eval-rmse:0.882956	train-mae:1936.24	eval-mae:1943.92
[1163]	train-rmse:0.881709	eval-rmse:0.882226	train-mae:1935.16	eval-mae:1942.85
[1164]	train-rmse:0.880974	eval-rmse:0.881497	train-mae:1934.09	eval-mae:1941.78
[1165]	train-rmse:0.88024	eval-rmse:0.880769	train-mae:1933.01	eval-mae:1940.7
[1166]	train-rmse:0.879511	eval-rmse:0.880043	train-mae:1931.94	eval-mae:1939.64
[1167]	train-rmse:0.878781	eval-rmse:0.879319	train-mae:1930.87	eval-mae:1938.56
[1168]	train-rmse:0.878052	eval-rmse:0.878595	train-mae:1929.8	eval-mae:1937.5
[1169]	train-rmse:0.877323	eval-rmse:0.87787	train-mae:1928.72	eval-mae:1936.42
[1170]	train-rmse:0.876593	eval-rmse:0.877145	train-mae:1927.64	eval-mae:1935.34
[1171]	train-rmse:0.87587	eval-rmse:0.876426	train-mae:1926.58	eval-mae:1934.27
[1172]	train-rmse:0.875147	eval-rmse:0

[1262]	train-rmse:0.815239	eval-rmse:0.816319	train-mae:1833.77	eval-mae:1841.71
[1263]	train-rmse:0.814627	eval-rmse:0.815713	train-mae:1832.8	eval-mae:1840.74
[1264]	train-rmse:0.814015	eval-rmse:0.815108	train-mae:1831.83	eval-mae:1839.77
[1265]	train-rmse:0.813406	eval-rmse:0.814504	train-mae:1830.86	eval-mae:1838.8
[1266]	train-rmse:0.812797	eval-rmse:0.813901	train-mae:1829.89	eval-mae:1837.83
[1267]	train-rmse:0.812191	eval-rmse:0.8133	train-mae:1828.92	eval-mae:1836.87
[1268]	train-rmse:0.811585	eval-rmse:0.8127	train-mae:1827.95	eval-mae:1835.91
[1269]	train-rmse:0.81098	eval-rmse:0.8121	train-mae:1826.99	eval-mae:1834.94
[1270]	train-rmse:0.810375	eval-rmse:0.811502	train-mae:1826.02	eval-mae:1833.98
[1271]	train-rmse:0.809773	eval-rmse:0.810906	train-mae:1825.07	eval-mae:1833.03
[1272]	train-rmse:0.809172	eval-rmse:0.810312	train-mae:1824.11	eval-mae:1832.07
[1273]	train-rmse:0.808574	eval-rmse:0.80972	train-mae:1823.16	eval-mae:1831.12
[1274]	train-rmse:0.807975	eval-rmse:0