In [2]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import nltk.corpus
from nltk.stem.snowball import SnowballStemmer
from sklearn.cross_validation import train_test_split
from ml_metrics import rmsle
import xgboost as xgb
import math
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import datasets, linear_model
import scipy.sparse as sps
from scipy.sparse import coo_matrix, hstack, vstack, csr_matrix
from scipy import io
from datetime import datetime
import gc
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
def evalerror(preds, dtrain):

    labels = dtrain.get_label()
    assert len(preds) == len(labels)
    labels = labels.tolist()
    preds = preds.tolist()
    terms_to_sum = [(math.log(labels[i] + 1) - math.log(max(0,preds[i]) + 1))
                    ** 2.0 for i,pred in enumerate(labels)]
    return 'error', (sum(terms_to_sum) * (1.0/len(preds))) ** 0.5

In [4]:
train = pd.read_csv('../input/train.csv', usecols=['Semana', 'Cliente_ID', 'Producto_ID', 'Demanda_uni_equil'])
test = pd.read_csv('../input/test.csv', usecols=['id', 'Semana', 'Cliente_ID', 'Producto_ID'])
train.tail()

Unnamed: 0,Semana,Cliente_ID,Producto_ID,Demanda_uni_equil
74180459,9,4528866,32873,4
74180460,9,4528866,34226,4
74180461,9,4528866,45112,4
74180462,9,4547943,40217,0
74180463,9,4708097,43159,1


In [5]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74180464 entries, 0 to 74180463
Data columns (total 4 columns):
Semana               int64
Cliente_ID           int64
Producto_ID          int64
Demanda_uni_equil    int64
dtypes: int64(4)
memory usage: 2.2 GB


In [6]:
test.head()

Unnamed: 0,id,Semana,Cliente_ID,Producto_ID
0,0,11,4639078,35305
1,1,11,4705135,1238
2,2,10,4549769,32940
3,3,11,4717855,43066
4,4,11,966351,1277


In [7]:
cliente_demand_total = train.groupby('Cliente_ID', as_index=False)['Demanda_uni_equil'].sum()
cliente_demand_total['total_demand']=cliente_demand_total['Demanda_uni_equil']
cliente_demand_total=cliente_demand_total.drop('Demanda_uni_equil', axis=1)
cliente_demand_total.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 880604 entries, 0 to 880603
Data columns (total 2 columns):
Cliente_ID      880604 non-null int64
total_demand    880604 non-null int64
dtypes: int64(2)
memory usage: 20.2 MB


In [8]:
cliente_demand_total.head()

Unnamed: 0,Cliente_ID,total_demand
0,26,6378
1,60,46830
2,65,68735
3,101,1224
4,105,17373


In [9]:
product_demand_total = train.groupby('Producto_ID', as_index=False)['Demanda_uni_equil'].sum()
product_demand_total['p_total_demand']=product_demand_total['Demanda_uni_equil']
product_demand_total=product_demand_total.drop('Demanda_uni_equil', axis=1)
product_demand_total.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1799 entries, 0 to 1798
Data columns (total 2 columns):
Producto_ID       1799 non-null int64
p_total_demand    1799 non-null int64
dtypes: int64(2)
memory usage: 42.2 KB


In [10]:
product_demand_total.head()

Unnamed: 0,Producto_ID,p_total_demand
0,41,22414
1,53,33185
2,72,715843
3,73,698935
4,100,1093


In [11]:
grouping = ['Semana', 'Cliente_ID', 'Producto_ID']
train_group = train.groupby(grouping,as_index=False)['Demanda_uni_equil'].sum()

In [12]:
train_group.tail()

Unnamed: 0,Semana,Cliente_ID,Producto_ID,Demanda_uni_equil
74013018,9,2015152015,1278,20
74013019,9,2015152015,2233,3
74013020,9,2015152015,2665,10
74013021,9,2015152015,4280,8
74013022,9,2015152015,31717,3


In [13]:
train_group.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 74013023 entries, 0 to 74013022
Data columns (total 4 columns):
Semana               int64
Cliente_ID           int64
Producto_ID          int64
Demanda_uni_equil    int64
dtypes: int64(4)
memory usage: 2.8 GB


In [14]:
train_group.head()

Unnamed: 0,Semana,Cliente_ID,Producto_ID,Demanda_uni_equil
0,3,26,1182,39
1,3,26,4767,42
2,3,26,31393,20
3,3,26,31690,42
4,3,26,32962,3


In [15]:
train_cliprod = train_group
train_cliprod["cliprod"] = train_cliprod["Cliente_ID"].map(str) + ' ' + train_cliprod["Producto_ID"].map(str)

In [16]:
test_cliprod = test
test_cliprod["cliprod"] = test_cliprod["Cliente_ID"].map(str) + ' ' + test_cliprod["Producto_ID"].map(str)

In [17]:
train_cliprod.head()

Unnamed: 0,Semana,Cliente_ID,Producto_ID,Demanda_uni_equil,cliprod
0,3,26,1182,39,26 1182
1,3,26,4767,42,26 4767
2,3,26,31393,20,26 31393
3,3,26,31690,42,26 31690
4,3,26,32962,3,26 32962


In [18]:
test_cliprod.head()

Unnamed: 0,id,Semana,Cliente_ID,Producto_ID,cliprod
0,0,11,4639078,35305,4639078 35305
1,1,11,4705135,1238,4705135 1238
2,2,10,4549769,32940,4549769 32940
3,3,11,4717855,43066,4717855 43066
4,4,11,966351,1277,966351 1277


In [19]:
grouping = ['Semana', 'cliprod']
train_cliprod = train_cliprod.groupby(grouping,as_index=False)['Demanda_uni_equil'].sum()


In [20]:
train_cliprod.head()

Unnamed: 0,Semana,cliprod,Demanda_uni_equil
0,3,100000 43274,1
1,3,1000001 1220,2
2,3,1000001 1240,11
3,3,1000001 1242,2
4,3,1000001 1250,5


In [21]:
train_cliprod.tail()

Unnamed: 0,Semana,cliprod,Demanda_uni_equil
74013018,9,9999999 43066,1
74013019,9,9999999 43069,23
74013020,9,9999999 43084,1
74013021,9,9999999 43274,11
74013022,9,9999999 43285,45


In [22]:
s3 = train_cliprod[train_cliprod.Semana == 3].drop('Semana', axis=1).set_index('cliprod')
s4 = train_cliprod[train_cliprod.Semana == 4].drop('Semana', axis=1).set_index('cliprod')
s5 = train_cliprod[train_cliprod.Semana == 5].drop('Semana', axis=1).set_index('cliprod')
s6 = train_cliprod[train_cliprod.Semana == 6].drop('Semana', axis=1).set_index('cliprod')
s7 = train_cliprod[train_cliprod.Semana == 7].drop('Semana', axis=1).set_index('cliprod')
s8 = train_cliprod[train_cliprod.Semana == 8].drop('Semana', axis=1).set_index('cliprod')
s9 = train_cliprod[train_cliprod.Semana == 9].drop('Semana', axis=1).set_index('cliprod')

t10 = test_cliprod[test_cliprod.Semana == 10].drop('Semana', axis=1).set_index('cliprod')
t11 = test_cliprod[test_cliprod.Semana == 11].drop('Semana', axis=1).set_index('cliprod')

In [23]:
s3.head()

Unnamed: 0_level_0,Demanda_uni_equil
cliprod,Unnamed: 1_level_1
100000 43274,1
1000001 1220,2
1000001 1240,11
1000001 1242,2
1000001 1250,5


In [24]:
t10.head()

Unnamed: 0_level_0,id,Cliente_ID,Producto_ID
cliprod,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4549769 32940,2,4549769,32940
4414012 35305,7,4414012,35305
397854 1240,8,397854,1240
4387996 2233,11,4387996,2233
4446449 1240,13,4446449,1240


In [25]:
s4["d1"]=s3['Demanda_uni_equil']
s5["d1"]=s4['Demanda_uni_equil']
s6["d1"]=s5['Demanda_uni_equil']
s7["d1"]=s6['Demanda_uni_equil']
s8["d1"]=s7['Demanda_uni_equil']
s9["d1"]=s8['Demanda_uni_equil']

t10["d1"]=s9['Demanda_uni_equil']

s5["d2"]=s3['Demanda_uni_equil']
s6["d2"]=s4['Demanda_uni_equil']
s7["d2"]=s5['Demanda_uni_equil']
s8["d2"]=s6['Demanda_uni_equil']
s9["d2"]=s7['Demanda_uni_equil']

t10["d2"]=s8['Demanda_uni_equil']
t11["d2"]=s9['Demanda_uni_equil']

s6["d3"]=s3['Demanda_uni_equil']
s7["d3"]=s4['Demanda_uni_equil']
s8["d3"]=s5['Demanda_uni_equil']
s9["d3"]=s6['Demanda_uni_equil']

t10["d3"]=s7['Demanda_uni_equil']
t11["d3"]=s8['Demanda_uni_equil']

s7["d4"]=s3['Demanda_uni_equil']
s8["d4"]=s4['Demanda_uni_equil']
s9["d4"]=s5['Demanda_uni_equil']

t10["d4"]=s6['Demanda_uni_equil']
t11["d4"]=s7['Demanda_uni_equil']

s8["d5"]=s3['Demanda_uni_equil']
s9["d5"]=s4['Demanda_uni_equil']

t10["d5"]=s5['Demanda_uni_equil']
t11["d5"]=s6['Demanda_uni_equil']

s9["d6"]=s3['Demanda_uni_equil']

t10["d6"]=s4['Demanda_uni_equil']
t11["d6"]=s5['Demanda_uni_equil']

In [26]:
s3.fillna(value=0, inplace=True)
s4.fillna(value=0, inplace=True)
s5.fillna(value=0, inplace=True)
s6.fillna(value=0, inplace=True)
s7.fillna(value=0, inplace=True)
s8.fillna(value=0, inplace=True)
s9.fillna(value=0, inplace=True)

t10.fillna(value=0, inplace=True)
t11.fillna(value=0, inplace=True)




In [27]:
s3["d1"]=np.nan

t11["d1"]=np.nan


s3["d2"]=np.nan
s4["d2"]=np.nan



s3["d3"]=np.nan
s4["d3"]=np.nan
s5["d3"]=np.nan



s3["d4"]=np.nan
s4["d4"]=np.nan
s5["d4"]=np.nan
s6["d4"]=np.nan


s3["d5"]=np.nan
s4["d5"]=np.nan
s5["d5"]=np.nan
s6["d5"]=np.nan
s7["d5"]=np.nan



s3["d6"]=np.nan
s4["d6"]=np.nan
s5["d6"]=np.nan
s6["d6"]=np.nan
s7["d6"]=np.nan
s8["d6"]=np.nan



In [28]:
s3.head()

Unnamed: 0_level_0,Demanda_uni_equil,d1,d2,d3,d4,d5,d6
cliprod,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
100000 43274,1,,,,,,
1000001 1220,2,,,,,,
1000001 1240,11,,,,,,
1000001 1242,2,,,,,,
1000001 1250,5,,,,,,


In [29]:
t10.head()

Unnamed: 0_level_0,id,Cliente_ID,Producto_ID,d1,d2,d3,d4,d5,d6
cliprod,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4549769 32940,2,4549769,32940,2.0,2.0,0.0,0.0,0.0,0.0
4414012 35305,7,4414012,35305,0.0,0.0,0.0,0.0,0.0,0.0
397854 1240,8,397854,1240,3.0,4.0,0.0,0.0,10.0,6.0
4387996 2233,11,4387996,2233,2.0,2.0,2.0,2.0,2.0,2.0
4446449 1240,13,4446449,1240,4.0,4.0,4.0,10.0,0.0,29.0


In [30]:
s3 = s3.reset_index()
s3.head()

Unnamed: 0,cliprod,Demanda_uni_equil,d1,d2,d3,d4,d5,d6
0,100000 43274,1,,,,,,
1,1000001 1220,2,,,,,,
2,1000001 1240,11,,,,,,
3,1000001 1242,2,,,,,,
4,1000001 1250,5,,,,,,


In [31]:
s3['Cliente_ID'], s3['Producto_ID'] = zip(*s3['cliprod'].apply(lambda x: x.split(' ', 1)))

In [32]:
s3.head()

Unnamed: 0,cliprod,Demanda_uni_equil,d1,d2,d3,d4,d5,d6,Cliente_ID,Producto_ID
0,100000 43274,1,,,,,,,100000,43274
1,1000001 1220,2,,,,,,,1000001,1220
2,1000001 1240,11,,,,,,,1000001,1240
3,1000001 1242,2,,,,,,,1000001,1242
4,1000001 1250,5,,,,,,,1000001,1250


In [33]:
s3 = s3.drop(['cliprod'], axis=1)
s3.head()

Unnamed: 0,Demanda_uni_equil,d1,d2,d3,d4,d5,d6,Cliente_ID,Producto_ID
0,1,,,,,,,100000,43274
1,2,,,,,,,1000001,1220
2,11,,,,,,,1000001,1240
3,2,,,,,,,1000001,1242
4,5,,,,,,,1000001,1250


In [34]:
s4 = s4.reset_index()
s4['Cliente_ID'], s4['Producto_ID'] = zip(*s4['cliprod'].apply(lambda x: x.split(' ', 1)))
s4 = s4.drop(['cliprod'], axis=1)

s5 = s5.reset_index()
s5['Cliente_ID'], s5['Producto_ID'] = zip(*s5['cliprod'].apply(lambda x: x.split(' ', 1)))
s5 = s5.drop(['cliprod'], axis=1)

s6 = s6.reset_index()
s6['Cliente_ID'], s6['Producto_ID'] = zip(*s6['cliprod'].apply(lambda x: x.split(' ', 1)))
s6 = s6.drop(['cliprod'], axis=1)

s7 = s7.reset_index()
s7['Cliente_ID'], s7['Producto_ID'] = zip(*s7['cliprod'].apply(lambda x: x.split(' ', 1)))
s7 = s7.drop(['cliprod'], axis=1)

s8 = s8.reset_index()
s8['Cliente_ID'], s8['Producto_ID'] = zip(*s8['cliprod'].apply(lambda x: x.split(' ', 1)))
s8 = s8.drop(['cliprod'], axis=1)

s9 = s9.reset_index()
s9['Cliente_ID'], s9['Producto_ID'] = zip(*s9['cliprod'].apply(lambda x: x.split(' ', 1)))
s9 = s9.drop(['cliprod'], axis=1)


t10 = t10.reset_index()
t10['Cliente_ID'], t10['Producto_ID'] = zip(*t10['cliprod'].apply(lambda x: x.split(' ', 1)))
t10 = t10.drop(['cliprod'], axis=1)

t11 = t11.reset_index()
t11['Cliente_ID'], t11['Producto_ID'] = zip(*t11['cliprod'].apply(lambda x: x.split(' ', 1)))
t11 = t11.drop(['cliprod'], axis=1)

In [35]:
t10.head()

Unnamed: 0,id,Cliente_ID,Producto_ID,d1,d2,d3,d4,d5,d6
0,2,4549769,32940,2.0,2.0,0.0,0.0,0.0,0.0
1,7,4414012,35305,0.0,0.0,0.0,0.0,0.0,0.0
2,8,397854,1240,3.0,4.0,0.0,0.0,10.0,6.0
3,11,4387996,2233,2.0,2.0,2.0,2.0,2.0,2.0
4,13,4446449,1240,4.0,4.0,4.0,10.0,0.0,29.0


In [36]:
train_joined = pd.concat([s3, s4, s5, s6, s7, s8, s9], ignore_index = True)
test_joined = pd.concat([t10, t11], ignore_index = True)
del s3, s4, s5, s6, s7, s8, s9, t10, t11
gc.collect()

48

In [37]:
train_joined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74013023 entries, 0 to 74013022
Data columns (total 9 columns):
Demanda_uni_equil    int64
d1                   float64
d2                   float64
d3                   float64
d4                   float64
d5                   float64
d6                   float64
Cliente_ID           object
Producto_ID          object
dtypes: float64(6), int64(1), object(2)
memory usage: 5.0+ GB


In [38]:
train_joined.head()

Unnamed: 0,Demanda_uni_equil,d1,d2,d3,d4,d5,d6,Cliente_ID,Producto_ID
0,1,,,,,,,100000,43274
1,2,,,,,,,1000001,1220
2,11,,,,,,,1000001,1240
3,2,,,,,,,1000001,1242
4,5,,,,,,,1000001,1250


In [39]:
train_joined[['Cliente_ID', 'Producto_ID']]=train_joined[['Cliente_ID', 'Producto_ID']].astype('int32')
test_joined[['Cliente_ID', 'Producto_ID']]=test_joined[['Cliente_ID', 'Producto_ID']].astype('int32')

In [40]:
train_joined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74013023 entries, 0 to 74013022
Data columns (total 9 columns):
Demanda_uni_equil    int64
d1                   float64
d2                   float64
d3                   float64
d4                   float64
d5                   float64
d6                   float64
Cliente_ID           int32
Producto_ID          int32
dtypes: float64(6), int32(2), int64(1)
memory usage: 4.4 GB


In [41]:
train_joined = pd.merge(train_joined, product_demand_total, on='Producto_ID')

In [42]:
train_joined.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 74013023 entries, 0 to 74013022
Data columns (total 10 columns):
Demanda_uni_equil    int64
d1                   float64
d2                   float64
d3                   float64
d4                   float64
d5                   float64
d6                   float64
Cliente_ID           int32
Producto_ID          int32
p_total_demand       int64
dtypes: float64(6), int32(2), int64(2)
memory usage: 5.5 GB


In [43]:
test_joined = pd.merge(test_joined, product_demand_total, on='Producto_ID')

In [44]:
test_joined.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6973611 entries, 0 to 6973610
Data columns (total 10 columns):
Cliente_ID        int32
Producto_ID       int32
d1                float64
d2                float64
d3                float64
d4                float64
d5                float64
d6                float64
id                int64
p_total_demand    int64
dtypes: float64(6), int32(2), int64(2)
memory usage: 532.0 MB


In [45]:
train_joined.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 74013023 entries, 0 to 74013022
Data columns (total 10 columns):
Demanda_uni_equil    int64
d1                   float64
d2                   float64
d3                   float64
d4                   float64
d5                   float64
d6                   float64
Cliente_ID           int32
Producto_ID          int32
p_total_demand       int64
dtypes: float64(6), int32(2), int64(2)
memory usage: 5.5 GB


In [46]:
cliente_demand_total.info()
cliente_demand_total.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 880604 entries, 0 to 880603
Data columns (total 2 columns):
Cliente_ID      880604 non-null int64
total_demand    880604 non-null int64
dtypes: int64(2)
memory usage: 20.2 MB


Unnamed: 0,Cliente_ID,total_demand
0,26,6378
1,60,46830
2,65,68735
3,101,1224
4,105,17373


In [47]:
#train_joined = train_joined.join(cliente_demand_total, on='Cliente_ID', lsuffix='_t', ignore_index=True)

train_joined = pd.merge(train_joined, cliente_demand_total, on='Cliente_ID')

In [48]:
train_joined.head()

Unnamed: 0,Demanda_uni_equil,d1,d2,d3,d4,d5,d6,Cliente_ID,Producto_ID,p_total_demand,total_demand
0,1,,,,,,,100000,43274,1639486,112
1,2,1.0,,,,,,100000,43274,1639486,112
2,2,2.0,1.0,,,,,100000,43274,1639486,112
3,2,0.0,0.0,2.0,2.0,1.0,,100000,43274,1639486,112
4,3,0.0,0.0,,,,,100000,30572,7229285,112


In [49]:
train_joined.head()

Unnamed: 0,Demanda_uni_equil,d1,d2,d3,d4,d5,d6,Cliente_ID,Producto_ID,p_total_demand,total_demand
0,1,,,,,,,100000,43274,1639486,112
1,2,1.0,,,,,,100000,43274,1639486,112
2,2,2.0,1.0,,,,,100000,43274,1639486,112
3,2,0.0,0.0,2.0,2.0,1.0,,100000,43274,1639486,112
4,3,0.0,0.0,,,,,100000,30572,7229285,112


In [50]:
train_joined.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 74013023 entries, 0 to 74013022
Data columns (total 11 columns):
Demanda_uni_equil    int64
d1                   float64
d2                   float64
d3                   float64
d4                   float64
d5                   float64
d6                   float64
Cliente_ID           int32
Producto_ID          int32
p_total_demand       int64
total_demand         int64
dtypes: float64(6), int32(2), int64(3)
memory usage: 6.1 GB


In [51]:
test_joined = pd.merge(test_joined, cliente_demand_total, on='Cliente_ID')

In [52]:
test_joined.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6943675 entries, 0 to 6943674
Data columns (total 11 columns):
Cliente_ID        int32
Producto_ID       int32
d1                float64
d2                float64
d3                float64
d4                float64
d5                float64
d6                float64
id                int64
p_total_demand    int64
total_demand      int64
dtypes: float64(6), int32(2), int64(3)
memory usage: 582.7 MB


In [53]:
train_joined.fillna(value=-999, inplace=True)
test_joined.fillna(value=-999, inplace=True)

In [54]:
train_joined.to_csv('../input/train_alld_totals.csv', index=False)
test_joined.to_csv('../input/test_alld_totals.csv', index=False)

In [55]:
del test, test_cliprod, train, train_cliprod, train_group
gc.collect()

787

train_joined = train_joined.astype('in64')
test_joined = test_joined.astype('int64')
train_joined.head()

test_joined.to_csv('../input/test_improved_01.csv')
train_joined.to_csv('../input/train_improved_01.csv')

In [56]:
ids = test_joined['id']
test_joined = test_joined.drop(['id'],axis = 1)

In [57]:
y = train_joined['Demanda_uni_equil']
X = train_joined[test_joined.columns.values]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1729)

print ('Division_Set_Shapes:', X.shape, y.shape)
print ('Validation_Set_Shapes:', X_train.shape, X_test.shape)

('Division_Set_Shapes:', (74013023, 10), (74013023,))
('Validation_Set_Shapes:', (59210418, 10), (14802605, 10))


In [58]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59210418 entries, 28817307 to 38733043
Data columns (total 10 columns):
Cliente_ID        int32
Producto_ID       int32
d1                float64
d2                float64
d3                float64
d4                float64
d5                float64
d6                float64
p_total_demand    int64
total_demand      int64
dtypes: float64(6), int32(2), int64(2)
memory usage: 4.4 GB


In [59]:
params = {}
params['objective'] = "reg:linear"
params['eta'] = 0.025
params['max_depth'] = 10
params['subsample'] = 0.8
params['colsample_bytree'] = 0.6
params['silent'] = True

In [60]:
test_preds = np.zeros(test_joined.shape[0])
xg_train = xgb.DMatrix(X_train, label=y_train, missing = -999)
xg_test = xgb.DMatrix(X_test)

watchlist = [(xg_train, 'train_joined')]
num_rounds = 200

xgclassifier = xgb.train(params, xg_train, num_rounds, watchlist, feval = evalerror,
                         early_stopping_rounds= 20, verbose_eval = 10)
preds = xgclassifier.predict(xg_test, ntree_limit=xgclassifier.best_iteration)

print ('RMSLE Score:', rmsle(y_test, preds))

[0]	train_joined-error:1.33882
Will train until train_joined-error hasn't improved in 20 rounds.
[10]	train_joined-error:0.847869


KeyboardInterrupt: 

In [None]:
fxg_test = xgb.DMatrix(test_joined)
fold_preds = np.around(xgclassifier.predict(fxg_test, ntree_limit=xgclassifier.best_iteration), decimals = 1)
test_preds += fold_preds

In [None]:
submission = pd.DataFrame({'id':ids, 'Demanda_uni_equil': test_preds})

In [None]:
submission.Demanda_uni_equil = submission.Demanda_uni_equil.round()

In [None]:
submission[["id","Demanda_uni_equil"]].to_csv('../submissions/ALL' +
                                              datetime.now().strftime('%Y-%m-%d-%H-%M-%S') +'.csv', index=False)

print ('done')