# Librerías

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import os
import seaborn as sns

%matplotlib inline

# Predicciones en csv

In [2]:
preds_lightgbm = pd.read_csv('../predictions/preds_lightgbm.csv')
preds_catboost = pd.read_csv('../predictions/preds_catboost.csv')
preds_xgboost = pd.read_csv('../predictions/preds_xgboost.csv')

In [3]:
full_preds = preds_lightgbm.join(preds_catboost.set_index('Unnamed: 0'), on='Unnamed: 0')\
.join(preds_xgboost.set_index('Unnamed: 0'), on='Unnamed: 0')
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm,predicciones_catboost,predicciones_xgboost
0,0,30,28,22
1,1,71,74,60
2,2,2,2,2
3,3,12,17,7
4,4,41,53,41
5,5,64,65,56
6,6,38,45,31
7,7,0,0,0
8,8,1,1,1
9,9,31,29,23


In [4]:
full_preds['media'] = (full_preds['predicciones_lightgbm'] +
                       full_preds['predicciones_catboost'] +
                       full_preds['predicciones_xgboost'])/3
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm,predicciones_catboost,predicciones_xgboost,media
0,0,30,28,22,26.666667
1,1,71,74,60,68.333333
2,2,2,2,2,2.0
3,3,12,17,7,12.0
4,4,41,53,41,45.0
5,5,64,65,56,61.666667
6,6,38,45,31,38.0
7,7,0,0,0,0.0
8,8,1,1,1,1.0
9,9,31,29,23,27.666667


In [5]:
y_preds = [int(round(x)) for x in full_preds['media'].values.tolist()]

In [6]:
y_preds[:20]

[27, 68, 2, 12, 45, 62, 38, 0, 1, 28, 8, 43, 0, 0, 47, 118, 62, 32, 28, 80]

# Apuesta de bloque

In [7]:
product_blocks = pd.read_csv('../data/product_blocks.csv')
product_blocks.head(10)

Unnamed: 0,product_id,block_id
0,612967398,0
1,296892108,0
2,139541214,0
3,963923934,0
4,938230141,0
5,172045154,0
6,663552768,0
7,160621689,1
8,948976891,1
9,556017319,1


In [8]:
productos_por_bloque = product_blocks.groupby('block_id').count()['product_id']
productos_por_bloque.name = 'n_products'
productos_por_bloque.head()

block_id
0     7
1     7
2     7
3     6
4    10
Name: n_products, dtype: int64

In [9]:
product_blocks_n = product_blocks.join(productos_por_bloque, on='block_id', how='left')
product_blocks_n.head()

Unnamed: 0,product_id,block_id,n_products
0,612967398,0,7
1,296892108,0,7
2,139541214,0,7
3,963923934,0,7
4,938230141,0,7


In [10]:
test = pd.read_csv('../data/final_test.csv', usecols=['product_id', 'price'])

In [11]:
test.head()

Unnamed: 0,product_id,price
0,151926,25.95
1,213413,19.95
2,310130,12.95
3,455200,29.95
4,571044,15.95


In [12]:
predicciones = pd.DataFrame({'product_id': test.product_id,
                             'preds': y_preds,
                             'price':test.price,
                             'gain': y_preds * test.price})

predicciones = predicciones.sort_values('gain', ascending=False)
predicciones.head()

Unnamed: 0,product_id,preds,price,gain
5480,617359148,170,69.95,11891.5
2481,276600836,204,55.95,11413.8
5174,579379740,158,69.95,11052.1
5369,604894198,165,59.95,9891.75
329,37119458,427,22.95,9799.65


In [13]:
predicciones.shape, test.shape

((62244, 4), (62244, 2))

In [14]:
predicciones_final = predicciones.join(product_blocks_n.set_index('product_id'),
                                       on='product_id', how='left').reset_index(drop=True)
predicciones_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products
0,617359148,170,69.95,11891.5,2442,4
1,276600836,204,55.95,11413.8,447,7
2,579379740,158,69.95,11052.1,1409,7
3,604894198,165,59.95,9891.75,626,9
4,37119458,427,22.95,9799.65,812,8


In [15]:
group_block_gain = predicciones_final.groupby('block_id').sum().sort_values('gain', ascending=False)['gain']
group_block_gain.name = 'gain_per_block'

In [16]:
preds_final = predicciones_final.join(group_block_gain, on='block_id')
preds_final = preds_final.sort_values('gain_per_block', ascending=False)
preds_final = preds_final.reset_index(drop=True)

In [17]:
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,640651551,0,39.95,0.0,21,10,22144.95
1,38527509,1,3.95,3.95,21,10,22144.95
2,38527509,1,3.95,3.95,21,10,22144.95
3,640651551,0,39.95,0.0,21,10,22144.95
4,229995577,0,25.99,0.0,21,10,22144.95


In [18]:
preds_final = preds_final.iloc[preds_final.block_id.drop_duplicates().index.values.tolist()]
preds_final = preds_final.sort_values('gain_per_block', ascending=False)

In [19]:
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,640651551,0,39.95,0.0,21,10,22144.95
42,830984307,1,39.95,39.95,1409,7,16911.05
70,191922272,5,69.95,349.75,1121,10,16278.85
112,746385416,110,39.95,4394.5,530,10,15860.65
161,645173005,0,29.99,0.0,487,8,15496.33


In [20]:
preds_final[preds_final.block_id==530]

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
112,746385416,110,39.95,4394.5,530,10,15860.65


In [21]:
preds_final['product_cumsum'] = preds_final.n_products.cumsum()
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,640651551,0,39.95,0.0,21,10,22144.95,10
42,830984307,1,39.95,39.95,1409,7,16911.05,17
70,191922272,5,69.95,349.75,1121,10,16278.85,27
112,746385416,110,39.95,4394.5,530,10,15860.65,37
161,645173005,0,29.99,0.0,487,8,15496.33,45


In [22]:
bet_blocks = preds_final[preds_final.product_cumsum <= 100]
bet_blocks

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,640651551,0,39.95,0.0,21,10,22144.95,10
42,830984307,1,39.95,39.95,1409,7,16911.05,17
70,191922272,5,69.95,349.75,1121,10,16278.85,27
112,746385416,110,39.95,4394.5,530,10,15860.65,37
161,645173005,0,29.99,0.0,487,8,15496.33,45
210,246593220,20,29.95,599.0,1446,10,15325.05,55
266,702526934,1,39.95,39.95,1677,9,14686.1,64
315,762339185,0,49.95,0.0,2671,8,14474.15,72
364,868698701,0,19.95,0.0,442,10,14456.05,82
406,941971229,0,29.95,0.0,1143,10,14365.27,92


In [23]:
preds_final[['block_id', 'gain_per_block', 'n_products', 'product_cumsum']].to_csv('../bets/week1_bets.csv', index=None)

### Posibles bloques para la apuesta final:

In [70]:
preds_final.block_id

0        21
42      530
91     1121
133    2671
182     442
224     418
273    1403
308    1677
357    1409
385    1446
Name: block_id, dtype: int64