# Librerías

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import os
import seaborn as sns

%matplotlib inline

# Predicciones en csv

In [2]:
# Read the saved predictions of the three models, one CSV per model.
preds_lightgbm, preds_catboost, preds_xgboost = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../predictions/preds_lightgbm_pw.csv',
        '../predictions/preds_catboost_pw.csv',
        '../predictions/preds_xgboost_pw.csv',
    )
)

In [3]:
# Put the three models' predictions side by side, matching rows on the
# 'Unnamed: 0' column (the name pandas gives to a header-less CSV column;
# presumably the row index saved with each prediction file - confirm).
row_key = 'Unnamed: 0'
catboost_by_row = preds_catboost.set_index(row_key)
xgboost_by_row = preds_xgboost.set_index(row_key)

full_preds = preds_lightgbm.join(catboost_by_row, on=row_key)
full_preds = full_preds.join(xgboost_by_row, on=row_key)
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm_pw,predicciones_catboost_pw,predicciones_xgboost_pw
0,0,67,42,65
1,1,163,82,98
2,2,48,30,33
3,3,5,6,5
4,4,52,34,41
5,5,169,75,117
6,6,80,63,70
7,7,0,1,0
8,8,8,16,12
9,9,71,57,80


In [5]:
# Ensemble prediction: plain (unweighted) average of the three models.
model_columns = (
    'predicciones_lightgbm_pw',
    'predicciones_catboost_pw',
    'predicciones_xgboost_pw',
)

# Accumulate in the listed order, then divide by the number of models (3).
ensemble_total = full_preds[model_columns[0]]
for column in model_columns[1:]:
    ensemble_total = ensemble_total + full_preds[column]

full_preds['media'] = ensemble_total / len(model_columns)
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm_pw,predicciones_catboost_pw,predicciones_xgboost_pw,media
0,0,67,42,65,58.0
1,1,163,82,98,114.333333
2,2,48,30,33,37.0
3,3,5,6,5,5.333333
4,4,52,34,41,42.333333
5,5,169,75,117,120.333333
6,6,80,63,70,71.0
7,7,0,1,0,0.333333
8,8,8,16,12,12.0
9,9,71,57,80,69.333333


In [6]:
# Round the ensemble mean to the nearest integer and keep it as a plain list of
# Python ints. Vectorised equivalent of applying int(round(x)) element by
# element: both round exact halves to the nearest even integer, and both raise
# if a value is NaN.
y_preds = full_preds['media'].round().astype(int).tolist()

In [7]:
# Preview the first 20 rounded ensemble predictions.
y_preds[:20]

[58,
 114,
 37,
 5,
 42,
 120,
 71,
 0,
 12,
 69,
 1,
 19,
 8,
 4,
 108,
 174,
 115,
 20,
 19,
 96]

# Apuesta de bloque

In [8]:
# Load the product-to-block mapping; later cells rely on its
# product_id and block_id columns.
product_blocks = pd.read_csv('../data/product_blocks.csv')
product_blocks.head(10)

Unnamed: 0,product_id,block_id
0,612967398,0
1,296892108,0
2,139541214,0
3,963923934,0
4,938230141,0
5,172045154,0
6,663552768,0
7,160621689,1
8,948976891,1
9,556017319,1


In [9]:
# Number of (non-null) product ids in each block, as a Series indexed by
# block_id and named 'n_products'.
productos_por_bloque = (
    product_blocks
    .groupby('block_id')['product_id']
    .count()
    .rename('n_products')
)
productos_por_bloque.head()

block_id
0     7
1     7
2     7
3     6
4    10
Name: n_products, dtype: int64

In [10]:
# Attach the size of its block (n_products) to every product row.
# Left merge against the block_id-indexed counts, so every product row is kept.
product_blocks_n = product_blocks.merge(
    productos_por_bloque,
    how='left',
    left_on='block_id',
    right_index=True,
)
product_blocks_n.head()

Unnamed: 0,product_id,block_id,n_products
0,612967398,0,7
1,296892108,0,7
2,139541214,0,7
3,963923934,0,7
4,938230141,0,7


In [11]:
# Read only the two columns used below from the final test set:
# product_id and price.
test = pd.read_csv('../data/final_test_pw.csv', usecols=['product_id', 'price'])

In [12]:
# Show the first rows of the test frame (product_id and price).
test.head()

Unnamed: 0,product_id,price
0,151926,25.95
1,213413,19.95
2,310130,12.95
3,455200,29.95
4,571044,15.95


In [13]:
# Per-product table: prediction, price and gain (= preds * price).
# y_preds is matched to the rows of `test` by position, so both must be in
# the same order and have the same length.
predicciones = pd.DataFrame({
    'product_id': test['product_id'],
    'preds': y_preds,
    'price': test['price'],
})
predicciones['gain'] = predicciones['preds'] * predicciones['price']

# Rank products from highest to lowest gain.
predicciones = predicciones.sort_values(by='gain', ascending=False)
predicciones.head()

Unnamed: 0,product_id,preds,price,gain
4037,449746716,602,59.95,36089.9
1473,164786803,661,49.95,33016.95
329,37119458,1104,22.95,25336.8
8350,939167053,500,49.95,24975.0
692,78723299,415,59.95,24879.25


In [14]:
# Compare shapes: predicciones is built from test's columns, so both frames
# should have the same number of rows.
predicciones.shape, test.shape

((8892, 4), (8892, 2))

In [15]:
# Add block_id and n_products to every predicted product. The join is a left
# join, so products without a block entry are kept (with missing block info).
blocks_by_product = product_blocks_n.set_index('product_id')

predicciones_final = predicciones.join(blocks_by_product, on='product_id', how='left')
predicciones_final = predicciones_final.reset_index(drop=True)
predicciones_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products
0,449746716,602,59.95,36089.9,1058,2
1,164786803,661,49.95,33016.95,1850,1
2,37119458,1104,22.95,25336.8,812,8
3,939167053,500,49.95,24975.0,315,8
4,78723299,415,59.95,24879.25,1651,7


In [16]:
# Total gain per block, highest first, as a Series indexed by block_id and
# named 'gain_per_block'.
# Only the 'gain' column is aggregated: summing the whole frame would also add
# up product_id, preds, price and n_products, none of which is used here.
group_block_gain = (
    predicciones_final
    .groupby('block_id')['gain']
    .sum()
    .sort_values(ascending=False)
    .rename('gain_per_block')
)

In [17]:
# Give every product row the total gain of its block, order rows by that
# block total (best block first) and renumber the index from 0.
preds_final = (
    predicciones_final
    .join(group_block_gain, on='block_id')
    .sort_values('gain_per_block', ascending=False)
    .reset_index(drop=True)
)

In [18]:
# Inspect the top rows after ordering by gain_per_block.
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,28691464,408,49.95,20379.6,560,8,37400.3
1,640144857,52,29.95,1557.4,560,8,37400.3
2,759695418,334,29.95,10003.3,560,8,37400.3
3,163664991,83,22.95,1904.85,560,8,37400.3
4,231065399,137,25.95,3555.15,560,8,37400.3


In [19]:
# Collapse to one row per block: keep the first row found for each block_id,
# then order blocks by their total gain (best first).
# drop_duplicates selects rows by value, so this cell gives the same result
# however the index is numbered and can be re-run safely. Positional selection
# with index labels is only correct while the index is exactly 0..n-1.
# NOTE: product_id / preds / price / gain on the surviving row describe a single
# product of the block; only block_id, n_products and gain_per_block are
# block-level values.
preds_final = (
    preds_final
    .drop_duplicates(subset='block_id')
    .sort_values('gain_per_block', ascending=False)
)

In [20]:
# Inspect the highest-gain blocks after keeping one row per block.
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,28691464,408,49.95,20379.6,560,8,37400.3
5,449746716,602,59.95,36089.9,1058,2,36468.95
7,795471256,762,22.95,17487.9,1580,8,34089.5
15,153175370,15,119.0,1785.0,315,8,33143.3
19,164786803,661,49.95,33016.95,1850,1,33016.95


In [21]:
# Sanity check: after de-duplication block 560 should appear in exactly one row.
preds_final[preds_final.block_id==560]

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,28691464,408,49.95,20379.6,560,8,37400.3


In [22]:
# Running total of products as blocks are taken in the current row order
# (blocks were sorted by gain_per_block, best first, in the previous step).
preds_final = preds_final.assign(
    product_cumsum=preds_final['n_products'].cumsum()
)
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,28691464,408,49.95,20379.6,560,8,37400.3,8
5,449746716,602,59.95,36089.9,1058,2,36468.95,10
7,795471256,762,22.95,17487.9,1580,8,34089.5,18
15,153175370,15,119.0,1785.0,315,8,33143.3,26
19,164786803,661,49.95,33016.95,1850,1,33016.95,27


In [23]:
# Keep the best-ranked blocks whose running product count does not exceed 100.
# NOTE(review): 100 is presumably the maximum number of products allowed in a
# bet - confirm and consider promoting it to a named constant.
within_budget = preds_final['product_cumsum'] <= 100
bet_blocks = preds_final.loc[within_budget]
bet_blocks

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,28691464,408,49.95,20379.6,560,8,37400.3,8
5,449746716,602,59.95,36089.9,1058,2,36468.95,10
7,795471256,762,22.95,17487.9,1580,8,34089.5,18
15,153175370,15,119.0,1785.0,315,8,33143.3,26
19,164786803,661,49.95,33016.95,1850,1,33016.95,27
20,417106995,75,12.95,971.25,1121,10,33003.9,37
26,645173005,309,29.99,9266.91,487,8,32833.86,45
33,238358722,156,15.99,2494.44,1143,10,32315.99,55
41,868698701,3,19.95,59.85,442,10,32240.4,65
47,299370856,143,25.95,3710.85,851,10,29446.8,75


In [26]:
# Save the block ranking (all blocks, not only the selected ones) to disk.
# index=None is falsy, so the row index is left out of the file.
bet_columns = ['block_id', 'gain_per_block', 'n_products', 'product_cumsum']
preds_final[bet_columns].to_csv('../bets/week1_bets_pw.csv', index=None)

### Posibles bloques para la apuesta final:

In [25]:
# Block ids selected for the bet.
# NOTE(review): the saved output below lists more blocks than the bet_blocks
# table shown earlier, and the execution counts are out of order; the saved
# outputs appear to come from different runs. Restart the kernel and run all
# cells before relying on this list.
bet_blocks.block_id

0      560
5     1058
7     1580
15     315
19    1850
20    1121
26     487
33    1143
41     442
47     851
54     812
58    1651
61      21
Name: block_id, dtype: int64