# Librerías

In [2]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import os
import seaborn as sns

%matplotlib inline

# Predicciones en csv

In [3]:
# Load the three week-2 prediction files (LightGBM, CatBoost, XGBoost),
# one DataFrame per file, read in that order.
preds_lightgbm, preds_catboost, preds_xgboost = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../predictions/week2/preds_lightgbm_pw.csv',
        '../predictions/week2/preds_catboost_pw.csv',
        '../predictions/week2/preds_xgboost_pw.csv',
    )
)

In [4]:
# Put the three models' predictions side by side. LightGBM is the left
# frame; the other two are keyed by their 'Unnamed: 0' column and matched
# against the same column on the left.
catboost_by_row = preds_catboost.set_index('Unnamed: 0')
xgboost_by_row = preds_xgboost.set_index('Unnamed: 0')
full_preds = (
    preds_lightgbm
    .join(catboost_by_row, on='Unnamed: 0')
    .join(xgboost_by_row, on='Unnamed: 0')
)
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm_pw,predicciones_catboost_pw,predicciones_xgboost_pw
0,0,74,43,68
1,1,119,60,90
2,2,85,31,38
3,3,7,6,7
4,4,127,59,74
5,5,173,79,132
6,6,131,60,90
7,7,1,1,1
8,8,7,6,10
9,9,97,48,71


In [5]:
# Ensemble prediction: unweighted arithmetic mean of the three model columns.
lightgbm_col = full_preds['predicciones_lightgbm_pw']
catboost_col = full_preds['predicciones_catboost_pw']
xgboost_col = full_preds['predicciones_xgboost_pw']
full_preds['media'] = lightgbm_col.add(catboost_col).add(xgboost_col).div(3)
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm_pw,predicciones_catboost_pw,predicciones_xgboost_pw,media
0,0,74,43,68,61.666667
1,1,119,60,90,89.666667
2,2,85,31,38,51.333333
3,3,7,6,7,6.666667
4,4,127,59,74,86.666667
5,5,173,79,132,128.0
6,6,131,60,90,93.666667
7,7,1,1,1,1.0
8,8,7,6,10,7.666667
9,9,97,48,71,72.0


In [6]:
# Turn the averaged predictions into whole numbers: round each value with
# Python's built-in round(), then cast to int, keeping the row order.
y_preds = list(map(int, map(round, full_preds['media'].values.tolist())))

In [7]:
# Preview the first 20 rounded ensemble predictions.
y_preds[:20]

[62, 90, 51, 7, 87, 128, 94, 1, 8, 72, 2, 20, 4, 1, 77, 154, 121, 38, 11, 170]

# Apuesta de bloque

In [8]:
# Load the table that pairs each product_id with a block_id, and show its
# first 10 rows.
product_blocks = pd.read_csv('../data/product_blocks.csv')
product_blocks.head(10)

Unnamed: 0,product_id,block_id
0,612967398,0
1,296892108,0
2,139541214,0
3,963923934,0
4,938230141,0
5,172045154,0
6,663552768,0
7,160621689,1
8,948976891,1
9,556017319,1


In [9]:
# Count the (non-null) product_id entries in every block and expose the
# result as a Series named 'n_products', indexed by block_id.
productos_por_bloque = (
    product_blocks
    .groupby('block_id')['product_id']
    .count()
    .rename('n_products')
)
productos_por_bloque.head()

block_id
0     7
1     7
2     7
3     6
4    10
Name: n_products, dtype: int64

In [10]:
# Attach to every product row the size of the block it belongs to:
# left-merge the per-block counts (indexed by block_id) onto the
# 'block_id' column of the product table.
product_blocks_n = product_blocks.merge(
    productos_por_bloque,
    left_on='block_id',
    right_index=True,
    how='left',
)
product_blocks_n.head()

Unnamed: 0,product_id,block_id,n_products
0,612967398,0,7
1,296892108,0,7
2,139541214,0,7
3,963923934,0,7
4,938230141,0,7


In [11]:
# Only the product identifier and its price are needed from the test set.
test_columns = ['product_id', 'price']
test = pd.read_csv('../data/week2/final_test_pw.csv', usecols=test_columns)

In [12]:
# Preview the first rows of the loaded test columns.
test.head()

Unnamed: 0,product_id,price
0,151926,25.95
1,213413,19.95
2,310130,12.95
3,455200,29.95
4,571044,15.95


In [13]:
# Build the per-product table: predicted units, price and the resulting
# gain (preds * price). y_preds is matched to `test` purely by position,
# so both must be in the same row order.
predicciones = test.assign(
    preds=y_preds,
    gain=test.price.mul(y_preds),
)[['product_id', 'preds', 'price', 'gain']]

# Most profitable products first.
predicciones = predicciones.sort_values('gain', ascending=False)
predicciones.head()

Unnamed: 0,product_id,preds,price,gain
330,37119458,2126,22.95,48791.7
1498,164786803,549,49.95,27422.55
5211,577297325,866,29.95,25936.7
4101,450385036,1277,19.95,25476.15
3899,429109570,826,29.95,24738.7


In [14]:
# Compare the shapes of the prediction table and the test set.
predicciones.shape, test.shape

((9007, 4), (9007, 2))

In [15]:
# Bring block_id and n_products onto every predicted product (left join
# on product_id), then renumber the rows from 0.
blocks_by_product = product_blocks_n.set_index('product_id')
predicciones_final = predicciones.join(blocks_by_product, on='product_id', how='left')
predicciones_final = predicciones_final.reset_index(drop=True)
predicciones_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products
0,37119458,2126,22.95,48791.7,812,8
1,164786803,549,49.95,27422.55,1850,1
2,577297325,866,29.95,25936.7,1143,10
3,450385036,1277,19.95,25476.15,2233,8
4,429109570,826,29.95,24738.7,1412,5


In [16]:
# Total predicted gain of each block, largest first, as a Series named
# 'gain_per_block' indexed by block_id.
group_block_gain = (
    predicciones_final
    .groupby('block_id')['gain']
    .sum()
    .sort_values(ascending=False)
    .rename('gain_per_block')
)

In [17]:
# Tag every product row with its block's total gain, order the rows so the
# most profitable blocks come first, and renumber the index from 0.
preds_final = (
    predicciones_final
    .join(group_block_gain, on='block_id')
    .sort_values('gain_per_block', ascending=False)
    .reset_index(drop=True)
)

In [18]:
# Preview the product rows after ranking by block gain.
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,37119458,2126,22.95,48791.7,812,8,54356.1
1,679568934,142,12.95,1838.9,812,8,54356.1
2,19599326,12,15.95,191.4,812,8,54356.1
3,890876414,118,29.95,3534.1,812,8,54356.1
4,842106369,40,29.95,1198.0,1580,8,47004.75


In [19]:
# Keep a single row per block (the first one in the current order) so the
# frame can be read as a block-level ranking.
#
# drop_duplicates(subset=...) selects rows directly. The previous version
# passed index *labels* to the positional `iloc`, which only matched while
# the index was a fresh 0..n-1 range and broke if this cell was re-run.
preds_final = preds_final.drop_duplicates(subset='block_id', keep='first')
preds_final = preds_final.sort_values('gain_per_block', ascending=False)

In [20]:
# Preview the ranking after keeping one row per block.
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,37119458,2126,22.95,48791.7,812,8,54356.1
4,842106369,40,29.95,1198.0,1580,8,47004.75
12,699251225,23,45.95,1056.85,1143,10,42662.98
21,106554518,71,17.95,1274.45,388,8,34568.2
26,868698701,3,19.95,59.85,442,10,34298.65


In [21]:
# Spot-check: show the row kept for block 560.
preds_final.loc[preds_final['block_id'] == 560]

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
208,28691464,213,49.95,10639.35,560,8,21784.95


In [22]:
# Running total of products when taking blocks in ranking order.
preds_final = preds_final.assign(
    product_cumsum=preds_final['n_products'].cumsum()
)
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,37119458,2126,22.95,48791.7,812,8,54356.1,8
4,842106369,40,29.95,1198.0,1580,8,47004.75,16
12,699251225,23,45.95,1056.85,1143,10,42662.98,26
21,106554518,71,17.95,1274.45,388,8,34568.2,34
26,868698701,3,19.95,59.85,442,10,34298.65,44


In [23]:
# Keep the top-ranked blocks whose running product total stays at or
# below 100 (presumably the bet's product limit - confirm).
within_limit = preds_final['product_cumsum'].le(100)
bet_blocks = preds_final.loc[within_limit]
bet_blocks

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,37119458,2126,22.95,48791.7,812,8,54356.1,8
4,842106369,40,29.95,1198.0,1580,8,47004.75,16
12,699251225,23,45.95,1056.85,1143,10,42662.98,26
21,106554518,71,17.95,1274.45,388,8,34568.2,34
26,868698701,3,19.95,59.85,442,10,34298.65,44
32,73016424,46,39.95,1837.7,2735,10,32967.25,54
37,848832888,636,12.95,8236.2,487,8,30356.22,62
44,926711197,22,12.95,284.9,2574,10,29723.65,72
52,746385416,361,39.95,14421.95,530,10,29493.15,82
59,387227310,216,39.95,8629.2,1747,10,28955.05,92


In [24]:
# Export the block ranking without the DataFrame index.
# Note: this writes every block in preds_final, not only bet_blocks.
# `index` is documented as a bool, so pass False explicitly instead of
# relying on None being falsy.
preds_final[['block_id', 'gain_per_block', 'n_products', 'product_cumsum']].to_csv('../bets/week2_bets_pw.csv', index=False)

### Posibles bloques para la apuesta final:

In [25]:
# Block ids selected for the bet, in ranking order.
bet_blocks['block_id']

0      812
4     1580
12    1143
21     388
26     442
32    2735
37     487
44    2574
52     530
59    1747
Name: block_id, dtype: int64