In [1]:
from optimizer import Optimizer
from timeit import timeit
import pandas as pd
from tqdm.notebook import tqdm
# Register tqdm's pandas integration so DataFrame/Series gain `progress_apply`,
# which the static-submission cells below rely on.
tqdm.pandas()

import warnings
# NOTE(review): this silences every warning for the whole kernel session,
# including deprecation/convergence warnings from the model libraries.
warnings.filterwarnings('ignore')

# Performance Profiling

In [2]:
# These all need to be available as module-level globals: profile_run() hands
# globals() to timeit, which resolves `covs` and `iterations` by name.
# presumably one customer's three covariate values (cf. Covariate1..3 in the
# test data) — confirm
covs=[1.793184, 0.704047, 5.865989]
# Number of times each timed statement is executed per measurement.
iterations=1000

In [3]:
def profile_run():
    """Time the optimizer's three entry points and print each per-call average.

    Each statement is executed ``iterations`` times by ``timeit`` against this
    module's globals, so ``opt``, ``covs``, ``demand`` and ``iterations`` must
    already be defined at module level before this function is called.
    """
    # (method to call on `opt`, name of the global passed as its argument)
    benchmarks = (
        ("get_demand_at_prices", "covs"),
        ("get_revenue_maximizing_prices_and_revenue", "demand"),
        ("get_revenue_maximizing_prices_and_revenue_from_cov", "covs"),
    )
    for method_name, argument_name in benchmarks:
        statement = f"opt.{method_name}({argument_name})"
        elapsed = timeit(statement, number=iterations, globals=globals())
        # timeit returns the total for all runs; divide to get the mean per call.
        print(f"{method_name}() avg runtime: {elapsed / iterations:.6f} seconds")

## XGBoost

In [4]:
# Build an Optimizer from the XGBoost model file and precompute `demand`;
# profile_run() reads `opt`, `covs` and `demand` from the notebook globals.
opt = Optimizer(model_file='xgbclassifier_v1.pkl', bought_range_p0=100, bought_range_p1=100)
demand = opt.get_demand_at_prices(covs)
profile_run()

get_demand_at_prices() avg runtime: 0.089825 seconds
get_revenue_maximizing_prices_and_revenue() avg runtime: 0.010742 seconds
get_revenue_maximizing_prices_and_revenue_from_cov() avg runtime: 0.097210 seconds


## Logistic Regression

In [5]:
# Rebind `opt` and `demand` to the logistic-regression model so profile_run()
# times this model instead of the previous one (it looks both names up globally).
opt = Optimizer(model_file='logit_v1.pkl', bought_range_p0=100, bought_range_p1=100)
demand = opt.get_demand_at_prices(covs)
profile_run()

get_demand_at_prices() avg runtime: 0.000781 seconds
get_revenue_maximizing_prices_and_revenue() avg runtime: 0.010003 seconds
get_revenue_maximizing_prices_and_revenue_from_cov() avg runtime: 0.010690 seconds


## GradientBoostingClassifier

In [6]:
# Rebind `opt` and `demand` to the gradient-boosting model file and repeat the
# same three timings with identical range settings for a like-for-like comparison.
opt = Optimizer(model_file='gradboost_v1.pkl', bought_range_p0=100, bought_range_p1=100)
demand = opt.get_demand_at_prices(covs)
profile_run()

get_demand_at_prices() avg runtime: 0.025221 seconds
get_revenue_maximizing_prices_and_revenue() avg runtime: 0.010278 seconds
get_revenue_maximizing_prices_and_revenue_from_cov() avg runtime: 0.035074 seconds


# Static Submission

You also need to submit static prices for the test set of agents – a CSV with your calculated optimal prices for each test customer. For the set of test customers in the file “test_user_info.csv”, you will calculate prices for each item for each customer so as to maximize expected revenue if you were a monopolist (i.e., as in HW3).

The CSV should have four columns:
- (1) `user_index`
- (2) `price_item_0`
- (3) `price_item_1`
- (4) `expected_revenue`

The file should be named `part2_static_prices_submission.csv` and should be in the `agents/` folder (same folder as `yourteamname.py`).

In [2]:
# Load the test customers; the path is relative to the notebook's working directory.
test_user_info = pd.read_csv('../../data/test_user_info.csv')
# Display (rows, columns) as a quick check that the file loaded as expected.
test_user_info.shape

(30000, 4)

In [3]:
# Optimizer used to price the test set.
# NOTE(review): bought_range_p0/p1 are 45/60 here but 100/100 in the profiling
# cells — confirm these values are the ones intended for the submission.
opt = Optimizer(model_file='xgbclassifier_v1.pkl', bought_range_p0=45, bought_range_p1=60)

In [4]:
# Columns of each test-user row that are handed to the optimizer.
covariate_columns = ['Covariate1', 'Covariate2', 'Covariate3']


def _optimal_prices_for(row):
    """Run the optimizer on one test-user row's covariate columns."""
    return opt.get_revenue_maximizing_prices_and_revenue_from_cov(row[covariate_columns])


# One optimizer call per test user, with a tqdm progress bar.
results = test_user_info.progress_apply(_optimal_prices_for, axis=1)

  0%|          | 0/30000 [00:00<?, ?it/s]

In [5]:
# Assemble the submission table in one expression: start from the user ids and
# attach the fields unpacked from each optimizer result. `assign` returns a new
# frame, so `test_user_info` itself is left untouched.
test_submission = test_user_info[['user_index']].assign(
    price_item_0=results.progress_apply(lambda outcome: outcome[0][0]),
    price_item_1=results.progress_apply(lambda outcome: outcome[0][1]),
    expected_revenue=results.progress_apply(lambda outcome: outcome[1]),
)

  0%|          | 0/30000 [00:00<?, ?it/s]

  0%|          | 0/30000 [00:00<?, ?it/s]

  0%|          | 0/30000 [00:00<?, ?it/s]

In [6]:
# Preview the assembled submission table (bare last expression -> rich display).
test_submission

Unnamed: 0,user_index,price_item_0,price_item_1,expected_revenue
0,30000,34.778864,42.982203,33.618279
1,30001,81.819091,72.384237,71.360144
2,30002,75.683409,67.860847,65.781965
3,30003,45.005000,45.243898,45.007438
4,30004,8.190909,15.841864,14.317521
...,...,...,...,...
29995,59995,47.050227,54.290678,53.553635
29996,59996,14.326591,31.673729,27.465596
29997,59997,38.869318,42.982203,39.878610
29998,59998,38.869318,40.720508,39.328790


In [7]:
# EDA: summary statistics of the submission columns, as a sanity check on the
# range of the computed prices and expected revenues.
test_submission.describe()

Unnamed: 0,user_index,price_item_0,price_item_1,expected_revenue
count,30000.0,30000.0,30000.0,30000.0
mean,44999.5,52.188452,58.399801,50.231847
std,8660.398374,21.562744,24.173582,21.392808
min,30000.0,6.145682,9.05678,0.305236
25%,37499.75,38.869318,42.982203,37.170422
50%,44999.5,45.005,54.290678,46.36813
75%,52499.25,75.683409,70.122542,59.55469
max,59999.0,90.0,133.45,132.908873


In [8]:
# Total count of missing values across all columns; should be 0 before export.
test_submission.isna().sum().sum()

0

In [10]:
# Write the submission without the DataFrame index so the CSV contains only the
# four required columns.
# NOTE(review): the instructions require the file to live in the `agents/`
# folder; this assumes `../` resolves there from the notebook's directory — confirm.
test_submission.to_csv("../part2_static_prices_submission.csv", index=False)