-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
272 lines (209 loc) · 11 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
import copy
import csv
import datetime
import json
from statistics import stdev
import mysql.connector as connector
from algorithm.pseudorandom import PseudorandomAlgorithm
from algorithm.ucb import UcbAlgorithm
from model.click_entity import Click
from model.partner_entity import Partner
from model.product_day_entity import ProductDay
from model.product_day_mean_entity import ProductDayMean
from model.product_statistics import ProductStatistics
from model.working_data_entity import WorkingData
filename = 'resource/CriteoSearchData'
results_path = 'results/'
db = connector.connect(host='localhost', port='33070', password='q1w2e3r4', user='root', database='ccs')
cursor = db.cursor()
cost_factor = 0.12
profit_factor = 0.22
def db_cleanup():
cursor.execute(Click.drop_click_table_script)
cursor.execute(ProductDay.drop_product_day_table_script)
cursor.execute(Partner.drop_partner_table_script)
cursor.execute(WorkingData.drop_table_script)
cursor.execute(ProductDayMean.drop_product_day_table_script)
def db_setup():
cursor.execute(Click.create_click_table_script)
cursor.execute(ProductDay.create_product_day_table_script)
cursor.execute(Partner.create_partner_table_script)
cursor.execute(WorkingData.create_table_script)
cursor.execute(ProductDayMean.create_product_day_table_script)
def import_data_from_csv():
with open(filename, 'r') as file:
reader = csv.reader(file, delimiter='\t')
i = 0
for row in reader:
entity = []
for e in row:
entity.append(e)
click_date = datetime.datetime.utcfromtimestamp(int(entity[3])).date().isoformat()
entity.append(click_date)
cursor.execute(Click.insert_click_entity_script, tuple(entity))
if i % 10000 == 0:
print('PROCESSED ' + str(i) + ' RECORDS')
db.commit()
i += 1
print('PROCESSED TOTAL ' + str(i) + ' RECORDS')
file.close()
db.commit()
def create_working_table_for_partner(partner_id):
cursor.execute(Click.select_working_data_where_partner_id_script, {'partner_id': partner_id})
working_data_for_partner = cursor.fetchall()
print('CREATING WORKING TABLE FOR PARTNER: ' + str(partner_id))
for entity in working_data_for_partner:
cursor.execute(WorkingData.insert_entity_script, entity)
db.commit()
def calculate_product_day(partner_ids):
for partner_tuple in partner_ids:
partner_id = partner_tuple[0]
print('PROCESSING PARTNER: ' + str(partner_id))
create_working_table_for_partner(partner_id)
cursor.execute(WorkingData.select_distinct_product_id)
products_for_partner = cursor.fetchall()
print('PROCESSING DATA FOR PARTNER: ' + str(partner_id))
total_sales_amount = 0
total_clicks_number = 0
for product_tuple in products_for_partner:
product_id = product_tuple[0]
cursor.execute(WorkingData.select_distinct_click_day_where_product_id_script,
{'product_id': product_id})
days_for_product = cursor.fetchall()
for day_tuple in days_for_product:
day = day_tuple[0]
cursor.execute(WorkingData.select_all_where_click_date_and_product_id_script,
{'click_date': day, 'product_id': product_id})
clicks_for_product = cursor.fetchall()
daily_clicks_number = 0
daily_sales_number = 0
daily_sales_amount = 0
for click in clicks_for_product:
daily_clicks_number += 1
sales_amount = click[WorkingData.SALES_AMOUNT]
if sales_amount != -1:
daily_sales_number += 1
daily_sales_amount += sales_amount
total_sales_amount += daily_sales_amount
total_clicks_number += daily_clicks_number
product_day_entity = [product_id, partner_id, daily_sales_amount, daily_clicks_number,
daily_sales_number, day]
cursor.execute(ProductDay.insert_product_day_entity_script, tuple(product_day_entity))
single_click_cost = total_sales_amount * cost_factor / total_clicks_number
partner_entity = [partner_id, total_sales_amount, total_clicks_number, single_click_cost]
cursor.execute(Partner.insert_partner_entity_script, tuple(partner_entity))
db.commit()
cursor.execute(WorkingData.delete_all_script)
db.commit()
print('FINISHED PROCESSING PARTNER: ' + str(partner_id))
cursor.execute(WorkingData.delete_all_script)
db.commit()
def calculate_product_day_mean(partner_ids):
for partner_tuple in partner_ids:
partner_id = partner_tuple[0]
cursor.execute(Partner.select_all_where_partner_id_script, {'partner_id': partner_id})
partner_data = cursor.fetchall()
per_partner_cost = partner_data[0][Partner.SINGLE_CLICK_COST]
print('CALCULATING MEAN FOR PARTNER: ' + str(partner_id))
cursor.execute(ProductDay.select_distinct_product_id_where_partner_id_script, {'partner_id': partner_id})
products_for_partner = cursor.fetchall()
for product_tuple in products_for_partner:
product_id = product_tuple[0]
cursor.execute(ProductDay.select_all_where_partner_id_and_product_id_script,
{'partner_id': partner_id, 'product_id': product_id})
product_days_for_product = cursor.fetchall()
mean = 0
days_counter = 0
for product_day in product_days_for_product:
days_counter += 1
income = product_day[ProductDay.DAILY_SALES_AMOUNT] * profit_factor
cost = product_day[ProductDay.DAILY_CLICKS_NUMBER] * per_partner_cost
profit = income - cost
if days_counter == 1:
mean = profit
std_dev = 0
else:
mean = (mean + profit) / 2
std_dev = stdev([mean, profit])
mean_entity = [product_day[ProductDay.ID], profit, cost, mean, std_dev]
cursor.execute(ProductDayMean.insert_product_day_entity_script, tuple(mean_entity))
db.commit()
print('FINISHED CALCULATING MEAN FOR PARTNER: ' + str(partner_id))
db.commit()
def calculate_partner_data(partner_ids):
calculate_product_day(partner_ids)
calculate_product_day_mean(partner_ids)
def calculate_daily_profit(products):
daily_profit = 0
for product in products:
product_profit = product[ProductStatistics.DAILY_PROFIT]
daily_profit += product_profit
return daily_profit
def get_entities_attribute(entities_list, attribute_index):
return sorted(list(map(lambda p: p[attribute_index], entities_list)))
def compare_product_statistics(entity1, entity2):
ids = get_entities_attribute([entity1, entity2], ProductStatistics.PRODUCT_ID)
return ids[0] < ids[1]
def products_intersection(products_list_1, products_list_2):
products_list_1_ids = get_entities_attribute(products_list_1, ProductStatistics.PRODUCT_ID)
filtered_products = [product for product in products_list_2
if product[ProductStatistics.PRODUCT_ID] in products_list_1_ids]
return filtered_products
def generate_excluded_products_result(partner_ids, strategy, algorithm, *params):
for partner_tuple in partner_ids:
partner_id = partner_tuple[0]
cursor.execute(ProductDay.select_distinct_day_date_where_partner_id_script, {'partner_id': partner_id})
days = cursor.fetchall()
results = []
excluded_products = []
total_excluded_products_profit = 0
products_seen_so_far = []
for day_tuple in days:
day = day_tuple[0]
day_record = {}
products_to_exclude = excluded_products
cursor.execute(ProductStatistics.select_products_statistics_where_partner_id_and_day_script,
{'date': day, 'partner_id': partner_id})
products_in_day = cursor.fetchall()
products_seen_so_far_ids = get_entities_attribute(products_seen_so_far, ProductStatistics.PRODUCT_ID)
cursor.execute(ProductStatistics.select_products_seen_so_far_where_partner_id_and_day_script,
{'date': day, 'partner_id': partner_id})
products_seen_so_far = cursor.fetchall()
excluded_products = algorithm(products_seen_so_far, *params)
products_actually_excluded = products_intersection(products_to_exclude, products_in_day)
profit_before_exclusion = calculate_daily_profit(products_in_day)
excluded_products_profit = calculate_daily_profit(products_actually_excluded)
profit_after_exclusion = profit_before_exclusion - excluded_products_profit
total_excluded_products_profit -= excluded_products_profit
products_in_day_ids = get_entities_attribute(products_in_day, ProductStatistics.PRODUCT_ID)
products_to_exclude_next_day_ids = get_entities_attribute(excluded_products, ProductStatistics.PRODUCT_ID)
products_to_exclude_ids = get_entities_attribute(products_to_exclude, ProductStatistics.PRODUCT_ID)
products_actually_excluded_ids = \
get_entities_attribute(products_actually_excluded, ProductStatistics.PRODUCT_ID)
day_record['day'] = str(day)
day_record['products_seen_so_far'] = copy.copy(products_seen_so_far_ids)
day_record['products_in_day'] = products_in_day_ids
day_record['products_to_exclude'] = products_to_exclude_ids
day_record['products_to_exclude_next_day'] = products_to_exclude_next_day_ids
day_record['products_actually_excluded'] = products_actually_excluded_ids
day_record['profit_before_exclusion'] = profit_before_exclusion
day_record['profit_after_exclusion'] = profit_after_exclusion
day_record['excluded_products_profit'] = -excluded_products_profit
day_record['total_excluded_products_profit'] = total_excluded_products_profit
results.append(day_record)
result = {'strategy': strategy, 'days': results}
out_file_name = str(partner_id) + '_' + strategy + '.json'
generate_results_file(out_file_name, result)
def generate_results_file(out_file_name, result):
file_path = results_path + out_file_name
with open(file_path, "w") as outfile:
json.dump(result, outfile, indent=4)
if __name__ == '__main__':
partners = [['C0F515F0A2D0A5D9F854008BA76EB537'], ['04A66CE7327C6E21493DA6F3B9AACC75'],
['C306F0AD20C9B20C69271CC79B2E0887']]
db_cleanup()
db_setup()
import_data_from_csv()
calculate_partner_data(partners)
generate_excluded_products_result(partners, 'pseudorandom', PseudorandomAlgorithm.exclude_products)
generate_excluded_products_result(partners, 'ucb_beta_13', UcbAlgorithm.exclude_products, 13)