In [1]:
from faker import Faker
import random
from datetime import datetime, timedelta
import pandas as pd
import math

In [2]:
def basline_premium(dcurrent, daverage, alpha):
    """Return the demand-based premium factor.

    The premium is ``alpha`` scaled by the ratio of current demand to
    average demand.

    Args:
        dcurrent: Current demand (numerator of the demand ratio).
        daverage: Average demand (denominator of the demand ratio).
        alpha: Multiplier applied to the demand ratio.

    Returns:
        ``alpha * (dcurrent / daverage)``.
    """
    demand_ratio = dcurrent / daverage
    return alpha * demand_ratio

def calculate_dcurrent(group):
    """Count the orders in the trailing 10-minute window of every row.

    Args:
        group: DataFrame with a datetime ``timestamp`` column and a ``price``
            column. pandas requires the ``timestamp`` values to be monotonic
            for a time-based rolling window.

    Returns:
        Series named ``price``, indexed like ``group``, holding for each row
        the number of non-null prices whose timestamp lies within the
        10 minutes up to and including that row's timestamp.
    """
    # '10min' replaces the '10T' alias, which pandas deprecated in 2.2.
    # Selecting 'price' before counting avoids rolling over unused columns.
    return group.rolling('10min', on='timestamp')['price'].count()

def demand_modifier(alpha):
    """Return the exponential demand damping factor for ``alpha``.

    Args:
        alpha: Premium multiplier of an event.

    Returns:
        ``exp(-2 * alpha)``; equals 1.0 at ``alpha == 0`` and shrinks as
        ``alpha`` grows.
    """
    exponent = -2 * alpha
    return math.exp(exponent)

In [3]:
# Seed both random sources so a fresh "Restart & Run All" repeats the same
# random draws. Timestamps are still generated relative to the current time
# (end_date='now'), so absolute dates shift with the day the notebook is run.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)
fake = Faker()

def generate_event_data(num_events, base_orders_per_event):
    """Generate synthetic order records for a number of events.

    Each event gets a fake city as its location, a start time within the
    last year, and an alpha drawn uniformly from [0.1, 1.0] and rounded to
    one decimal. The number of orders per event is
    ``int(base_orders_per_event * demand_modifier(alpha))``.

    Args:
        num_events: Number of events to simulate.
        base_orders_per_event: Order count per event before the alpha
            adjustment is applied.

    Returns:
        List of dicts with the keys ``location``, ``timestamp``, ``price``
        and ``alpha``, one dict per order.
    """
    records = []

    for _event in range(num_events):
        city = fake.city()
        start_time = fake.date_time_between(start_date='-1y', end_date='now')
        alpha = round(random.uniform(0.1, 1.0), 1)

        # Higher alpha dampens demand, so fewer orders are generated.
        order_count = int(base_orders_per_event * demand_modifier(alpha))

        for _order in range(order_count):
            # Orders arrive 0-240 whole minutes after the event start.
            minutes_after_start = random.randint(0, 240)
            placed_at = start_time + timedelta(minutes=minutes_after_start)
            # Base price between $5 and $30, rounded to cents.
            base_price = round(random.uniform(5, 30), 2)

            record = {
                'location': city,
                'timestamp': placed_at,
                'price': base_price,
                'alpha': alpha
            }
            records.append(record)

    return records

# Simulate 50 events. The base order count (before the alpha adjustment) is
# drawn once from 500-1000 and shared by every event.
event_data = generate_event_data(
    num_events=50,
    base_orders_per_event=random.randint(500, 1000),
)

In [4]:
# Turn the list of order records into a DataFrame.
event_data = pd.DataFrame(event_data)

# Orders sharing a location and calendar day receive the same event_id.
# NOTE(review): orders are generated up to 240 minutes after the event start,
# so an event that starts late in the evening is split across two event_ids.
event_data['event_id'] = (
    event_data
    .groupby(['location', event_data['timestamp'].dt.date])
    .ngroup()
)

In [5]:
# Split of the absolute premium between the deliverer and the platform.
DELIVERER_SHARE = 0.9
PLATFORM_SHARE = 0.1

# Make sure timestamps are datetime64 before the time-based rolling window.
event_data['timestamp'] = pd.to_datetime(event_data['timestamp'])

# Time-based rolling windows need ascending timestamps within each event.
event_data = event_data.sort_values(['event_id', 'timestamp'])

# Dcurrent: per-order demand as computed by calculate_dcurrent for each event.
# Only the columns the helper reads are passed, which also avoids the
# pandas >= 2.2 deprecation warning about apply operating on the grouping
# column. Dropping the event_id index level restores the original row index
# so the assignment aligns row by row.
event_data['Dcurrent'] = (
    event_data
    .groupby('event_id')[['timestamp', 'price']]
    .apply(calculate_dcurrent)
    .reset_index(level=0, drop=True)
)

# Daverage: mean Dcurrent over all orders at the same location.
event_data['Daverage'] = event_data.groupby('location')['Dcurrent'].transform('mean')

# The helper is plain arithmetic, so it is called once on whole columns
# instead of once per row through DataFrame.apply(axis=1).
event_data['baseline_premium'] = basline_premium(
    event_data['Dcurrent'],
    event_data['Daverage'],
    event_data['alpha'],
)

# Final order price and the premium charged on top of the base price.
event_data['price_for_order'] = (1 + event_data['baseline_premium']) * event_data['price']
event_data['absolute_premium'] = event_data['price_for_order'] - event_data['price']

event_data['profit_deliverer'] = event_data['absolute_premium'] * DELIVERER_SHARE
event_data['profit_platform'] = event_data['absolute_premium'] * PLATFORM_SHARE

In [6]:
# One row per event: total deliverer and platform profit, plus the first
# alpha value recorded for that event.
event_data_grouped = event_data.groupby('event_id').agg(
    profit_deliverer=('profit_deliverer', 'sum'),
    profit_platform=('profit_platform', 'sum'),
    alpha=('alpha', 'first'),
)


In [7]:
# Average the per-event profit totals across all events sharing an alpha.
profit_columns = ['profit_deliverer', 'profit_platform']
alpha_grouped = event_data_grouped.groupby('alpha')[profit_columns].mean()

In [8]:
# Mean per-event deliverer and platform profit for each alpha value.
alpha_grouped

Unnamed: 0_level_0,profit_deliverer,profit_platform
alpha,Unnamed: 1_level_1,Unnamed: 2_level_1
0.1,645.902328,71.766925
0.2,1376.001941,152.889105
0.3,1565.95804,173.995338
0.4,1875.73898,208.415442
0.5,1899.15281,211.016979
0.6,1907.219925,211.913325
0.7,1796.947726,199.660858
0.8,1305.584231,145.064915
0.9,1546.504809,171.833868
1.0,1446.700254,160.744473


In [92]:
# replace location with one of the ones used earlier

Unnamed: 0,location,timestamp,price,alpha,event_id,Dcurrent,Daverage,baseline_premium,price_for_order,absolute_premium,profit_deliverer,profit_platform
