In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Settings for simulation
n_rows = 1000
origin_destinations = [('NYC', 'LON'), ('LON', 'PAR'), ('PAR', 'NYC')]
actions = ['request_for_booking', 'add_to_cart', 'view_details']
ancillary_types = ['extra_baggage', 'meal', 'priority_boarding']
flight_times = ['Morning', 'Afternoon', 'Evening', 'Night']

# Function to generate random timestamps
def random_date():
    start = datetime.now()
    end = start + timedelta(days=365)
    return start + (end - start) * np.random.rand()

# Generating the initial dataset
np.random.seed(0)
df = pd.DataFrame({
    'user_id': np.random.randint(1000, 9999, size=n_rows),
    'time_stamp': [random_date() for _ in range(n_rows)],
    'session_id': np.random.randint(10000, 99999, size=n_rows),
    'origin': [np.random.choice([od[0] for od in origin_destinations]) for _ in range(n_rows)],
    'destination': [np.random.choice([od[1] for od in origin_destinations]) for _ in range(n_rows)],
    'flight_date': [random_date().date() for _ in range(n_rows)],
    'flight_time': [np.random.choice(flight_times) for _ in range(n_rows)],
    'action': [np.random.choice(actions) for _ in range(n_rows)],
    'action_type': [np.random.choice(['click', 'view', 'select']) for _ in range(n_rows)],
    'ancillary_type': [np.random.choice(ancillary_types) for _ in range(n_rows)],
})
df['time_stamp'] = df['time_stamp'].dt.strftime('%Y-%m-%d %H:%M:%S')

# Price pattern function
def generate_price(row):
    base_price = {'extra_baggage': 50, 'meal': 30, 'priority_boarding': 40}
    action_modifier = {'request_for_booking': -10, 'add_to_cart': 10, 'view_details': 5}
    time_modifier = {'Morning': 5, 'Afternoon': -5, 'Evening': 5, 'Night': -5}

    price = base_price[row['ancillary_type']] + action_modifier[row['action']] + time_modifier[row['flight_time']]
    return round(np.random.normal(price, 10), 2)  # Adding some random noise

# Adding the offered_price column with pattern
df['offered_price'] = df.apply(generate_price, axis=1)

print(df.head())


   user_id           time_stamp  session_id origin destination flight_date  \
0     3732  2024-05-16 06:02:04       75186    LON         NYC  2024-06-20   
1     4264  2024-05-21 19:44:44       39386    PAR         PAR  2024-10-20   
2     5859  2024-10-12 16:28:19       20024    NYC         LON  2024-05-07   
3     8891  2024-04-26 19:41:14       42808    PAR         LON  2023-12-31   
4     5373  2024-03-07 21:52:11       96110    NYC         NYC  2024-05-25   

  flight_time               action action_type ancillary_type  offered_price  
0   Afternoon  request_for_booking      select           meal          25.69  
1     Evening         view_details        view  extra_baggage          66.14  
2     Evening          add_to_cart      select           meal          43.78  
3       Night  request_for_booking       click  extra_baggage          42.51  
4   Afternoon          add_to_cart       click  extra_baggage          46.67  


In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Settings for simulation
n_rows = 1000
user_types = ['frequent', 'occasional', 'first-time']
origin_destinations = [('NYC', 'LON', 'high'), ('LON', 'PAR', 'medium'), ('PAR', 'NYC', 'low')]
actions = ['search_flight', 'view_details', 'add_to_cart', 'checkout']
ancillary_types = ['extra_baggage', 'meal', 'priority_boarding', 'none']  # Adding 'none' for search_flight
flight_times = ['Morning', 'Afternoon', 'Evening', 'Night']

# Function to generate random timestamps
def random_date(start=None, end=None):
    if start is None:
        start = datetime.now()
    if end is None:
        end = start + timedelta(days=365)
    return start + (end - start) * np.random.rand()

# Generate sessions with multiple actions
def generate_sessions(n):
    sessions = []
    for _ in range(n):
        session_id = np.random.randint(10000, 99999)
        user_type = random.choice(user_types)
        num_actions = random.randint(1, len(actions)) if user_type != 'first-time' else random.randint(1, 2)
        session_actions = random.sample(actions, num_actions)
        session_actions.sort(key=lambda x: actions.index(x))  # Sort actions by predefined order
        start_time = random_date()
        previous_action_time = start_time

        for action in session_actions:
            time_delay = timedelta(minutes=random.randint(5, 60))  # Time delay between actions
            action_time = previous_action_time + time_delay
            session = {
                'session_id': session_id,
                'user_type': user_type,
                'action': action,
                'time_stamp': action_time,
                'ancillary_type': 'none' if action == 'search_flight' else random.choice(ancillary_types)
            }
            sessions.append(session)
            previous_action_time = action_time
    return sessions

# Adjust price function
def adjust_price(row, destination_popularity):
    if row['action'] == 'search_flight':
        return None  # No price for search_flight action

    base_price = {'extra_baggage': 50, 'meal': 30, 'priority_boarding': 40, 'none': 0}
    popularity_factor = {'high': 1.2, 'medium': 1.0, 'low': 0.8}
    season_factor = 1.15 if row['flight_date'].month in [7, 8, 12] else 1.0

    price = base_price[row['ancillary_type']] * popularity_factor[destination_popularity] * season_factor
    return round(price, 2)

# Generate initial data
np.random.seed(0)
session_actions = generate_sessions(n_rows)

# Create the DataFrame
df_actions = pd.DataFrame(session_actions)
df_actions['user_id'] = np.random.randint(1000, 9999, size=df_actions.shape[0])
df_actions['origin'], df_actions['destination'], df_actions['popularity'] = zip(*[random.choice(origin_destinations) for _ in range(df_actions.shape[0])])
df_actions['flight_date'] = [random_date().date() for _ in range(df_actions.shape[0])]
df_actions['flight_time'] = [np.random.choice(flight_times) for _ in range(df_actions.shape[0])]
df_actions['offered_price'] = df_actions.apply(lambda row: adjust_price(row, row['popularity']), axis=1)
df_actions['time_stamp'] = df_actions['time_stamp'].dt.strftime('%Y-%m-%d %H:%M:%S')

print(df_actions.head(10))


   session_id   user_type         action           time_stamp  \
0       78268    frequent   view_details  2024-07-25 03:23:11   
1       78268    frequent       checkout  2024-07-25 04:01:11   
2       55891  first-time       checkout  2024-10-29 21:46:16   
3       51993  first-time  search_flight  2024-08-05 08:19:08   
4       51993  first-time   view_details  2024-08-05 08:25:08   
5       65026  occasional  search_flight  2024-05-29 11:28:53   
6       65026  occasional   view_details  2024-05-29 11:52:53   
7       65026  occasional    add_to_cart  2024-05-29 12:27:53   
8       65026  occasional       checkout  2024-05-29 12:40:53   
9       90966    frequent  search_flight  2024-01-11 10:48:23   

      ancillary_type  user_id origin destination popularity flight_date  \
0  priority_boarding     7222    PAR         NYC        low  2024-03-01   
1               meal     5819    PAR         NYC        low  2024-11-22   
2               none     6402    LON         PAR     medium

In [7]:
df_actions[df_actions['session_id'] == 65026]

Unnamed: 0,session_id,user_type,action,time_stamp,ancillary_type,user_id,origin,destination,popularity,flight_date,flight_time,offered_price
5,65026,occasional,search_flight,2024-05-29 11:28:53,none,2588,NYC,LON,high,2024-07-20,Night,
6,65026,occasional,view_details,2024-05-29 11:52:53,meal,6009,PAR,NYC,low,2024-11-12,Evening,24.0
7,65026,occasional,add_to_cart,2024-05-29 12:27:53,meal,7538,PAR,NYC,low,2024-08-04,Evening,27.6
8,65026,occasional,checkout,2024-05-29 12:40:53,meal,9185,LON,PAR,medium,2024-05-28,Night,30.0


In [12]:
df_actions[df_actions['user_id'] == 8891]

Unnamed: 0,user_id,session_id,user_type,action,time_stamp,origin,destination,popularity,ancillary_type,flight_date,flight_time,offered_price
3,8891,90471,occasional,search_flight,2024-08-13 14:02:55,NYC,LON,low,wifi,2024-11-29,Night,16.0
4,8891,90471,occasional,view_details,2024-08-13 14:16:55,NYC,LON,low,extra_baggage,2024-09-18,Evening,40.0
5,8891,90966,occasional,add_to_cart,2024-08-13 14:27:55,NYC,LON,low,lounge_access,2024-06-25,Evening,56.0
6,8891,90966,occasional,checkout,2024-08-13 14:30:55,NYC,LON,low,extra_baggage,2024-02-10,Morning,40.0


In [11]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Settings for simulation
n_rows = 1000
user_types = ['frequent', 'occasional', 'first-time']
origin_destinations = [('NYC', 'LON', 'high'), ('LON', 'PAR', 'medium'), ('PAR', 'NYC', 'low')]
actions = ['search_flight', 'view_details', 'add_to_cart', 'checkout']
ancillary_types = ['extra_baggage', 'meal', 'priority_boarding', 'lounge_access', 'wifi', 'seat_upgrade', 'travel_insurance', 'fast_track_security']
flight_times = ['Morning', 'Afternoon', 'Evening', 'Night']
session_length = timedelta(minutes=30)  # Session window

# Function to generate random timestamps
def random_date(start=None, end=None):
    if start is None:
        start = datetime.now()
    if end is None:
        end = start + timedelta(days=365)
    return start + (end - start) * np.random.rand()

# Suggest ancillary based on route
def suggest_ancillary(route):
    route_specific_ancillaries = {
        'NYC-LON': ['extra_baggage', 'lounge_access', 'seat_upgrade'],
        'LON-PAR': ['meal', 'wifi', 'fast_track_security'],
        'PAR-NYC': ['priority_boarding', 'travel_insurance', 'wifi']
    }
    origin, destination = route
    route_key = f"{origin}-{destination}"
    return random.choice(route_specific_ancillaries.get(route_key, ancillary_types))

# Generate user actions with session logic
def generate_user_actions(n):
    actions_data = []
    for _ in range(n):
        user_id = np.random.randint(1000, 9999)
        user_type = random.choice(user_types)
        num_actions = random.randint(1, len(actions)) if user_type != 'first-time' else random.randint(1, 2)
        user_actions = random.sample(actions, num_actions)
        user_actions.sort(key=lambda x: actions.index(x))
        start_time = random_date()
        session_end_time = start_time + session_length
        session_id = np.random.randint(10000, 99999)
        route = random.choice(origin_destinations)[:2]  # Get origin and destination
        popularity = random.choice(['high', 'medium', 'low'])

        for action in user_actions:
            time_delay = timedelta(minutes=random.randint(1, 15))
            action_time = start_time + time_delay
            if action_time > session_end_time:
                session_id = np.random.randint(10000, 99999)  # New session
                session_end_time = action_time + session_length

            ancillary = suggest_ancillary(route) if action != 'search_flight' else random.choice(ancillary_types)
            action_data = {
                'user_id': user_id,
                'session_id': session_id,
                'user_type': user_type,
                'action': action,
                'time_stamp': action_time,
                'origin': route[0],
                'destination': route[1],
                'popularity': popularity,
                'ancillary_type': ancillary
            }
            actions_data.append(action_data)
            start_time = action_time
    return actions_data

# Price adjustment function
def adjust_price(row):
    base_price_mapping = {
        'extra_baggage': 50, 'meal': 30, 'priority_boarding': 40, 'lounge_access': 70, 
        'wifi': 20, 'seat_upgrade': 100, 'travel_insurance': 25, 'fast_track_security': 15
    }
    popularity_factor = {'high': 1.2, 'medium': 1.0, 'low': 0.8}
    season_factor = 1.15 if row['flight_date'].month in [7, 8, 12] else 1.0

    base_price = base_price_mapping[row['ancillary_type']]
    price = base_price * popularity_factor[row['popularity']] * season_factor
    return round(price, 2)

# Generate initial data
np.random.seed(0)
user_actions = generate_user_actions(n_rows)

# Create the DataFrame
df_actions = pd.DataFrame(user_actions)
df_actions['flight_date'] = [random_date().date() for _ in range(df_actions.shape[0])]
df_actions['flight_time'] = [np.random.choice(flight_times) for _ in range(df_actions.shape[0])]
df_actions['offered_price'] = df_actions.apply(adjust_price, axis=1)
df_actions['time_stamp'] = df_actions['time_stamp'].dt.strftime('%Y-%m-%d %H:%M:%S')

print(df_actions.head(10))


   user_id  session_id   user_type         action           time_stamp origin  \
0     3732       55891  first-time  search_flight  2024-07-25 05:16:06    PAR   
1     3732       55891  first-time   view_details  2024-07-25 05:26:06    PAR   
2     5859       51993    frequent  search_flight  2024-07-07 17:15:34    NYC   
3     8891       90471  occasional  search_flight  2024-08-13 14:02:55    NYC   
4     8891       90471  occasional   view_details  2024-08-13 14:16:55    NYC   
5     8891       90966  occasional    add_to_cart  2024-08-13 14:27:55    NYC   
6     8891       90966  occasional       checkout  2024-08-13 14:30:55    NYC   
7     7744       27089  occasional   view_details  2024-03-30 08:35:07    LON   
8     7744       27089  occasional    add_to_cart  2024-03-30 08:47:07    LON   
9     7744       42230  occasional       checkout  2024-03-30 09:01:07    LON   

  destination popularity       ancillary_type flight_date flight_time  \
0         NYC     medium        ext

In [20]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
pd.set_option('display.max_rows', 500) 

# Fixed user and flight information
user_id = 1234
origin = 'NYC'
destination = 'LON'
flight_date = datetime.now() + timedelta(days=10)
flight_time = 'Morning'
flight_date_str = flight_date.strftime('%Y-%m-%d')

# Possible actions and ancillaries
actions = ['search_flight', 'view_details', 'add_to_cart', 'checkout']
ancillary_types = ['extra_baggage', 'meal', 'priority_boarding', 'lounge_access', 'wifi', 'seat_upgrade', 'travel_insurance', 'fast_track_security']

# Function to simulate actions within a session
def simulate_session_actions(session_id, base_prices):
    session_actions = []
    start_time = datetime.now()
    for ancillary in ancillary_types:
        for action in actions:
            time_delay = timedelta(minutes=random.randint(1, 10))
            action_time = start_time + time_delay
            action_data = {
                'user_id': user_id,
                'session_id': session_id,
                'action': action,
                'time_stamp': action_time.strftime('%Y-%m-%d %H:%M:%S'),
                'origin': origin,
                'destination': destination,
                'flight_date': flight_date_str,
                'flight_time': flight_time,
                'ancillary_type': ancillary,
                'offered_price': base_prices[ancillary]
            }
            session_actions.append(action_data)
            start_time = action_time
    return session_actions

# Generate sessions
np.random.seed(0)
base_prices_session1 = {ancillary: np.random.choice(range(20, 100)) for ancillary in ancillary_types}
base_prices_session2 = {ancillary: price + np.random.choice([-1, 0, 1]) for ancillary, price in base_prices_session1.items()}
base_prices_session3 = {ancillary: price + np.random.choice([-1, 0, 1]) for ancillary, price in base_prices_session2.items()}

session_1 = simulate_session_actions(10001, base_prices_session1)
session_2 = simulate_session_actions(10002, base_prices_session2)
session_3 = simulate_session_actions(10003, base_prices_session3)

# Combine sessions into a DataFrame
all_sessions = session_1 + session_2 + session_3
df_actions = pd.DataFrame(all_sessions)

print(df_actions.head(10))


   user_id  session_id         action           time_stamp origin destination  \
0     1234       10001  search_flight  2023-12-21 20:39:10    NYC         LON   
1     1234       10001   view_details  2023-12-21 20:40:10    NYC         LON   
2     1234       10001    add_to_cart  2023-12-21 20:41:10    NYC         LON   
3     1234       10001       checkout  2023-12-21 20:49:10    NYC         LON   
4     1234       10001  search_flight  2023-12-21 20:54:10    NYC         LON   
5     1234       10001   view_details  2023-12-21 21:03:10    NYC         LON   
6     1234       10001    add_to_cart  2023-12-21 21:05:10    NYC         LON   
7     1234       10001       checkout  2023-12-21 21:10:10    NYC         LON   
8     1234       10001  search_flight  2023-12-21 21:13:10    NYC         LON   
9     1234       10001   view_details  2023-12-21 21:22:10    NYC         LON   

  flight_date flight_time     ancillary_type  offered_price  
0  2023-12-31     Morning      extra_baggage  

In [21]:
df_actions

Unnamed: 0,user_id,session_id,action,time_stamp,origin,destination,flight_date,flight_time,ancillary_type,offered_price
0,1234,10001,search_flight,2023-12-21 20:39:10,NYC,LON,2023-12-31,Morning,extra_baggage,64
1,1234,10001,view_details,2023-12-21 20:40:10,NYC,LON,2023-12-31,Morning,extra_baggage,64
2,1234,10001,add_to_cart,2023-12-21 20:41:10,NYC,LON,2023-12-31,Morning,extra_baggage,64
3,1234,10001,checkout,2023-12-21 20:49:10,NYC,LON,2023-12-31,Morning,extra_baggage,64
4,1234,10001,search_flight,2023-12-21 20:54:10,NYC,LON,2023-12-31,Morning,meal,67
5,1234,10001,view_details,2023-12-21 21:03:10,NYC,LON,2023-12-31,Morning,meal,67
6,1234,10001,add_to_cart,2023-12-21 21:05:10,NYC,LON,2023-12-31,Morning,meal,67
7,1234,10001,checkout,2023-12-21 21:10:10,NYC,LON,2023-12-31,Morning,meal,67
8,1234,10001,search_flight,2023-12-21 21:13:10,NYC,LON,2023-12-31,Morning,priority_boarding,84
9,1234,10001,view_details,2023-12-21 21:22:10,NYC,LON,2023-12-31,Morning,priority_boarding,84


In [26]:
import requests
from datetime import datetime, timedelta

# Replace 'your_api_key' with your actual Eventbrite API key
api_key = 'XNWYQGI4XK7DLPCCBL65'

# Set the date range for the next two weeks
start_date = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
end_date = (datetime.now() + timedelta(weeks=2)).strftime('%Y-%m-%dT%H:%M:%SZ')

# Set the location to Auckland, New Zealand
location = "Auckland, New Zealand"

# Eventbrite API endpoint for searching events
url = f'https://www.eventbriteapi.com/v3/events/search/?q=&location.address={location}&start_date.range_start={start_date}&start_date.range_end={end_date}&token={api_key}'

# Make the API request
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    events = response.json()['events']
    for event in events:
        # Extract and print event details
        name = event['name']['text']
        start = event['start']['local']
        print(f"Event: {name}, Start Time: {start}")
else:
    print("Failed to retrieve events")



Failed to retrieve events


In [25]:
requests.get(url)

<Response [404]>

In [27]:
url


'https://www.eventbriteapi.com/v3/events/search/?q=&location.address=Auckland, New Zealand&start_date.range_start=2023-12-25T22:59:08Z&start_date.range_end=2024-01-08T22:59:08Z&token=XNWYQGI4XK7DLPCCBL65'

In [1]:
import pandas as pd
from datetime import datetime, timedelta

# Current date
today = datetime.today()

# Define the departure date as 80 days from today
departure_date = today + timedelta(days=80)

# Create a list of city pairs for departure and arrival using IATA codes
cities = [('JFK', 'LHR'),  # New York to London
          ('LAX', 'HND'),  # Los Angeles to Tokyo
          ('ORD', 'CDG'),  # Chicago to Paris
          ('SFO', 'TXL'),  # San Francisco to Berlin
          ('MIA', 'MAD')]  # Miami to Madrid

# Define inventory by flight class
flight_inventory = {
    'Economy Basic': 150,
    'Economy Standard': 100,
    'Economy Flex': 50,
    'Premium Economy': 40,
    'Business Standard': 30,
    'Business Flex': 20,
    'First Class': 10
}

# Define carrier types and associated typical aircraft
carrier_types = ['Narrow Body', 'Wide Body', 'Regional Jet']

# Mock data for each flight
flights_data = []

# Generate mock data
for i, (departure_code, arrival_code) in enumerate(cities):
    for carrier in carrier_types:
        for flight_class, seats in flight_inventory.items():
            flight_data = {
                'FlightNumber': f'AB{1000 + i}',
                'DepartureAirport': departure_code,
                'ArrivalAirport': arrival_code,
                'DepartureDate': departure_date.strftime("%Y-%m-%d"),
                'CarrierType': carrier,
                'Class': flight_class,
                'SeatsAvailable': seats
            }
            flights_data.append(flight_data)

# Create DataFrame
flights_df = pd.DataFrame(flights_data)

# Display the DataFrame
flights_df.head()  # Displaying just the first few rows for brevity


Unnamed: 0,FlightNumber,DepartureAirport,ArrivalAirport,DepartureDate,CarrierType,Class,SeatsAvailable
0,AB1000,JFK,LHR,2024-05-14,Narrow Body,Economy Basic,150
1,AB1000,JFK,LHR,2024-05-14,Narrow Body,Economy Standard,100
2,AB1000,JFK,LHR,2024-05-14,Narrow Body,Economy Flex,50
3,AB1000,JFK,LHR,2024-05-14,Narrow Body,Premium Economy,40
4,AB1000,JFK,LHR,2024-05-14,Narrow Body,Business Standard,30


In [3]:
display(flights_df)


Unnamed: 0,FlightNumber,DepartureAirport,ArrivalAirport,DepartureDate,CarrierType,Class,SeatsAvailable
0,AB1000,JFK,LHR,2024-05-14,Narrow Body,Economy Basic,150
1,AB1000,JFK,LHR,2024-05-14,Narrow Body,Economy Standard,100
2,AB1000,JFK,LHR,2024-05-14,Narrow Body,Economy Flex,50
3,AB1000,JFK,LHR,2024-05-14,Narrow Body,Premium Economy,40
4,AB1000,JFK,LHR,2024-05-14,Narrow Body,Business Standard,30
...,...,...,...,...,...,...,...
100,AB1004,MIA,MAD,2024-05-14,Regional Jet,Economy Flex,50
101,AB1004,MIA,MAD,2024-05-14,Regional Jet,Premium Economy,40
102,AB1004,MIA,MAD,2024-05-14,Regional Jet,Business Standard,30
103,AB1004,MIA,MAD,2024-05-14,Regional Jet,Business Flex,20
