In [8]:
import numpy as np


# --- Synthetic ticket data ---------------------------------------------------
# Builds `n` reproducible ticket records (seeded RNG) and deliberately plants
# three kinds of dirty data -- blank prices, negative prices and upper-cased
# class labels -- so that the later cleaning step has something to remove.
seed_value = 711
n = 320
rng = np.random.default_rng(seed_value)

routes = ["NYC-LAX", "LHR-JFK", "SFO-SEA", "DXB-SIN", "MAD-ROM"]
days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
classes = ["economy", "premium", "business"]

# Price surcharges, positionally aligned with `routes` and `classes`.
route_adjs = [140, 220, 60, 180, 80]
class_adjs = [0, 80, 220]
assert len(route_adjs) == len(routes), "route_adjs must have one entry per route"
assert len(class_adjs) == len(classes), "class_adjs must have one entry per class"

tickets = []

for i in range(1, n + 1):
    # Cycle through the lookup lists. The modulus comes from each list's
    # length (instead of a hard-coded 5 / 7 / 3) so that editing a list can
    # neither raise IndexError nor leave entries unreachable.
    route_index = (i + seed_value) % len(routes)
    day_index = (i + seed_value) % len(days)
    class_index = (i * 2 + seed_value) % len(classes)

    route = routes[route_index]
    day = days[day_index]
    days_to_departure = 1 + ((i * 3 + seed_value) % 60)  # always in 1..60
    ticket_class = classes[class_index]

    # Price model: base fare falls by 1.5 per day to departure, plus the
    # route and class surcharges, plus Gaussian noise (mean 0, std 25).
    base = 120 + (days_to_departure * -1.5)
    route_adj = route_adjs[route_index]
    class_adj = class_adjs[class_index]
    noise = rng.normal(0, 25)  # exactly one draw per ticket keeps the stream reproducible
    price_usd = round(base + route_adj + class_adj + noise, 2)

    # Planted defect 1: every 28th ticket has a blank (empty-string) price.
    if i % 28 == 0:
        price_usd = ""

    # Planted defect 2: every 45th ticket has its price negated. If a ticket
    # were hit by both rules, "" * -1 is still "", so it would stay blank.
    if i % 45 == 0:
        price_usd *= -1

    # Planted defect 3: every 37th ticket has an upper-cased class label.
    if i % 37 == 0:
        ticket_class = ticket_class.upper()

    tickets.append(
        {
            'Ticket ID': f"T{seed_value}-{i:04d}",
            'Route': route,
            'Day': day,
            'Days to departure': days_to_departure,
            'Class': ticket_class,
            'Price in USD': price_usd
        }
    )

print(f"Total record count is: {len(tickets)}")
print('First five records are:')
for ticket in tickets[:5]:
    print(ticket)
 

Total record count is: 320
First five records are:
{'Ticket ID': 'T711-0001', 'Route': 'SFO-SEA', 'Day': 'Sat', 'Days to departure': 55, 'Class': 'business', 'Price in USD': 310.33}
{'Ticket ID': 'T711-0002', 'Route': 'DXB-SIN', 'Day': 'Sun', 'Days to departure': 58, 'Class': 'premium', 'Price in USD': 275.23}
{'Ticket ID': 'T711-0003', 'Route': 'MAD-ROM', 'Day': 'Mon', 'Days to departure': 1, 'Class': 'economy', 'Price in USD': 214.51}
{'Ticket ID': 'T711-0004', 'Route': 'NYC-LAX', 'Day': 'Tue', 'Days to departure': 4, 'Class': 'business', 'Price in USD': 465.46}
{'Ticket ID': 'T711-0005', 'Route': 'LHR-JFK', 'Day': 'Wed', 'Days to departure': 7, 'Class': 'premium', 'Price in USD': 428.48}


TASK 2

In [9]:
# Keep only the tickets whose price is a real, non-negative number, and
# normalise the class label to lower case. Each kept record is a fresh dict,
# so the raw `tickets` list is left untouched.
cleaned_tickets = [
    {**record, 'Class': record['Class'].lower()}
    for record in tickets
    if isinstance(record['Price in USD'], (int, float))
    and record['Price in USD'] >= 0
]


print(f'Cleaned ticket count: {len(cleaned_tickets)}')
print('First two records of cleaned tickets:')
for record in cleaned_tickets[:2]:
    print(record)

Cleaned ticket count: 302
First two records of cleaned tickets:
{'Ticket ID': 'T711-0001', 'Route': 'SFO-SEA', 'Day': 'Sat', 'Days to departure': 55, 'Class': 'business', 'Price in USD': 310.33}
{'Ticket ID': 'T711-0002', 'Route': 'DXB-SIN', 'Day': 'Sun', 'Days to departure': 58, 'Class': 'premium', 'Price in USD': 275.23}


TASK 3

In [10]:
# Pull the two columns needed for the statistics into NumPy arrays; both are
# built from `cleaned_tickets` in the same order, so they stay index-aligned.
prices_arr = np.array([t['Price in USD'] for t in cleaned_tickets])
days_arr = np.array([t['Day'] for t in cleaned_tickets])

price_mean = np.mean(prices_arr)
std_price = np.std(prices_arr)  # NumPy default ddof=0, i.e. population standard deviation

# Per-weekday revenue and ticket counts via boolean masks. Totals are stored
# as plain Python floats so the dict holds no NumPy scalar objects.
daily_totals = {d: round(float(np.sum(prices_arr[days_arr == d])), 2) for d in days}
daily_counts = {d: int(np.sum(days_arr == d)) for d in days}

# Reconciliation: the per-day totals must add back up to the overall revenue.
total_revenue = round(float(np.sum(prices_arr)), 2)
daily_revenue_sum = round(sum(daily_totals.values()), 2)
revenue_is_valid = bool(np.isclose(total_revenue, daily_revenue_sum))

# Fail loudly instead of leaving the check results unused.
assert revenue_is_valid, (
    f"daily totals ({daily_revenue_sum}) do not reconcile with total revenue ({total_revenue})"
)
assert sum(daily_counts.values()) == len(cleaned_tickets), (
    "some cleaned tickets have a 'Day' value that is not listed in `days`"
)



TASK 4

In [11]:
# Flag the tickets priced strictly above the 90th percentile of cleaned prices.
threshold = np.percentile(prices_arr, 90)
high_price_mask = np.greater(prices_arr, threshold)

# Number of flagged tickets, as a plain Python int.
high_price_count = int(np.count_nonzero(high_price_mask))

# Sanity flag: every flagged price is at or above the threshold.
verify = (prices_arr[high_price_mask] >= threshold).all()


TASK 5

In [15]:
print('=' * 20)

# Collect the headline figures from the earlier cells into one dict.
# Insertion order is the print order; mean and std are rounded to 2 decimals.
report = {
    'Total tickets': len(tickets),
    'Cleaned tickets': len(cleaned_tickets),
    'Mean price': round(float(price_mean), 2),
    'Price STD': round(float(std_price), 2),
    'High price count': high_price_count,
    'Daily totals': daily_totals,
}

print('Final report'.center(20))

# Scalar entries go on one line each; the per-day totals are expanded into
# one indented, currency-formatted line per weekday.
for label, entry in report.items():
    if label != "Daily totals":
        print(f"{label}: {entry}")
        continue
    print(f"{label}:")
    for weekday, amount in entry.items():
        print(f"--->{weekday}: ${amount:,.2f}")


    Final report    
Total tickets: 320
Cleaned tickets: 302
Mean price: 311.55
Price STD: 116.13
High price count: 31
Daily totals:
--->Mon: $14,014.73
--->Tue: $13,935.99
--->Wed: $14,225.10
--->Thu: $13,749.12
--->Fri: $10,026.79
--->Sat: $14,020.19
--->Sun: $14,114.71
