In [2]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta

# One seed for every random source so repeated runs produce identical data.
RANDOM_SEED = 42

fake = Faker()
Faker.seed(RANDOM_SEED)
# Faker.seed() only seeds Faker's own generator. The values below are drawn
# from the stdlib `random` module, which has to be seeded separately.
random.seed(RANDOM_SEED)

# Configuration
NUM_FLIGHTS = 1000  # number of synthetic flights to generate

# Per-aircraft-type settings:
#   min_staff / max_staff - staffing range for handling one flight of this type
#   weight                - relative frequency of the type in the schedule
AIRCRAFT_TYPES = {
    'A380': {'min_staff': 20, 'max_staff': 25, 'weight': 0.4},
    'B777': {'min_staff': 12, 'max_staff': 16, 'weight': 0.5},
    'A320': {'min_staff': 6, 'max_staff': 8, 'weight': 0.1}
}

# 1. Generate Flight Schedules
# Every flight gets an aircraft type (drawn by the configured weights), a
# scheduled arrival within one week of start_date, a passenger count sized to
# the aircraft, and a gate number.
flights = []
start_date = datetime(2025, 1, 1)

# Take the sampling weights from AIRCRAFT_TYPES so the configuration is the
# single source of truth; computed once because they never change in the loop.
ac_names = list(AIRCRAFT_TYPES)
ac_weights = [AIRCRAFT_TYPES[name]['weight'] for name in ac_names]

for i in range(NUM_FLIGHTS):
    ac_type = random.choices(ac_names, weights=ac_weights)[0]
    # 10080 minutes = 7 days; randint is inclusive on both ends.
    scheduled_arrival = start_date + timedelta(minutes=random.randint(0, 10080))  # Over 1 week

    flights.append({
        # Sequential IDs are guaranteed unique. Random three-digit numbers
        # (900 possible values) always collide when generating 1000 flights,
        # which makes flight_id unusable as a join key.
        'flight_id': f'EK{i + 1:04d}',
        'aircraft_type': ac_type,
        'scheduled_arrival': scheduled_arrival,
        # The A320 gets a smaller passenger range than the other two types.
        'passengers': random.randint(150, 500) if ac_type != 'A320' else random.randint(50, 180),
        'gate': random.randint(101, 250)
    })

df_flights = pd.DataFrame(flights)

# 2. Generate Historical Operations (The "Actuals")
# For each scheduled flight, draw the crew size that was actually deployed and
# derive a turnaround delay from it: understaffed flights are delayed far more
# often than adequately staffed ones.
ops = []
for flight in df_flights.to_dict('records'):
    staffing = AIRCRAFT_TYPES[flight['aircraft_type']]
    required_min = staffing['min_staff']

    # Deployed head-count ranges from 5 below the minimum to 2 above the maximum.
    crew_size = random.randint(required_min - 5, staffing['max_staff'] + 2)

    # 70% chance of a delay when below the minimum, otherwise 10%.
    if crew_size < required_min:
        delay_chance = 0.7
    else:
        delay_chance = 0.1

    # A delayed turnaround lasts 15-60 minutes; an on-time one records 0.
    if random.random() < delay_chance:
        delay = random.randint(15, 60)
    else:
        delay = 0

    ops.append({
        'flight_id': flight['flight_id'],
        'actual_staff_deployed': crew_size,
        'turnaround_delay_mins': delay,
        # Performance KPI: 1.5 points off per delayed minute, floored at 0.
        'handling_score': max(0, 100 - 1.5 * delay),
    })

df_ops = pd.DataFrame(ops)

# Write both tables to CSV (without the index column) for the next phase (SQL).
for frame, csv_name in (
    (df_flights, 'flights_schedule.csv'),
    (df_ops, 'historical_ops.csv'),
):
    frame.to_csv(csv_name, index=False)