In [13]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Simulation horizon (H), expressed in days
H = 2  # Start with 2 days

# Month number -> number of days in that month for 2013 (not a leap year)
days_in_month = dict(
    enumerate([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], start=1)
)

# Load the flight records from the CSV file in the working directory
flights_df = pd.read_csv('./flights.csv')

# Keep only the hour part of the departure/arrival times by integer-dividing
# by 100 (presumably the times are HHMM-encoded -- confirm against the CSV).
# Missing times are replaced by hour 0 before the cast to int.
flights_df['dep_hour'] = flights_df['dep_time'].floordiv(100).fillna(0).astype(int)
flights_df['arr_hour'] = flights_df['arr_time'].floordiv(100).fillna(0).astype(int)

# Estimate airplanes directly from total flights
def estimate_airplanes(flights_df, horizon=None, flights_per_plane_per_day=5):
    """
    Estimate the total number of airplanes from the total number of flights.

    The estimate is ``len(flights_df) / (flights_per_plane_per_day * horizon)``,
    truncated to an integer and never smaller than 1.

    Args:
        flights_df: Collection of flights; only its length is used.
        horizon: Length of the simulated horizon in days. When omitted, the
            module-level ``H`` is used, so existing one-argument calls keep
            their previous behavior.
        flights_per_plane_per_day: Assumed average number of flights a single
            airplane can handle per day.

    Returns:
        The estimated number of airplanes as an int (at least 1).

    Raises:
        ValueError: If ``horizon`` or ``flights_per_plane_per_day`` is not
            strictly positive.
    """
    if horizon is None:
        # Fall back to the module-level horizon for backward compatibility.
        horizon = H
    if horizon <= 0:
        raise ValueError(f"horizon must be a positive number of days, got {horizon}")
    if flights_per_plane_per_day <= 0:
        raise ValueError(
            "flights_per_plane_per_day must be positive, "
            f"got {flights_per_plane_per_day}"
        )

    total_flights = len(flights_df)
    # One airplane covers `flights_per_plane_per_day * horizon` flights over
    # the whole horizon; always keep at least one airplane.
    return max(1, int(total_flights / (flights_per_plane_per_day * horizon)))

# Assign start airplanes proportionally to departing flights
def assign_start_airplanes(departing_airports, departing_counts, airplanes_needed):
    """
    Split the fleet across airports in proportion to their departure counts.

    Each airport in ``departing_counts`` receives its proportional share of
    ``airplanes_needed``, truncated to a whole number but never fewer than one
    airplane (so the total handed out can exceed ``airplanes_needed``). If the
    truncation leaves airplanes unassigned, they are given out one apiece to
    airports drawn in random order from ``departing_airports``.

    Returns:
        dict mapping each airport to the number of airplanes it starts with.
    """
    departures_sum = sum(departing_counts.values())

    # Proportional share per airport, with a floor of one airplane
    allocation = {
        code: max(1, int((n / departures_sum) * airplanes_needed))
        for code, n in departing_counts.items()
    }

    leftover = airplanes_needed - sum(allocation.values())
    if leftover <= 0:
        return allocation

    # Hand the leftover airplanes to the first `leftover` airports of a
    # random ordering, one airplane each
    shuffled = random.sample(list(departing_airports), len(departing_airports))
    for code in shuffled[:leftover]:
        allocation[code] += 1

    return allocation

# Generate simulated data for a single horizon
def generate_simulated_data(flights_df, H, airplanes_needed, output_file):
    """
    Write a simulated flight schedule for a random H-day window of 2013.

    A start day is drawn at random such that the whole window lies inside
    2013. Flights dated inside the window are written with day numbers
    relative to the window start (start day = 0). Airplanes are placed at the
    airports that have departures on the start day, via
    ``assign_start_airplanes``.

    Output file layout (space-separated text):
        line 1: ``<number of flights> <number of airports with airplanes>``
        then one line per flight:
            ``<origin> <dep_day> <dep_hour> <dest> <arr_day> <arr_hour>``
        then one line per airport: ``<airport> <airplanes>``

    Args:
        flights_df: DataFrame with the columns ``month``, ``day``, ``origin``,
            ``dest``, ``dep_hour`` and ``arr_hour``. It is not modified.
        H: Horizon length in days (1..365).
        airplanes_needed: Total number of airplanes to distribute.
        output_file: Path of the text file to write.

    Raises:
        ValueError: If ``H`` is not between 1 and 365.
    """
    days_in_year = 365  # 2013 is not a leap year
    if not 1 <= H <= days_in_year:
        raise ValueError(f"H must be between 1 and {days_in_year} days, got {H}")

    # Randomly pick a starting point in 2013. randint is inclusive on both
    # ends, so the last valid start is day (365 - H + 1); this lets the window
    # reach December 31 and allows H == 365.
    start_day_of_year = random.randint(1, days_in_year - H + 1)
    start_date = datetime(2013, 1, 1) + timedelta(days=start_day_of_year - 1)
    end_date = start_date + timedelta(days=H)

    # Build the flight dates in a separate Series so that the caller's
    # DataFrame is left untouched (the year is fixed to 2013).
    flight_dates = pd.to_datetime(
        pd.DataFrame({
            'year': 2013,
            'month': flights_df['month'],
            'day': flights_df['day'],
        })
    )
    in_horizon = (flight_dates >= start_date) & (flight_dates < end_date)
    flights_in_horizon = flights_df.loc[in_horizon]

    # Departure day relative to the start of the window (start day == 0)
    dep_days = (flight_dates.loc[in_horizon] - start_date).dt.days

    # Get the set of departing airports and their counts for the start day
    start_day_origins = flights_in_horizon.loc[dep_days == 0, 'origin']
    departing_airports = start_day_origins.unique()
    departing_counts = start_day_origins.value_counts().to_dict()

    # Assign airplanes to departing airports
    airport_start_planes = assign_start_airplanes(
        departing_airports, departing_counts, airplanes_needed
    )

    dep_hours = flights_in_horizon['dep_hour']
    arr_hours = flights_in_horizon['arr_hour']
    # An arrival hour earlier than the departure hour is treated as a landing
    # on the following day.
    arr_days = dep_days + (arr_hours < dep_hours).astype(int)

    # Prepare output data for flights
    flights_data = [
        f"{origin} {dep_day} {dep_hour} {dest} {arr_day} {arr_hour}"
        for origin, dep_day, dep_hour, dest, arr_day, arr_hour in zip(
            flights_in_horizon['origin'], dep_days, dep_hours,
            flights_in_horizon['dest'], arr_days, arr_hours,
        )
        # Skip flights where departure and arrival are in the same hour
        if not (dep_day == arr_day and dep_hour == arr_hour)
    ]

    # Prepare airplane starting data (only airports with assigned planes)
    airplanes_data = [
        f"{airport} {count}"
        for airport, count in airport_start_planes.items()
        if count > 0
    ]

    # Write header, flights and airplanes as one newline-terminated text;
    # an empty section contributes no lines at all.
    lines = [f"{len(flights_data)} {len(airplanes_data)}", *flights_data, *airplanes_data]
    with open(output_file, 'w') as f:
        f.write("\n".join(lines) + "\n")

# Main execution
if __name__ == "__main__":
    # Size the fleet from the loaded flight table
    airplanes_needed = estimate_airplanes(flights_df)

    # Produce the schedule file for the configured horizon H
    output_file = f"data_{H}.txt"
    generate_simulated_data(
        flights_df=flights_df,
        H=H,
        airplanes_needed=airplanes_needed,
        output_file=output_file,
    )

    # Report where the data went and how many airplanes were estimated
    print(
        f"Simulated data for horizon {H} days written to {output_file}.",
        f"Estimated airplanes needed: {airplanes_needed}",
        sep="\n",
    )


Simulated data for horizon 2 days written to data_2.txt.
Estimated airplanes needed: 33677
