In [5]:
import pandas as pd
import numpy as np
import time
from collections import defaultdict, deque

start_time = time.time()

passengers_df = pd.read_csv(r"C:\Users\rayar\Downloads\recruitment-dataset-20240522T183003Z-001\recruitment-dataset\passengers.csv") #Replace all these with the respective file paths in your system
flights_df = pd.read_csv(r"C:\Users\rayar\Downloads\recruitment-dataset-20240522T183003Z-001\recruitment-dataset\flights.csv")
canceled_flights_df = pd.read_csv(r"C:\Users\rayar\Downloads\recruitment-dataset-20240522T183003Z-001\recruitment-dataset\canceled.csv")


canceled_flight_ids = set(canceled_flights_df['Canceled'].values)


affected_passengers = passengers_df[passengers_df['FID'].isin(canceled_flight_ids)]
num_affected_passengers = len(affected_passengers)

# We create a graph representation of flights
flight_graph = defaultdict(list)
flight_info = {}
for _, row in flights_df.iterrows():
    if row['FID'] not in canceled_flight_ids:
        flight_graph[row['DEP']].append(row['FID'])
        flight_info[row['FID']] = row.to_dict()

# We define our BFS function
def bfs_reallocate(start_airport, end_airport, max_planes=3):
    queue = deque([(start_airport, [], 0, 0)])  # (current airport, path, layovers, arrival time)
    visited = set()
    while queue:
        current_airport, path, planes, arrival_time = queue.popleft()
        if current_airport == end_airport and planes <= max_planes:
            return path, arrival_time
        if (current_airport, planes) in visited or planes > max_planes:
            continue
        visited.add((current_airport, planes))
        for flight_id in flight_graph[current_airport]:
            flight = flight_info[flight_id]
            if flight['CAPACITY'] > 0:  # Check capacity
                new_path = path + [flight_id]
                queue.append((flight['ARR'], new_path, planes + 1, flight['ARR_TIME']))
    return [], float('inf')


relocated_passengers = []
layovers_list = []
arrival_time_diff_list = []


passenger_count_per_flight = defaultdict(int)

for _, passenger in affected_passengers.iterrows():
    original_flight = flights_df[flights_df['FID'] == passenger['FID']].iloc[0]
    start_airport = original_flight['DEP']
    end_airport = original_flight['ARR']
    original_arrival_time = original_flight['ARR_TIME']
    
    new_path, new_arrival_time = bfs_reallocate(start_airport, end_airport)
    
    if new_path:
        # We check capacity for the new path
        if all(passenger_count_per_flight[flight_id] < flight_info[flight_id]['CAPACITY'] for flight_id in new_path):
            relocated_passengers.append((passenger['PID'], new_path))
            layovers_list.append(len(new_path) - 1)
            arrival_time_diff_list.append(abs(new_arrival_time - original_arrival_time))
            
            # We increment passenger count for each flight in the new path
            for flight_id in new_path:
                passenger_count_per_flight[flight_id] += 1
        else:
            # If no available capacity in one or more flights in the path
            relocated_passengers.append((passenger['PID'], []))
            layovers_list.append(None)
            arrival_time_diff_list.append(None)
    else:
        relocated_passengers.append((passenger['PID'], []))
        layovers_list.append(None)
        arrival_time_diff_list.append(None)

end_time = time.time()
execution_time = (end_time - start_time) * 1000  # convert to milliseconds
num_successfully_reallocated = sum(1 for _, path in relocated_passengers if path)
reallocation_rate = (num_successfully_reallocated / num_affected_passengers)
avg_layovers = np.mean([layovers for layovers in layovers_list if layovers is not None])
avg_arrival_time_diff = np.mean([diff for diff in arrival_time_diff_list if diff is not None])

# We create a metric
Score = (100 * reallocation_rate) - (10 * avg_layovers) - (1 * avg_arrival_time_diff)/100000   #We have set appropriate weights 
                                                                                               #Divided avg_Arrival time by 10^5 to make it lie between 0 & 1 like the others

print(f"Score of our solution: {Score}")


stats = {
    'Affected': num_affected_passengers,
    'Reallocate': num_successfully_reallocated,
    'AvgLay': avg_layovers,
    'TimeDiff': avg_arrival_time_diff,
    'SolTime': execution_time,
}
stats_df = pd.DataFrame([stats])
stats_df.to_csv('stats.csv', index=False)

with open('allot.csv', 'w') as f:
    for passenger_id, flights in relocated_passengers:
        f.write(f"{passenger_id},{len(flights)},{','.join(map(str, flights))}\n")

print("Done. All files generated.")


Score of our solution: 78.31073788114175
Done. All files generated.
