### Import packages:

In [67]:
import pandas as pd
from datetime import datetime
import os

### Define functions:

In [157]:
def CalculatePunctuality(schedule, confirmed):
    """Return the departure deviation in minutes (confirmed minus scheduled).

    Parameters
    ----------
    schedule, confirmed : pandas.Series of str
        Clock times whose first two characters are the hour and whose last
        two characters are the minute (e.g. "06:00").

    Returns
    -------
    pandas.Series
        Positive values mean the flight left after its scheduled time,
        negative values mean it left before. The difference is wrapped into
        the range [-720, 720) minutes so that a departure crossing midnight
        (scheduled "23:55", confirmed "00:10") is reported as +15 rather
        than -1425. Because only clock times are available, a deviation of
        12 hours or more cannot be told apart from one in the other direction.
    """
    def _to_minutes(times):
        # "HH:MM" -> minutes since midnight.
        return times.str[:2].astype('int32') * 60 + times.str[-2:].astype('int32')

    minutes_per_day = 24 * 60
    half_day = minutes_per_day // 2

    delta = _to_minutes(confirmed) - _to_minutes(schedule)
    # Shift, wrap with a floor-modulo, and shift back: maps any raw
    # difference onto the equivalent value closest to zero.
    return (delta + half_day) % minutes_per_day - half_day

def CollectDataFramesActual():
    """Load the per-snapshot CSV files of actual departures without overlap.

    Every regular file in ``FlightData/Actual`` except the combined output
    ``flights_departures_actual.csv`` is read with ``pd.read_csv``. Files are
    processed in sorted file-name order, and each file only contributes the
    rows whose "Date" is strictly greater than the largest "Date" of the
    previously read (non-empty) file.

    Returns
    -------
    list of pandas.DataFrame
        One (possibly empty) frame per input file, in processing order.
    """
    target_directory = os.path.join('FlightData', 'Actual')

    # os.listdir returns entries in arbitrary order, but the cutoff logic
    # below only de-duplicates correctly when files are visited oldest first.
    # NOTE(review): assumes file names sort chronologically - confirm.
    # Directories (e.g. .ipynb_checkpoints) are skipped because read_csv
    # cannot open them.
    filenames = sorted(
        filename
        for filename in os.listdir(target_directory)
        if filename != "flights_departures_actual.csv"
        and os.path.isfile(os.path.join(target_directory, filename))
    )

    # The empty string compares lower than any date string, so the first
    # file is kept in full.
    cutoff_date = ""
    dfs = []

    for filename in filenames:
        df = pd.read_csv(os.path.join(target_directory, filename))
        dfs.append(df[df["Date"] > cutoff_date])
        # An empty file has no maximum (max() would be NaN and break the
        # next string comparison), so keep the previous cutoff.
        if not df.empty:
            cutoff_date = df["Date"].max()

    return dfs

def CollectDataFramesPlan(is_using_most_recent_plans):
    """Load the per-snapshot CSV files of planned departures without overlap.

    Every regular file in ``FlightData/Plan`` except the combined output
    ``flights_departures_plan.csv`` is read with ``pd.read_csv``.

    Parameters
    ----------
    is_using_most_recent_plans : bool
        If truthy, files are visited in descending file-name order and each
        file only contributes rows whose "Date" is strictly lower than the
        smallest "Date" of the previously read (non-empty) file.
        Otherwise files are visited in ascending file-name order and each
        file only contributes rows whose "Date" is strictly greater than the
        largest "Date" of the previously read (non-empty) file.

    Returns
    -------
    list of pandas.DataFrame
        One (possibly empty) frame per input file, in processing order.
    """
    target_directory = os.path.join('FlightData', 'Plan')

    # os.listdir returns entries in arbitrary order; the cutoff logic needs a
    # deterministic chronological order.
    # NOTE(review): assumes file names sort chronologically - confirm.
    # Directories (e.g. .ipynb_checkpoints) are skipped because read_csv
    # cannot open them.
    filenames = sorted(
        filename
        for filename in os.listdir(target_directory)
        if filename != "flights_departures_plan.csv"
        and os.path.isfile(os.path.join(target_directory, filename))
    )

    dfs = []

    if is_using_most_recent_plans:
        # "a" compares greater than any string starting with a digit, so the
        # newest file is kept in full.
        cutoff_date = "a"
        for filename in reversed(filenames):
            df = pd.read_csv(os.path.join(target_directory, filename))
            dfs.append(df[df["Date"] < cutoff_date])
            # An empty file has no minimum; keep the previous cutoff.
            if not df.empty:
                cutoff_date = df["Date"].min()
    else:
        # The empty string compares lower than any date string, so the
        # oldest file is kept in full.
        cutoff_date = ""
        for filename in filenames:
            df = pd.read_csv(os.path.join(target_directory, filename))
            dfs.append(df[df["Date"] > cutoff_date])
            # An empty file has no maximum; keep the previous cutoff.
            if not df.empty:
                cutoff_date = df["Date"].max()

    return dfs

def GetIcelandicWeekdays(weekday):
    """Translate an English weekday name into its Icelandic name.

    Returns the empty string when ``weekday`` is not one of the seven
    capitalised English day names.
    """
    icelandic_by_english = {
        'Monday': 'Mánudagur',
        'Tuesday': 'Þriðjudagur',
        'Wednesday': 'Miðvikudagur',
        'Thursday': 'Fimmtudagur',
        'Friday': 'Föstudagur',
        'Saturday': 'Laugardagur',
        'Sunday': 'Sunnudagur',
    }
    # Unknown names fall back to '' instead of raising KeyError.
    return icelandic_by_english.get(weekday, '')

# def CollectDataFrames(scenario):
    
#     scenario = scenario.lower()
#     if scenario not in ['actual', 'plan']:
#         return []
    
#     dir = os.listdir(f"FlightData\\{scenario.title()}")
#     filenames = [file for file in dir if file != f"flights_departures_{scenario}.csv"]

#     current_date = datetime.today().strftime('%Y-%m-%d')
#     cutoff_date = ""
#     dfs = []

#     for filename in filenames:
#         df_tmp = pd.read_csv(os.path.join("FlightData", scenario.title(), filename))
#         dfs.append(df_tmp[df_tmp["Date"] > cutoff_date])
#         cutoff_date = df_tmp["Date"].max()
    
#     return dfs

In [163]:
# NOTE(review): `df` is first assigned in the "Combine Scenarios & Add
# Time-related columns" cell further down (executed as In [162]); this cell
# was run afterwards as In [163]. On a fresh kernel (Restart & Run All) it
# raises NameError at this position.
df

Unnamed: 0,Airline,Confirmed,CreationDate,Date,Destination,FlightNumber,Scheduled,Type,Scenario,pMins,WeekDay,IsMorning
0,Wizz Air,00:26,2021-10-09,2021-07-23,Varsjá,W61540,00:10,Departure,Actual,16,Föstudagur,True
1,Eurowings,00:53,2021-10-09,2021-07-23,Dusseldorf,EW9281,00:30,Departure,Actual,23,Föstudagur,True
2,Iberia Express,01:31,2021-10-09,2021-07-23,Madríd,I23661,01:35,Departure,Actual,-4,Föstudagur,True
3,Lufthansa,02:00,2021-10-09,2021-07-23,Munchen,LH2469,02:00,Departure,Actual,0,Föstudagur,True
4,Play,05:53,2021-10-09,2021-07-23,Berlin Brandenburg,OG700,06:00,Departure,Actual,-7,Föstudagur,True
...,...,...,...,...,...,...,...,...,...,...,...,...
16929,Icelandair,Á áætlun,2021-10-09,2021-09-05,Boston,FI631,17:15,Departure,Plan,0,Sunnudagur,False
16930,Wizz Air,Á áætlun,2021-10-09,2021-09-05,Kraká,W65054,17:15,Departure,Plan,0,Sunnudagur,False
16931,Icelandair,Á áætlun,2021-10-09,2021-09-05,Vancouver,FI697,17:20,Departure,Plan,0,Sunnudagur,False
16932,Transavia,Á áætlun,2021-10-09,2021-09-05,Amsterdam,HV6886,20:05,Departure,Plan,0,Sunnudagur,False


### Actual-scenario:

In [145]:
# Merge the per-snapshot frames into a single table of actual departures.
data_frames = CollectDataFramesActual()

# ignore_index gives every row a unique label; the per-file frames would
# otherwise carry overlapping index values into the combined frame.
df_actual = pd.concat(data_frames, sort=False, ignore_index=True)

# Keep only rows whose "Confirmed" value starts with a character <= "2",
# i.e. an hour tens digit of 0, 1 or 2. Presumably this drops non-time status
# texts such as "Á áætlun", which CalculatePunctuality cannot parse.
# .copy() makes the filtered frame independent of the concatenated one so the
# column assignments below do not trigger pandas' SettingWithCopyWarning.
df_actual = df_actual[df_actual["Confirmed"].str[0] <= "2"].copy()

df_actual["CreationDate"] = datetime.today().strftime('%Y-%m-%d')
df_actual["pMins"] = CalculatePunctuality(df_actual["Scheduled"], df_actual["Confirmed"])
# df_actual["Punctuality"] = df_actual.apply(lambda row : 1 if row["pMins"] > 0 else -1, axis=1)

# Persist the combined table; CollectDataFramesActual skips this file name
# when it scans the directory.
df_actual.to_csv(os.path.join("FlightData","Actual", "flights_departures_actual.csv"), index=False)

### Plan-scenario:
Set **is_using_most_recent_plans** = **True** to read the plan files in **descending** date order (from newest to oldest): each file only contributes rows dated earlier than the earliest date in the file read before it.

Set **is_using_most_recent_plans** = **False** to read the plan files in **ascending** date order (from oldest to newest): each file only contributes rows dated later than the latest date in the file read before it.

In [146]:
# True: plan files are visited newest-first; False: oldest-first.
is_using_most_recent_plans = True
data_frames = CollectDataFramesPlan(is_using_most_recent_plans)

# Stack the per-file frames, stamp them with today's date and set pMins to 0
# for every planned row.
df_plan = pd.concat(data_frames, sort=False).assign(
    CreationDate=datetime.today().strftime('%Y-%m-%d'),
    pMins=0,
)
# df_plan["Punctuality"] = 0

# Persist the combined plan table; CollectDataFramesPlan skips this file name
# when it scans the directory.
df_plan.to_csv(
    os.path.join("FlightData", "Plan", "flights_departures_plan.csv"),
    index=False,
)

### Combine Scenarios & Add Time-related columns:

In [162]:
# Stack both scenarios into one frame with a fresh 0..n-1 index.
df = pd.concat([df_actual, df_plan], sort=False).reset_index(drop=True)

df['Date'] = pd.to_datetime(df['Date'])
# df['Year'] = df['Date'].dt.year
# df['Quarter'] = df['Date'].dt.quarter
# df['Month'] = df['Date'].dt.month
# df['Week'] = df['Date'].dt.week
# df['Day'] = df['Date'].dt.day
# df['WeekDay'] = df['Date'].dt.day_name()

# Icelandic weekday name of each flight date; the function is passed directly
# instead of being wrapped in a lambda.
df['WeekDay'] = df['Date'].dt.day_name().map(GetIcelandicWeekdays)

# A departure is a "morning" departure when its scheduled hour (first two
# characters of "Scheduled") is before 12. One vectorized comparison replaces
# the previous set-all-True-then-overwrite-by-index approach.
df["IsMorning"] = df["Scheduled"].str[:2].astype('int32') < 12

### Save Datasets:

In [164]:
# Split the enriched frame back into its two scenarios, each with its own
# 0..n-1 index.
df_actual = df.loc[df["Scenario"] == "Actual"].reset_index(drop=True)
df_plan = df.loc[df["Scenario"] == "Plan"].reset_index(drop=True)

# Write the combined frame and both scenario frames as pickles under FlightData.
# Note: the pickle names use "flight_" while the CSV outputs use "flights_".
for frame, pickle_name in (
    (df, "flight_departures.pickle"),
    (df_actual, "flight_departures_actual.pickle"),
    (df_plan, "flight_departures_plan.pickle"),
):
    frame.to_pickle(os.path.join("FlightData", pickle_name))