### Import packages:

In [1]:
import pandas as pd
from datetime import datetime
import os

### Define functions:

In [2]:
def CalculatePunctuality(schedule, confirmed):
    """Return the departure delay in minutes (confirmed minus scheduled).

    Both arguments are pandas Series of time strings. The first two
    characters are read as the hour and the last two as the minute
    (e.g. "HH:MM"); every value must therefore be a string with numeric
    hour/minute parts, otherwise ``astype`` raises.

    Only clock times are available here, so a departure confirmed after
    midnight for a flight scheduled before midnight would naively come out
    as roughly minus one day (23:50 -> 00:10 gives -1420). Differences below
    -12 hours are treated as such midnight crossings and shifted forward by
    24 hours. Large positive differences are left untouched because, without
    dates, they cannot be told apart from genuinely long delays.

    Returns:
        An int32 Series of minutes; positive means later than scheduled.
    """
    minutes_per_day = 24 * 60

    def _to_minutes(times):
        # Hour from the first two characters, minute from the last two.
        return times.str[:2].astype('int32') * 60 + times.str[-2:].astype('int32')

    delta = _to_minutes(confirmed) - _to_minutes(schedule)

    # Departing more than 12 hours early is implausible, so such a value
    # means the confirmed time belongs to the following day.
    crossed_midnight = delta < -(minutes_per_day // 2)
    return delta.where(~crossed_midnight, delta + minutes_per_day)

def CollectDataFramesActual():
    """Load the per-file "Actual" frames, keeping each date from one file only.

    Every regular file in ``FlightData/Actual`` except the combined output
    ``flights_departures_actual.csv`` is read with ``pd.read_csv``. Files are
    processed in sorted filename order, and from each file only the rows
    whose "Date" is strictly greater than the largest date already collected
    are kept. Rows with a missing "Date" are dropped.

    NOTE(review): this assumes that sorting the filenames lexicographically
    yields oldest-to-newest order (e.g. names carrying an ISO date) -
    confirm against the files actually stored in the directory.

    Returns:
        A list with one (possibly empty) DataFrame per input file, in
        processing order.
    """
    target_directory = os.path.join('FlightData', 'Actual')

    # os.listdir returns entries in arbitrary order, while the cutoff logic
    # below only works when files are visited oldest first. Sub-directories
    # (e.g. .ipynb_checkpoints) cannot be read as CSV and are skipped.
    filenames = sorted(
        filename
        for filename in os.listdir(target_directory)
        if filename != "flights_departures_actual.csv"
        and os.path.isfile(os.path.join(target_directory, filename))
    )

    cutoff_date = None  # no date collected yet
    dfs = []

    for filename in filenames:
        df = pd.read_csv(os.path.join(target_directory, filename))
        dates = df["Date"]
        new_rows = df[dates.notna() if cutoff_date is None else dates > cutoff_date]
        dfs.append(new_rows)
        # Advance the cutoff only when something was kept, so an empty or
        # fully overlapping file can never move it backwards (or to NaN).
        if not new_rows.empty:
            cutoff_date = new_rows["Date"].max()

    return dfs

def CollectDataFramesPlan(is_using_most_recent_plans):
    """Load the per-file "Plan" frames, keeping each date from one file only.

    Every regular file in ``FlightData/Plan`` except the combined output
    ``flights_departures_plan.csv`` is read with ``pd.read_csv``.

    Args:
        is_using_most_recent_plans: When truthy, files are processed in
            descending filename order and each file contributes only rows
            whose "Date" is strictly earlier than the earliest date already
            collected, so overlapping dates come from the later file.
            When falsy, files are processed in ascending filename order and
            each file contributes only rows whose "Date" is strictly later
            than the latest date already collected, so overlapping dates
            come from the earlier file.

    NOTE(review): this assumes that sorting the filenames lexicographically
    yields oldest-to-newest order (e.g. names carrying an ISO date) -
    confirm against the files actually stored in the directory.

    Returns:
        A list with one (possibly empty) DataFrame per input file, in
        processing order. Rows with a missing "Date" are dropped.
    """
    target_directory = os.path.join('FlightData', 'Plan')

    # os.listdir returns entries in arbitrary order, while the cutoff logic
    # below depends on a chronological visit order. Sub-directories
    # (e.g. .ipynb_checkpoints) cannot be read as CSV and are skipped.
    filenames = sorted(
        filename
        for filename in os.listdir(target_directory)
        if filename != "flights_departures_plan.csv"
        and os.path.isfile(os.path.join(target_directory, filename))
    )
    if is_using_most_recent_plans:
        filenames.reverse()

    cutoff_date = None  # no date collected yet
    dfs = []

    for filename in filenames:
        df = pd.read_csv(os.path.join(target_directory, filename))
        dates = df["Date"]
        if cutoff_date is None:
            keep = dates.notna()
        elif is_using_most_recent_plans:
            keep = dates < cutoff_date
        else:
            keep = dates > cutoff_date

        selected = df[keep]
        dfs.append(selected)

        # Move the cutoff only when rows were kept, so an empty or fully
        # overlapping file can never push it in the wrong direction.
        if not selected.empty:
            if is_using_most_recent_plans:
                cutoff_date = selected["Date"].min()
            else:
                cutoff_date = selected["Date"].max()

    return dfs

def GetIcelandicWeekdays(weekday):
    """Translate an English weekday name into its Icelandic counterpart.

    Args:
        weekday: English day name with a leading capital, e.g. 'Monday'.

    Returns:
        The Icelandic day name, or an empty string when ``weekday`` is not
        one of the seven known names.
    """
    icelandic_by_english = {
        'Monday': 'Mánudagur',
        'Tuesday': 'Þriðjudagur',
        'Wednesday': 'Miðvikudagur',
        'Thursday': 'Fimmtudagur',
        'Friday': 'Föstudagur',
        'Saturday': 'Laugardagur',
        'Sunday': 'Sunnudagur',
    }
    # Unknown keys fall back to the empty string.
    return icelandic_by_english.get(weekday, '')

def GetPartOfDay(hour_str):
    """Classify a time string into a part of the day based on its hour.

    The first two characters of ``hour_str`` are parsed as the hour.

    Args:
        hour_str: Time text that starts with the hour, e.g. "07:45".

    Returns:
        'Night' for hours 0-4, 'Morning' for 5-11, 'Day' for 12-17 and
        'Evening' for 18-23. An empty string is returned when the value
        cannot be sliced or parsed (e.g. None, NaN, non-numeric text) or
        when the hour lies outside 0-23.
    """
    try:
        hour = int(hour_str[:2])
    except (TypeError, ValueError):
        # TypeError: value is not sliceable (None, NaN, numbers).
        # ValueError: the leading characters are not an integer.
        # A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
        return ''

    if not 0 <= hour <= 23:
        return ''
    if hour < 5:
        return 'Night'
    if hour < 12:
        return 'Morning'
    if hour < 18:
        return 'Day'
    return 'Evening'


### Actual-scenario:

In [3]:
# Stack the per-file "Actual" frames into a single frame.
data_frames = CollectDataFramesActual()
df_actual = pd.concat(data_frames, sort=False)

# Keep only rows whose "Confirmed" text starts with a character <= "2"
# (presumably a valid hour digit - TODO confirm against the raw data); rows
# with a missing value compare False and are dropped as well.
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df_actual = df_actual[df_actual["Confirmed"].str[0] <= "2"].copy()

# Stamp the rows with today's date and compute the delay in minutes.
df_actual["CreationDate"] = datetime.today().strftime('%Y-%m-%d')
df_actual["pMins"] = CalculatePunctuality(df_actual["Scheduled"], df_actual["Confirmed"])

# Persist the combined "Actual" dataset next to its source files.
df_actual.to_csv(os.path.join("FlightData","Actual", "flights_departures_actual.csv"), index=False)

### Plan-scenario:
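Plan files may cover overlapping dates. Set **is_using_most_recent_plans** = **True** to read the dataframes in **descending** date-order (from newest to oldest), so that each date is taken from the most recent plan that contains it.

Set **is_using_most_recent_plans** = **False** to read the dataframes in **ascending** date-order (from oldest to newest), so that each date is taken from the oldest plan that contains it.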

In [4]:
# True  -> overlapping dates are taken from the newest plan file.
# False -> overlapping dates are taken from the oldest plan file.
is_using_most_recent_plans = True
data_frames = CollectDataFramesPlan(is_using_most_recent_plans)

# Stack the plan frames and add the bookkeeping columns in one step;
# planned flights carry a delay of 0 minutes.
creation_stamp = datetime.today().strftime('%Y-%m-%d')
df_plan = pd.concat(data_frames, sort=False).assign(
    CreationDate=creation_stamp,
    pMins=0,
)

# Persist the combined "Plan" dataset next to its source files.
plan_csv_path = os.path.join("FlightData", "Plan", "flights_departures_plan.csv")
df_plan.to_csv(plan_csv_path, index=False)

### Combine Scenarios & Add Time-related columns:

In [5]:
# Stack both scenarios under a fresh 0..n-1 index, parse the dates, then
# derive the Icelandic weekday name and the part of day for every row.
df = (
    pd.concat([df_actual, df_plan], sort=False, ignore_index=True)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .assign(
        WeekDay=lambda frame: frame['Date'].dt.day_name().apply(GetIcelandicWeekdays),
        PartOfDay=lambda frame: frame['Scheduled'].apply(GetPartOfDay),
    )
)

### Save Datasets:

In [6]:
# Split the enriched frame back into its two scenarios, each with its own
# fresh 0..n-1 index.
scenario_column = df["Scenario"]
df_actual = df.loc[scenario_column == "Actual"].reset_index(drop=True)
df_plan = df.loc[scenario_column == "Plan"].reset_index(drop=True)

# Write the combined frame and both per-scenario frames as pickles.
pickle_targets = (
    ("flight_departures.pickle", df),
    ("flight_departures_actual.pickle", df_actual),
    ("flight_departures_plan.pickle", df_plan),
)
for pickle_name, frame_to_save in pickle_targets:
    frame_to_save.to_pickle(os.path.join("FlightData", pickle_name))