In [16]:
import pandas as pd
import numpy as np
from datetime import date
import os

In [17]:
def preprocess(df):
    """Return a cleaned copy of a raw degree-day frame.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Date' column of ISO-format date strings and an
        'HDD 15.5' column. May contain a '% Estimated' column.

    Returns
    -------
    pandas.DataFrame
        A new frame in which 'Date', 'HDD 15.5' and '% Estimated' are replaced
        by 'date' (``datetime.date`` objects), 'hdd_15.5' and
        'percent_estimated'. Any other columns are kept as they are.

    Raises
    ------
    KeyError
        If 'Date' or 'HDD 15.5' is missing.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    out = df.copy()
    out['date'] = out['Date'].apply(date.fromisoformat)
    out['hdd_15.5'] = out['HDD 15.5']
    # if this column does not exist it means there are no issues with accuracy i.e. none estimated
    if '% Estimated' in out.columns:
        out['percent_estimated'] = out['% Estimated']
        out = out.drop(columns=['% Estimated'])
    else:
        out['percent_estimated'] = 0
    return out.drop(columns=['HDD 15.5', 'Date'])

In [18]:
def from_icao_code(df, code):
    """Return the rows of ``df`` whose 'icao' column equals ``code``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'icao' column.
    code : str
        Value to match against the 'icao' column.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index labels and columns;
        empty when nothing matches.
    """
    # A vectorised comparison replaces the per-row lambda: same rows selected,
    # no Python call per row, and it also yields a proper mask on empty frames.
    return df[df['icao'] == code]

In [19]:
def make_df(df_icao, output_dirname: str = 'output'):
    """Combine every CSV in ``./<output_dirname>`` into one frame.

    Each file is read with its first six lines skipped, passed through
    ``preprocess``, and extended with the columns of the ``df_icao`` row whose
    ICAO code matches the file name. The ICAO code is the part of the file
    name before the first underscore.

    Parameters
    ----------
    df_icao : pandas.DataFrame
        Lookup table passed to ``from_icao_code``.
    output_dirname : str
        Directory, relative to the working directory, holding the CSV files.

    Returns
    -------
    pandas.DataFrame
        All files stacked in file-name order with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the directory holds no '.csv' files, or a file's ICAO code does not
        match exactly one row of ``df_icao``.
    """
    base_dir = f"./{output_dirname}"
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the combined frame reproducible between runs.
    filenames = sorted(fn for fn in os.listdir(base_dir) if fn.endswith('.csv'))
    if not filenames:
        raise ValueError(f"no .csv files found in {base_dir!r}")

    dfs = []
    for filename in filenames:
        # The first six lines of each file are skipped before the header row
        # (presumably a metadata preamble -- confirm against the data source).
        df = pd.read_csv(os.path.join(base_dir, filename), skiprows=range(6))
        df = preprocess(df)
        icao_code = filename.split('_')[0]
        country_row = from_icao_code(df_icao, icao_code)
        # Exactly one lookup row is required: with zero or several matches
        # there is no single set of values to attach to this file's rows.
        if len(country_row) != 1:
            raise ValueError(
                f"expected exactly one row for ICAO code {icao_code!r} "
                f"(file {filename!r}), found {len(country_row)}"
            )
        # Broadcast each lookup value down a new column instead of
        # concatenating the lookup row onto every data row one at a time.
        for column, value in country_row.iloc[0].items():
            df[column] = value
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)

In [20]:
# Lookup table handed to make_df, which filters it on its 'icao' column
# (presumably one row per station with country names -- confirm against the file).
df_countries = pd.read_csv('icao-with-country-names.csv')

In [21]:
# Combine every CSV in the default 'output' directory, attaching the matching lookup-table columns.
df = make_df(df_countries)

In [22]:
# Write the combined frame to degreedays.csv; index=False leaves the row index out of the file.
df.to_csv('degreedays.csv', index=False)