In [1]:
import json
import pickle

import pandas as pd
import numpy as np

In [56]:
# Airport reference table. process_file reads its 'code' and 'country' columns
# to resolve an airport code into a country name.
airports_df = pd.read_csv('../utility_dataframes/airports_df.csv', index_col=0)

In [3]:
# Column layout of the per-file result DataFrame built in process_file.
# NOTE(review): none of the visible cells ever fills 'LastName' or
# 'LoyaltyStatus', so those columns stay empty in the written CSVs.
column_names = [
    'FirstName',
    'LastName',
    'Gender',
    'FlightNumber',
    'Date',
    'Time',
    'DepartureCountry',
    'DepartureCity',
    'DepartureAirport',
    'ArrivalCountry',
    'ArrivalCity',
    'ArrivalAirport',
    'AirlineName',
    'LoyaltyStatus'
]

In [4]:
# `dates` is sliced and concatenated into the Excel file names by the
# processing loop further down, so it is presumably a sequence of date
# strings — TODO confirm.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../utility_dataframes/dates.pkl', 'rb') as file:
    dates = pickle.load(file)

In [72]:
# Two-character flight-number prefix -> lower-case airline name.
# NOTE(review): 'chech airlines', 'argentinas' and 'garudas indonesia' look
# like misspellings; they are left untouched because already-written CSVs or
# downstream joins may depend on these exact strings — confirm before fixing.
airline_names = {
    'SU': 'aeroflot',
    'OK': 'chech airlines',
    'KE': 'korean air',
    'UX': 'air europa',
    'CZ': 'china southern airlines',
    'AF': 'air france',
    'KL': 'klm',
    'VN': 'vietnam airlines',
    'RO': 'tarom',
    'AZ': 'alitalia',
    'CI': 'china airlines',
    'MF': 'xiamen air',
    'ME': 'middle east airlines',
    'MU': 'china eastern airlines',
    'AR': 'argentinas',
    'GA': 'garudas indonesia',
    'DL': 'delta airlines',
    'KQ': 'kenya airways',
    'SV': 'saudi arabian airlines',
    'AM': 'aeromexico'
}

def convert_airline_name(name, airline_names=airline_names):
    """Map a flight number to an airline name via its first two characters.

    Parameters
    ----------
    name : str
        Flight number such as ``'SU1234'``. Non-string values (for example a
        missing cell that pandas read as NaN, or ``None``) are accepted and
        yield ``np.nan`` instead of raising.
    airline_names : dict, optional
        Prefix -> airline-name mapping. The default is bound when this
        function is defined, so later rebinding the module-level name
        ``airline_names`` does not affect the lookup.

    Returns
    -------
    str or float
        The mapped airline name, or ``np.nan`` when the prefix is unknown or
        ``name`` is not a string.
    """
    # A float NaN is not sliceable; without this guard `name[:2]` raises
    # TypeError and aborts the whole `.apply` in process_file.
    if not isinstance(name, str):
        return np.nan
    return airline_names.get(name[:2], np.nan)

In [6]:
def process_sheet(df):
    """Extract one passenger's flight details from a single worksheet.

    Values are read from fixed cell positions (row, column) of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One worksheet, indexed positionally; it needs at least 8 rows and
        8 columns.

    Returns
    -------
    dict or bool
        ``False`` when cell (1, 5) holds a value, in which case the sheet is
        skipped (presumably a different sheet layout — TODO confirm).
        Otherwise a dict with the keys 'FirstName', 'Gender', 'FlightNumber',
        'Date', 'Time', 'DepartureCity', 'DepartureAirport', 'ArrivalCity'
        and 'ArrivalAirport'.

    Raises
    ------
    AttributeError
        If one of the name/city cells is not a string (``.lower()`` fails).
    """
    # `pd.notna` instead of `is not np.nan`: the identity test only recognises
    # the np.nan singleton and treats every other NaN object (np.float64 NaN
    # from a float column, float('nan'), None) as "has a value".
    if pd.notna(df.iloc[1, 5]):
        return False
    flight_info = {}
    flight_info['FirstName'] = (df.iloc[1, 1]).lower()
    # Any title other than 'MR' is recorded as 'F'.
    flight_info['Gender'] = 'M' if df.iloc[1, 0] == 'MR' else 'F'
    flight_info['FlightNumber'] = df.iloc[3, 0]
    flight_info['Date'] = df.iloc[7, 0]
    flight_info['Time'] = df.iloc[7, 2]
    flight_info['DepartureCity'] = (df.iloc[3, 3]).lower()
    flight_info['DepartureAirport'] = df.iloc[5, 3]
    flight_info['ArrivalCity'] = (df.iloc[3, 7]).lower()
    flight_info['ArrivalAirport'] = df.iloc[5, 7]
    return flight_info

In [7]:
def _country_by_airport_code(airports):
    """Return {airport code: lower-cased country}; the first row per code wins."""
    lookup = {}
    for code, country in zip(airports['code'], airports['country']):
        # Skip missing codes and non-text countries so they are reported as
        # "no match" instead of raising on `.lower()`.
        if pd.isna(code) or not isinstance(country, str):
            continue
        lookup.setdefault(code, country.lower())
    return lookup


def process_file(df):
    """Turn one workbook (a mapping of sheets) into a flat flights DataFrame.

    Parameters
    ----------
    df : dict
        Mapping of sheet name -> DataFrame; only ``df.values()`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per sheet accepted by ``process_sheet``, with the columns
        listed in ``column_names``. 'DepartureCountry' / 'ArrivalCountry' are
        left empty when the airport code is not found in ``airports_df``;
        'AirlineName' is derived from 'FlightNumber'.

    Side effects
    ------------
    Prints the set of airport codes that had no match in ``airports_df``.
    """
    # Build the lookup once per file instead of scanning airports_df with a
    # boolean mask twice per sheet.
    country_lookup = _country_by_airport_code(airports_df)
    no_match_airport = set()
    records = []

    for sheet in df.values():
        flight_info = process_sheet(sheet)
        if not flight_info:
            continue

        for airport_key, country_key in (
            ('DepartureAirport', 'DepartureCountry'),
            ('ArrivalAirport', 'ArrivalCountry'),
        ):
            code = flight_info[airport_key]
            # A missing code never matches, same as the old `==` comparison.
            country = None if pd.isna(code) else country_lookup.get(code)
            if country is None:
                no_match_airport.add(code)
            else:
                flight_info[country_key] = country

        records.append(flight_info)

    # DataFrame.append was removed in pandas 2.0 and copied the frame on every
    # call; build the frame once from the collected records instead.
    result_df = pd.DataFrame(records, columns=column_names)
    # Computed once after the loop rather than for the whole frame per sheet.
    result_df['AirlineName'] = result_df['FlightNumber'].apply(convert_airline_name)

    print('No match for airports: ', no_match_airport)
    return result_df

In [59]:
# NOTE(review): this rebinds the module-level name `airline_names`, which an
# earlier cell defines as the prefix -> airline-name dict. convert_airline_name
# keeps working because its default argument captured the dict at definition
# time, but any later direct use of `airline_names` sees this empty set.
# Nothing in the visible cells adds to the set.
airline_names = set()

In [70]:
path_to_excel_files = './YourBoardingPassDotAero/YourBoardingPassDotAero-'

# Convert one workbook per date (starting at index 200 of `dates`) to a CSV.
for date in dates[200:]:
    # sheet_name=None loads every sheet; process_file iterates over them.
    df = pd.read_excel(
        path_to_excel_files + date + '.xlsx',
        sheet_name=None,
        names=list('ABCDEFGH'),
    )

    result_df = process_file(df)
    result_df.to_csv(f'./result_dfs/result_df_{date}.csv')
    print(f'{date} proccesseed')

No match for airports:  {'DQA', 'GYS', 'JUH', 'LDS', 'LOP', 'RIZ', 'ZUJ', 'NLH', 'JXS', 'HNY', 'ZYI', 'BAR', 'KVN', 'JIQ', 'HXD', 'NGQ', 'JNG', 'DSS', 'FLZ'}
2017-07-20 proccesseed
No match for airports:  {'ZUJ', 'DYN', 'LOP', 'DTB', 'AEB', 'JXS', 'HNY', 'ZYI', 'TCZ', 'ECP', 'UKA', 'JNG', 'WNH', 'HDG', 'LPF', 'DSS'}
2017-07-21 proccesseed
No match for airports:  {'WLI', 'GYS', 'HMI', 'DQA', 'BUW', 'RIZ', 'WUT', 'WDS', 'HNY', 'JSJ', 'TCZ', 'ECP', 'RAQ', 'LFQ', 'NYT', 'LLV', 'DSS', 'HDG', 'BWX'}
2017-07-22 proccesseed
No match for airports:  {'DQA', 'WLI', 'DYN', 'KTG', 'CWJ', 'ZYI', 'SQD', 'BAR', 'FSZ', 'LFQ', 'JNG', 'QSZ'}
2017-07-23 proccesseed
No match for airports:  {'KVN', 'HPG', 'DQA'}
2017-07-24 proccesseed
No match for airports:  {'DQA', 'KOS', 'DYN', 'RIZ', 'JXS', 'HNY', 'WDS', 'BAR', 'TCZ', 'ZYI', 'KVN', 'JBB', 'ECP', 'WNH', 'HDG'}
2017-07-25 proccesseed
No match for airports:  {'DQA', 'GYS', 'DYN', 'LOP', 'ZYI', 'ECP', 'JIQ', 'UKA', 'LFQ', 'FSZ', 'QSZ', 'DSS'}
2017-07-26 proc

No match for airports:  {'WMT', 'DQA', 'LOP', 'KTG', 'JXS', 'HNY', 'ZYI', 'KVN', 'UKA', 'JNG', 'NYT'}
2017-09-12 proccesseed
No match for airports:  {'ZUJ', 'LDS', 'DYN', 'DCY', 'LOP', 'ZYI', 'BEJ', 'KVN', 'LSW', 'FSZ', 'JNG', 'BWX'}
2017-09-13 proccesseed
No match for airports:  {'KOS', 'DYN', 'LOP', 'PRA', 'GMQ', 'WDS', 'JXS', 'ZYI', 'BAR', 'TCZ', 'KVN'}
2017-09-14 proccesseed
No match for airports:  {'DQA', 'BUW', 'DYN', 'LOP', 'PRA', 'GMQ', 'JXA', 'LSW', 'KVN', 'ECP', 'FLZ', 'GYU', 'DSS', 'BWX'}
2017-09-15 proccesseed
No match for airports:  {'DQA', 'GYS', 'LDS', 'RKZ', 'LOP', 'MJU', 'WDS', 'ZYI', 'SXK', 'TCZ', 'SQD', 'GNS', 'ECP', 'UKA', 'LFQ', 'JNG', 'FYN', 'LPF', 'DSS'}
2017-09-16 proccesseed
No match for airports:  {'LUW', 'WMT', 'DQA', 'HMI', 'WLI', 'RIZ', 'LDS', 'WUT', 'LOP', 'MJU', 'ZYI', 'BAR', 'TCZ', 'KVN', 'ECP', 'JNG', 'QSZ', 'WNH', 'HDG'}
2017-09-17 proccesseed
No match for airports:  {'DQA', 'ZUJ', 'LDS', 'LOP', 'DCY', 'HPG', 'RIZ', 'WDS', 'HNY', 'ZYI', 'JIC', 'KVN', '

No match for airports:  {'WMT', 'BEJ', 'KVN', 'DSS', 'HMI', 'WLI', 'TCZ', 'JBB', 'DYN', 'YTY', 'HNY', 'ZYI', 'BAR', 'GNS', 'JIQ', 'RAQ', 'BWX', 'LOP', 'WUT', 'CWJ', 'JMJ', 'NYT'}
2017-11-07 proccesseed
No match for airports:  {'RKZ', 'YTY', 'JXS', 'SQD', 'KVN', 'ECP', 'JNG', 'JIC'}
2017-11-08 proccesseed
No match for airports:  {'DQA', 'LLB', 'BUW', 'WUT', 'JXS', 'ZYI', 'LFQ', 'FLZ', 'DSS'}
2017-11-09 proccesseed
No match for airports:  {'DYN', 'TVS', 'LOP', 'KTG', 'GMQ', 'WDS', 'JXS', 'SXK', 'YSQ', 'KVN', 'JNG', 'JMJ', 'DSS'}
2017-11-10 proccesseed
No match for airports:  {'WMT', 'DYN', 'LOP', 'YTY', 'HPG', 'AEB', 'WDS', 'JXS', 'HTT', 'TCZ', 'KVN', 'ECP', 'TVX', 'FSZ', 'JNG', 'JMJ'}
2017-11-11 proccesseed
No match for airports:  {'ZUJ', 'WDS', 'ZYI', 'CWJ', 'KVN', 'JIC', 'GYU'}
2017-11-12 proccesseed
No match for airports:  {'DQA', 'LDS', 'DCY', 'HPG', 'GMQ', 'WDS', 'KVN', 'FSZ', 'JXA', 'GYU', 'NYT'}
2017-11-13 proccesseed
No match for airports:  {'DQA', 'ZUJ', 'DYN', 'LOP', 'PSU', 'A

No match for airports:  {'WMT', 'KTG', 'BEJ', 'KVN', 'RCU', 'TVX', 'DSS', 'DQA', 'HMI', 'AEB', 'TCZ', 'ECP', 'YKH', 'JXA', 'CHG', 'DYN', 'RIZ', 'YTY', 'JXS', 'HNY', 'ZYI', 'WNH', 'BWX', 'ZUJ', 'LOP', 'SHF', 'WUT', 'WDS', 'HTT', 'FSZ', 'JNG', 'NYT', 'PKN'}
2017-12-29 proccesseed
No match for airports:  {'LOP', 'RCU', 'YKH', 'JMJ', 'WNH'}
2017-12-30 proccesseed
No match for airports:  {'KTG', 'NLH', 'NDS', 'KVN', 'LLV', 'DSS', 'DQA', 'WLI', 'GMQ', 'ECP', 'YKH', 'QSZ', 'DYN', 'JXS', 'ZYI', 'BWX', 'LOP', 'MJU', 'WUT', 'PSU', 'HTT', 'FSZ', 'JNG', 'LPF'}
2017-12-31 proccesseed


In [71]:
# Prints `set()`: by this point `airline_names` has been rebound to an empty
# set by an earlier cell, so this no longer shows the prefix -> airline dict.
print(airline_names)

set()
