In [1]:
import datetime
import glob
import os
import pickle

import numpy as np
import pandas as pd

In [2]:
def get_file_list(folder_name, prefix=''):
    """Return the paths of the ``.data`` files found directly in a folder.

    Args:
        folder_name: Directory to search. It is joined to the glob pattern
            with a literal '/'.
        prefix: Text that must appear immediately before the ``.data``
            extension. Despite its name it is matched at the END of the file
            stem, e.g. ``prefix='_x'`` matches ``foo_x.data``.

    Returns:
        A sorted list of matching paths (empty when nothing matches).
    """
    # glob.glob yields matches in arbitrary, filesystem-dependent order;
    # sorting keeps the result reproducible from one run to the next.
    return sorted(glob.glob(folder_name + '/*' + prefix + '.data'))

In [3]:
def load_file(file_name):
    """Unpickle and return the object stored in ``file_name``.

    The file is opened in binary mode and closed again before returning.

    Warning:
        ``pickle.load`` can execute arbitrary code while deserializing, so
        only call this on files from a trusted source.
    """
    with open(file_name, 'rb') as handle:
        payload = pickle.load(handle)
    return payload

In [4]:
def init_data(clfs_folder, cols_folder):
    """Load every pickled classifier together with its companion columns file.

    Each ``<name>.data`` file in ``clfs_folder`` is paired with the file of
    the same name in ``cols_folder``.

    Args:
        clfs_folder: Folder holding one pickled classifier per ``.data`` file.
        cols_folder: Folder holding the matching ``<name>.data`` pickle that
            is stored under the ``'cols'`` key.

    Returns:
        dict mapping ``<name>`` (the file name without its extension) to
        ``{'clf': <unpickled classifier>, 'cols': <unpickled columns object>}``.

    Raises:
        FileNotFoundError: if a classifier has no matching file in
            ``cols_folder`` (propagated from ``open``).
    """
    clfs = {}
    for clf_path in get_file_list(clfs_folder):
        # Take the key from the file name itself instead of string-replacing
        # the folder prefix and '.data': that approach breaks when glob
        # returns a different path separator or when '.data' occurs
        # elsewhere in the path.
        airport = os.path.splitext(os.path.basename(clf_path))[0]
        cols_path = os.path.join(cols_folder, airport + '.data')
        clfs[airport] = {'clf': load_file(clf_path), 'cols': load_file(cols_path)}
    return clfs

In [5]:
def get_features(data, origin, dest, carrier, flight, wind_speed, dep_time, flight_time, flight_duration, date):
    """Build the single-row feature frame for one flight.

    The frame uses the column layout stored in ``data[origin]['cols']``,
    minus the ``'status'`` column, with every column that is not set
    explicitly filled with 0.

    Args:
        data: Mapping of origin code to a dict with at least a ``'cols'``
            entry (an iterable of column names), as built by ``init_data``.
        origin: Key into ``data`` selecting the column layout.
        dest: Destination code; sets the ``'dest_' + dest`` column to 1.
        carrier: Carrier code; sets the ``'carrier_' + carrier`` column to 1.
        flight: Value for ``'fl_num'``.
        wind_speed: Value for ``'average_wind_speed'``.
        dep_time: Unused. Kept so existing positional calls keep working.
        flight_time: Value for ``'crs_dep_time'``.
        flight_duration: Value for ``'crs_elapsed_time'``.
        date: ``datetime.date``/``datetime.datetime`` supplying month,
            quarter, day of month and day of year.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are exactly the stored
        columns (in stored order) without ``'status'``.

    Raises:
        KeyError: if ``origin`` is not present in ``data``.
    """
    # 'status' is dropped from the layout; filtering (rather than
    # DataFrame.drop) also tolerates a layout that never contained it.
    feature_cols = [col for col in data[origin]['cols'] if col != 'status']

    row = {
        'average_wind_speed': wind_speed,
        'crs_dep_time': flight_time,
        'crs_elapsed_time': flight_duration,
        'dest_' + dest: 1,
        'fl_num': flight,
        'carrier_' + carrier: 1,
        'month': date.month,
        # Calendar quarter in 1..4. The previous `month // 4` produced 0..3
        # with wrong boundaries (e.g. March -> 0, August -> 2).
        # NOTE(review): confirm the training data encoded quarter as 1..4.
        'quarter': (date.month - 1) // 3 + 1,
        'day_of_month': date.day,
        'day_of_year': date.timetuple().tm_yday,
    }

    # DataFrame.append was removed in pandas 2.0, so build the row directly.
    # reindex pins the stored column order and drops keys the layout does not
    # contain (e.g. a destination with no 'dest_' column), which therefore
    # end up encoded as all zeros instead of adding an extra column.
    return pd.DataFrame([row]).reindex(columns=feature_cols).fillna(0)

In [8]:
# Load every pickled classifier from 'clfs' along with the same-named
# columns file from 'values_dicts'; both paths are relative to the working directory.
classifiers = init_data('clfs', 'values_dicts')

In [9]:
# Fixed date (28 Aug 1999) used to build the sample feature row below.
dt = datetime.datetime(1999, 8, 28)

In [10]:
# One-row feature frame for origin 'PIT', destination 'MCO', carrier 'AA';
# flight number, wind speed and all time/duration inputs are set to 0.
test_sample = get_features(classifiers, 'PIT', 'MCO', 'AA', 0, 0, 0, 0, 0, dt)

In [14]:
# Run the classifier stored under 'PIT' on the sample row and display the predicted label.
classifiers['PIT']['clf'].predict(test_sample)

array(['no_delay'], dtype=object)