## Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.utils import compute_class_weight

from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import ADASYN

from sklearn.ensemble import AdaBoostClassifier

from sklearn import svm 

import pickle

In [2]:
# Read the preprocessed dataset and key the rows by the `id` column.
data = pd.read_csv('wingman_data_proc_v5.csv').set_index('id')

In [3]:
# Show the loaded frame; the saved output below is abbreviated with `...` rows and columns.
data

Unnamed: 0_level_0,subcategory_no,num_eng,total_seats,afm_hrs,cert_max_gr_wt,dprt_time,power_units,flight_hours_mean,certs_held,second_pilot,...,eng_type_infrequent_sklearn,carb_fuel_injection_CARB,carb_fuel_injection_FINJ,carb_fuel_injection_UNK,dprt_apt_id,dest_apt_id,flt_plan_filed_IFR,flt_plan_filed_NONE,flt_plan_filed_VFR,pc_profession
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20080115X000511,6,1,2,5835,1670,2215,110,18.000000,1.0,0.0,...,0.0,1.0,0.0,0.0,1,1,0.0,1.0,0.0,0
20080116X000631,3,2,10,13130,7368,1743,350,9437.000000,1.0,0.0,...,0.0,0.0,1.0,0.0,1,1,0.0,0.0,1.0,1
20080122X000871,5,1,7,3895,5000,2331,674,3120.000000,1.0,0.0,...,1.0,0.0,0.0,1.0,1,0,0.0,0.0,1.0,1
20080220X002121,6,1,2,1100,1650,1630,150,4600.000000,1.0,0.0,...,0.0,1.0,0.0,0.0,0,0,0.0,1.0,0.0,0
20080207X001531,4,1,5,3227,2450,1630,400,6500.000000,1.0,0.0,...,1.0,0.0,0.0,1.0,1,1,0.0,1.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20190107X337411,2,1,4,9805,2270,2100,225,1877.000000,1.0,0.0,...,0.0,1.0,0.0,0.0,1,1,0.0,1.0,0.0,0
20190121X128521,7,1,4,3899,2550,1315,230,611.400024,1.0,0.0,...,0.0,1.0,0.0,0.0,1,1,0.0,1.0,0.0,0
20190108X549451,4,1,2,6177,1369,1630,160,1525.000000,1.0,1.0,...,0.0,1.0,0.0,0.0,1,1,0.0,1.0,0.0,1
20190112X112141,6,1,1,28,1320,2130,180,9000.000000,1.0,0.0,...,0.0,1.0,0.0,0.0,1,1,0.0,1.0,0.0,0


## Train/test split

In [4]:
# `subcategory_no` is the prediction target; every other column is a predictor.
y = data['subcategory_no']
X = data.drop(columns='subcategory_no')

In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

## Feature selection and reduced model

In [6]:
# Columns removed to build the reduced-feature frame.
# NOTE(review): '_BUS ' carries a trailing space. drop() raises KeyError when a
# label is missing, so this must match the CSV header exactly -- confirm.
features_to_drop = [
    '_AOBV', 'site_seeing', 'air_medical', '_BUS ', '_POSI',
    'type_last_insp_COAW', 'certs_held', 'type_last_insp_AAIP', '_Other',
    '_OWRK', 'type_last_insp_UNK', '_AAPL', 'eng_mfgr_P&W', '_FLTS',
    'crew_sex', '_UNK', 'eng_type_REC', 'eng_type_infrequent_sklearn',
    'flt_plan_filed_IFR', 'num_eng', 'flt_plan_filed_VFR',
    'type_last_insp_COND', 'dprt_apt_id', 'second_pilot', '_INST',
    'eng_mfgr_infrequent_sklearn', 'eng_mfgr_ROTAX', 'type_last_insp_100H',
    'crew_category_FLTI', 'dest_apt_id', 'carb_fuel_injection_UNK',
    'flt_plan_filed_NONE', 'acft_make_PIPER', 'carb_fuel_injection_FINJ',
    'pc_profession', '_PERS', 'acft_make_BEECH', 'carb_fuel_injection_CARB',
]

# `columns=` already selects the axis, so no separate `axis` argument is needed.
data_drop_feats = data.drop(columns=features_to_drop)



In [7]:
# Split the reduced frame into target and predictors.
y_drop = data_drop_feats['subcategory_no']
X_drop = data_drop_feats.drop(columns='subcategory_no')

# Same test fraction and seed as the full-feature split.
X_train_dr, X_test_dr, y_train_dr, y_test_dr = train_test_split(
    X_drop,
    y_drop,
    test_size=0.3,
    random_state=1,
)



In [19]:
# Export one held-out row (reduced feature set) as an example input file.
# The seed makes the exported row the same on every run, matching the
# random_state=1 used for the train/test splits.
sample = X_test_dr.sample(1, random_state=1)
sample.to_csv('sample3.csv')

In [8]:
# Random forest for the reduced feature set.
# random_state is fixed so the fitted (and later pickled) model is reproducible.
# With bootstrap=False every tree sees the full training set, so the
# per-split feature subsampling (max_features='sqrt') is the source of
# randomness that the seed pins down.
# NOTE(review): min_samples_leaf=15 means a node needs at least 30 samples to
# be split, so min_samples_split=5 looks non-binding -- confirm it is intended.
model_random_2 = RandomForestClassifier(
    n_estimators=2500,
    min_samples_split=5,
    min_samples_leaf=15,
    max_features='sqrt',
    max_depth=168,
    bootstrap=False,
    random_state=1,
)


In [9]:
# Train the forest on the reduced-feature training split.
model_random_2.fit(X=X_train_dr, y=y_train_dr)


In [10]:
# Persist the fitted reduced-feature model to disk.
with open("model.pkl", "wb") as file:
    pickle.dump(model_random_2, file)

# Reload it to confirm the round trip. The context manager guarantees the
# read handle is closed instead of leaving it open.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
with open("model.pkl", "rb") as model_file:
    ui_model = pickle.load(model_file)

# Optional sanity check. The model was fitted on the reduced feature set, so it
# has to be scored on the *_dr split rather than on X_test / y_test:
# ui_model.score(X_test_dr, y_test_dr)

In [1]:
import pandas as pd

In [3]:
# Load an exported sample row and count its columns.
# NOTE(review): the export cell earlier in this notebook writes 'sample3.csv';
# confirm 'sample.csv' is the intended file.
df = pd.read_csv('sample.csv')
len(df.columns)

20

In [4]:
# List the column names of the sample file in order.
list(df.columns)

['id',
 'total_seats',
 'afm_hrs',
 'cert_max_gr_wt',
 'dprt_time',
 'power_units',
 'flight_hours_mean',
 'type_last_insp_ANNL',
 'eng_mfgr_CONTINENTAL',
 'eng_mfgr_LYCOMING',
 'far_part_091',
 'far_part_infrequent_sklearn',
 'acft_make_CESSNA',
 'acft_make_infrequent_sklearn',
 'fixed_retractable_RETR',
 'acft_category_AIR',
 'acft_category_infrequent_sklearn',
 'homebuilt',
 'crew_category_DSTU',
 'crew_category_PLT']

In [5]:
# Column names of the sample file, in file order.
fields = [
    # record identifier
    "id",
    # numeric columns
    "total_seats",
    "afm_hrs",
    "cert_max_gr_wt",
    "dprt_time",
    "power_units",
    "flight_hours_mean",
    # indicator columns
    "type_last_insp_ANNL",
    "eng_mfgr_CONTINENTAL",
    "eng_mfgr_LYCOMING",
    "far_part_091",
    "far_part_infrequent_sklearn",
    "acft_make_CESSNA",
    "acft_make_infrequent_sklearn",
    "fixed_retractable_RETR",
    "acft_category_AIR",
    "acft_category_infrequent_sklearn",
    "homebuilt",
    "crew_category_DSTU",
    "crew_category_PLT",
]

In [6]:
# One example record; entries are positional and line up with `fields`.
values = [
    '20110614X613041',  # id
    4,                  # total_seats
    16250,              # afm_hrs
    2400,               # cert_max_gr_wt
    100,                # dprt_time
    180,                # power_units
    142.0,              # flight_hours_mean
    1.0,                # type_last_insp_ANNL
    0.0,                # eng_mfgr_CONTINENTAL
    1.0,                # eng_mfgr_LYCOMING
    1.0,                # far_part_091
    0.0,                # far_part_infrequent_sklearn
    1.0,                # acft_make_CESSNA
    0.0,                # acft_make_infrequent_sklearn
    0.0,                # fixed_retractable_RETR
    1.0,                # acft_category_AIR
    0.0,                # acft_category_infrequent_sklearn
    0.0,                # homebuilt
    0.0,                # crew_category_DSTU
    1.0,                # crew_category_PLT
]

In [7]:
# Pair every field name with the value at the same position in `values`,
# wrapping each value in a one-element list.
obj = {name: [values[pos]] for pos, name in enumerate(fields)}
    



In [8]:
# Display the assembled mapping; each value is a one-element list.
# NOTE(review): the saved output below shows bare scalars, so it appears to
# predate the list-wrapping in the cell above -- re-run to refresh it.
obj

{'id': '20110614X613041',
 'total_seats': 4,
 'afm_hrs': 16250,
 'cert_max_gr_wt': 2400,
 'dprt_time': 100,
 'power_units': 180,
 'flight_hours_mean': 142.0,
 'type_last_insp_ANNL': 1.0,
 'eng_mfgr_CONTINENTAL': 0.0,
 'eng_mfgr_LYCOMING': 1.0,
 'far_part_091': 1.0,
 'far_part_infrequent_sklearn': 0.0,
 'acft_make_CESSNA': 1.0,
 'acft_make_infrequent_sklearn': 0.0,
 'fixed_retractable_RETR': 0.0,
 'acft_category_AIR': 1.0,
 'acft_category_infrequent_sklearn': 0.0,
 'homebuilt': 0.0,
 'crew_category_DSTU': 0.0,
 'crew_category_PLT': 1.0}