In [1]:
import traceback
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

pd.set_option('display.max_columns', None)

In [2]:
# Read the semicolon-delimited dataset into a DataFrame.
dataset_path = '../data/datasets/cleanDataConfig.csv'
df = pd.read_csv(dataset_path, sep=';')

# Preview the first rows; as the last expression of the cell it is rendered as output.
df.head()

Unnamed: 0,id_leg,operation,runway,hexid,callsign,type,origin,destination,altitude,ground_speed,vertical_rate,tmp,dew_pt,rel_hum,wind_dir,win_sp,wind_gust,visib,press,sky_lvl,rain,snow,drizzle,fog,time_class,day_week,month,time_ref,operation_Landing,operation_TakeOff,runway_14L,runway_14R,runway_18L,runway_18R,runway_32L,runway_32R,runway_36L,runway_36R,configuration_NORTE,configuration_SUR,configuration
0,733513,0,5,483,6920,22,186,268,1625.0,142.0,-768.0,7.0,0.0,61.017,250.0,20.372,0.0,10.0,1030.0,10.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
1,733498,0,5,515,246,23,664,268,1850.0,137.0,-768.0,7.0,-1.0,56.724,250.0,25.928,44.447962,10.0,1030.0,10.0,0.0,0.0,0.0,0.0,3.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
2,733495,0,5,814,262,67,212,268,2200.0,156.0,-832.0,7.0,-1.0,56.724,230.0,16.668,0.0,10.0,1031.0,10.0,0.0,0.0,0.0,0.0,3.0,1.0,1.0,8.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
3,733501,0,5,625,277,23,254,268,1862.5,143.0,-640.0,7.0,-1.0,56.724,230.0,16.668,0.0,10.0,1031.0,10.0,0.0,0.0,0.0,0.0,3.0,1.0,1.0,8.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
4,733496,0,5,491,268,23,421,268,2000.0,136.0,-640.0,5.0,0.0,70.063,160.0,12.964,0.0,10.0,1031.0,10.0,0.0,0.0,0.0,0.0,4.0,1.0,1.0,9.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0


In [3]:
# Show the (rows, columns) size of the loaded frame as a quick sanity check.
df.shape

(733822, 41)

In [4]:
# Split the frame into model inputs (X) and label (Y) by column position:
# the first 38 columns are the predictors and the last column is the target.
# NOTE(review): the cut-off 38 is tied to the current column order of the CSV;
# confirm it still leaves out every target-derived column if the schema changes.
# NOTE(review): the first 38 columns include `id_leg`, which looks like a record
# identifier — confirm it is really meant to be used as a feature.
columns_names = list(df.columns)

predictors, target = columns_names[:38], columns_names[-1]

X = df.loc[:, predictors]
Y = df.loc[:, target]

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the shuffled
# split identical across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [6]:
def _fit_and_save(label, start_message, estimator, output_path):
    """Fit one estimator on the training split and persist it with joblib.

    Reads the notebook-level ``X_train`` / ``Y_train``. Any failure is
    reported and swallowed on purpose, so that one model failing does not
    prevent the remaining models from being trained.

    Parameters
    ----------
    label : str
        Model name interpolated into the error message.
    start_message : str
        Progress line printed before the fit starts.
    estimator : object
        Unfitted scikit-learn estimator; it is fitted in place.
    output_path : str
        Destination file of the compressed joblib dump.

    Returns
    -------
    None
    """
    try:
        print(start_message)
        # Make sure the destination folder exists *before* the long fit, so a
        # missing directory cannot cost an already-trained model at dump time.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        estimator.fit(X_train, Y_train)
        joblib.dump(estimator, output_path, compress=9)
    except Exception as e:
        print(f"ERROR en el {label}. ERROR \n {e}")
        # str(e) alone can be empty or ambiguous; keep the full traceback.
        traceback.print_exc()


def train():
    """Train and save the three base classifiers and the soft-voting ensemble.

    Four models are fitted on the notebook-level ``X_train`` / ``Y_train`` and
    dumped under ``../models``: a random forest, a k-nearest-neighbours
    classifier, a multi-layer perceptron, and a ``VotingClassifier`` that
    combines freshly built instances of the three. Each model is handled
    independently: an error in one is printed and the next one still runs.

    Returns
    -------
    None
    """
    n_jobs = 7
    # Same seed as the train/test split and the MLP, so that the random
    # forests are reproducible across runs as well.
    seed = 0

    _fit_and_save(
        "RANDOM FOREST",
        "INICIO RANDOM FOREST",
        RandomForestClassifier(
            oob_score=True, n_estimators=200, n_jobs=n_jobs, random_state=seed
        ),
        '../models/RANDOM_FOREST.bin',
    )

    _fit_and_save(
        "KNN",
        "INICIO KNN",
        KNeighborsClassifier(n_neighbors=5, metric='manhattan', n_jobs=n_jobs),
        '../models/KNN.bin',
    )

    _fit_and_save(
        "RRNN",
        "INICIO RRNN",
        MLPClassifier(
            hidden_layer_sizes=(38, 24, 40, 35),
            solver='adam',
            activation='relu',
            max_iter=125,
            shuffle=True,
            random_state=seed,
        ),
        '../models/RRNN.bin',
    )

    # NOTE(review): the ensemble members are configured differently from the
    # standalone models above (max_depth=18 on the forest, weights='distance'
    # on KNN, and a second hidden layer of 25 instead of 24 units). The values
    # are kept exactly as they were — confirm the differences are intentional.
    forest = RandomForestClassifier(
        oob_score=True,
        n_estimators=200,
        n_jobs=n_jobs,
        max_depth=18,
        random_state=seed,
    )
    knn = KNeighborsClassifier(
        n_neighbors=5, metric='manhattan', n_jobs=n_jobs, weights='distance'
    )
    rrnn = MLPClassifier(
        hidden_layer_sizes=(38, 25, 40, 35),
        solver='adam',
        activation='relu',
        max_iter=125,
        shuffle=True,
        random_state=seed,
    )

    # NOTE(review): both the ensemble and its members request n_jobs=7;
    # confirm this nesting does not oversubscribe the machine.
    final_model = VotingClassifier(
        estimators=[('random_forest', forest), ('knn', knn), ('rrnn', rrnn)],
        voting='soft',
        n_jobs=n_jobs,
    )

    _fit_and_save(
        "modelo FINAL",
        "Inicio Creacion Modelo Final!!",
        final_model,
        '../models/votingClasifier.bin',
    )

    print("FIIIIIINNNNN!!!!!!!!!!!*******")

In [7]:
# Fit and persist every model; train() prints a progress line per stage.
train()

INICIO RANDOM FOREST
INICIO KNN
INICIO RRNN
Inicio Creacion Modelo Final!!
FIIIIIINNNNN!!!!!!!!!!!*******
