In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import sklearn.model_selection as ms


# Global plot styling. The seaborn theme is applied first because it sets its own
# font sizes; the axis-label size is then overridden afterwards so it is not lost.
sns.set(rc={"lines.linewidth": 4}, style="whitegrid")
plt.rcParams['axes.labelsize'] = 13.5

In [2]:
# Read the semicolon-separated dataset and preview its first rows.
dataset_path = '../data/datasets/cleanDataConfig.csv'
df = pd.read_csv(dataset_path, sep=';')
df.head()

Unnamed: 0,id_leg,operation,runway,hexid,callsign,type,origin,destination,altitude,ground_speed,...,runway_14R,runway_18L,runway_18R,runway_32L,runway_32R,runway_36L,runway_36R,configuration_NORTE,configuration_SUR,configuration
0,733513,0,5,483,6920,22,186,268,1625.0,142.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
1,733498,0,5,515,246,23,664,268,1850.0,137.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
2,733495,0,5,814,262,67,212,268,2200.0,156.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
3,733501,0,5,625,277,23,254,268,1862.5,143.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0
4,733496,0,5,491,268,23,421,268,2000.0,136.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,0,0


In [3]:
# Split the frame into a feature matrix and a label vector by column position.
columns_names = list(df.columns)

# The first 38 columns are used as model inputs; the last column is the label.
# NOTE(review): this positional slice also selects 'Unnamed: 0' and 'id_leg' (both
# visible in the df.head() preview), which look like row/record identifiers rather
# than features -- confirm they are meant to be predictors.
predictors, target = columns_names[:38], columns_names[-1]

X = df.loc[:, predictors]
Y = df.loc[:, target]

In [4]:
# Hold out 20% of the rows as a test set; the shuffle is seeded so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [5]:
def validacion_cruzada():
    filename = '../data/resultados/validacionCruzada/crossValidationRESULTS.csv'
    kfold = ms.StratifiedKFold(n_splits=10, shuffle=True, random_state=0).split(X_train, Y_train)

    forest = RandomForestClassifier(n_jobs=3, n_estimators=10)

    try:
        print("Inicio RANDOM FOREST")
        for k, (train,test) in enumerate(kfold):
            forest.fit(X_train.iloc[train], Y_train.iloc[train])
            train_score = forest.score(X_train.iloc[train], Y_train.iloc[train])
            validation_score = forest.score(X_train.iloc[test], Y_train.iloc[test])
            test_score = forest.score(X_test, Y_test)
            
            with open (filename, 'a', encoding='utf-8') as file:
                file.write(f"RANDOM_FOREST;{k+1};{train_score};{validation_score};{test_score}\n")
    except Exception as e:
        print("ERROR (RANDOM FOREST): \n ", e)
    ##################################################################################################################

    kfold = ms.StratifiedKFold(n_splits=10, shuffle=True, random_state=0).split(X_train, Y_train)

    knn = RandomForestClassifier(n_jobs=3, n_estimators=10)

    try:
        print("Inicio KNN")
        for k, (train,test) in enumerate(kfold):
            knn.fit(X_train.iloc[train], Y_train.iloc[train])
            train_score = knn.score(X_train.iloc[train], Y_train.iloc[train])
            validation_score = knn.score(X_train.iloc[test], Y_train.iloc[test])
            test_score = knn.score(X_test, Y_test)
            
            with open (filename, 'a', encoding='utf-8') as file:
                file.write(f"KNN;{k+1};{train_score};{validation_score};{test_score}\n")
    except Exception as e:
        print("ERROR (KNN): \n ", e)
    ##################################################################################################################

    kfold = ms.StratifiedKFold(n_splits=10, shuffle=True, random_state=0).split(X_train, Y_train)

    rrnn = RandomForestClassifier(n_jobs=3, n_estimators=10)

    try:
        print("Inicio RRNN")
        for k, (train,test) in enumerate(kfold):
            rrnn.fit(X_train.iloc[train], Y_train.iloc[train])
            train_score = rrnn.score(X_train.iloc[train], Y_train.iloc[train])
            validation_score = rrnn.score(X_train.iloc[test], Y_train.iloc[test])
            test_score = rrnn.score(X_test, Y_test)
            
            with open (filename, 'a', encoding='utf-8') as file:
                file.write(f"RRNN;{k+1};{train_score};{validation_score};{test_score}\n")
    except Exception as e:
        print("ERROR (RRNN): \n ", e)
    ###################################################################################################
    kfold = ms.StratifiedKFold(n_splits=10, shuffle=True, random_state=0).split(X_train, Y_train)

    forest = RandomForestClassifier(oob_score=True , n_estimators=200, n_jobs=7, max_depth=18)
    knn = KNeighborsClassifier(n_neighbors=5 , metric='manhattan', n_jobs=7, weights='distance')
    rrnn = MLPClassifier(hidden_layer_sizes=(38,25,40,35), solver='adam', activation='relu', max_iter=125, shuffle=True, random_state=0)

    final_model = VotingClassifier(
        estimators=[('random_forest', forest), ('knn', knn), ('rrnn', rrnn)],
        voting='soft',
        n_jobs=7
        )

    try:
        print("Inicio ENSEMBLE")
        for k, (train,test) in enumerate(kfold):
            print(f"Iteracion {k+1} de 10")
            final_model.fit(X_train.iloc[train], Y_train.iloc[train])
            train_score = final_model.score(X_train.iloc[train], Y_train.iloc[train])
            validation_score = final_model.score(X_train.iloc[test], Y_train.iloc[test])
            test_score = final_model.score(X_test, Y_test)
            print(f"ENSEMBLE;{k+1};{train_score};{validation_score};{test_score}")
            
            with open (filename, 'a', encoding='utf-8') as file:
                file.write(f"ENSEMBLE;{k+1};{train_score};{validation_score};{test_score}\n")
    except Exception as e:
        print("ERROR (ENSEMBLE): \n ", e)

    print("FIIIIIN!!!!!!!!!!!!!!!!!!!!!")

In [6]:
# Run the cross-validation for every model; each fold's scores are appended to the
# function's results CSV.
# NOTE(review): the saved output of this cell does not contain the per-fold
# "Iteracion ..." lines that the ensemble loop prints, so it may be stale --
# re-run on a fresh kernel to confirm.
validacion_cruzada()

Inicio RANDOM FOREST
Inicio KNN
Inicio RRNN
Inicio ENSEMBLE
FIIIIIN!!!!!!!!!!!!!!!!!!!!!
