In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [3]:
import zipfile

# Unpack the dataset archive into the working folder, then read the test
# split from the extracted files.
with zipfile.ZipFile('valoracion_aerolineas.zip', mode='r') as archive:
    archive.extractall(path='/content/dataset')

df_test = pd.read_csv('/content/dataset/test.csv')

In [4]:
# Discard rows that contain missing values, then remove the 'Unnamed: 0' and
# 'id' columns so they are not carried into the feature frames built below.
# errors='ignore' keeps this cell re-runnable: on a second execution both
# columns are already gone and a plain drop() would raise KeyError.
df_test = df_test.dropna()
df_test = df_test.drop(columns=['Unnamed: 0', 'id'], errors='ignore')

In [5]:
df_test_dummie = df_test.copy()

# Label-encode every categorical (object dtype) column.
# One encoder is fitted and kept per column so that each mapping stays
# available afterwards (label_encoders[col].classes_ / inverse_transform);
# refitting a single shared encoder would discard all but the last mapping.
# NOTE(review): the encoders are fitted on the test data itself; the integer
# codes only match the ones seen during training if both splits contain the
# same categories — confirm against the training notebook.
label_encoders = {}
for column in df_test_dummie.select_dtypes(include=['object']).columns:
    label_encoder = LabelEncoder()
    df_test_dummie[column] = label_encoder.fit_transform(df_test_dummie[column])
    label_encoders[column] = label_encoder

df_test_dummie

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,Ease of Online booking,Gate location,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,0,0,52,0,1,160,5,4,3,4,...,5,5,5,5,2,5,5,50,44.0,1
1,0,0,36,0,0,2863,1,1,3,1,...,4,4,4,4,3,4,5,0,0.0,1
2,1,1,20,0,1,192,2,0,2,4,...,2,4,1,3,2,2,2,0,0.0,0
3,1,0,44,0,0,3377,0,0,0,2,...,1,1,1,1,3,1,4,0,6.0,1
4,0,0,49,0,1,1182,2,3,4,3,...,2,2,2,2,4,2,4,0,20.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25971,1,1,34,0,0,526,3,3,3,1,...,4,3,2,4,4,5,4,0,0.0,0
25972,1,0,23,0,0,646,4,4,4,4,...,4,4,5,5,5,5,4,0,0.0,1
25973,0,0,17,1,1,828,2,5,1,5,...,2,4,3,4,5,4,2,0,0.0,0
25974,1,0,14,0,0,1127,3,3,3,3,...,4,3,2,5,4,5,4,0,0.0,1


In [6]:
# Narrow the encoded frame down to three predictors plus the target column.
df_test_selected = df_test_dummie.loc[:, ['Type of Travel','Class','Online boarding','satisfaction']]
df_test_selected

Unnamed: 0,Type of Travel,Class,Online boarding,satisfaction
0,0,1,4,1
1,0,0,4,1
2,0,1,2,0
3,0,0,4,1
4,0,1,1,1
...,...,...,...,...
25971,0,0,3,0
25972,0,0,4,1
25973,1,1,1,0
25974,0,0,4,1


In [7]:
# Engineered features, all derived from the cleaned / label-encoded test frame.

# Left-closed age intervals used for the age cluster: [7,18), [18,26), ... [63,86)
age_bins = [7,18,26,33,40,47,54,63,86]

df_test_new_features = pd.DataFrame({
    # Age cluster: integer index of the interval each passenger's age falls in
    'Age Cluster': pd.cut(df_test['Age'], bins=age_bins, labels=False, right=False),

    # Seat-comfort weight: seat comfort rescaled by 5, plus encoded class and travel type
    'Weight Comfort Seats': (df_test_dummie['Seat comfort']/5 + df_test_dummie['Class'] + df_test_dummie['Type of Travel']),

    # Row-wise mean of all service ratings that take values 0-5
    'Mean Satisfaction Services': df_test_dummie[[
        'Inflight wifi service','Departure/Arrival time convenient','Ease of Online booking','Gate location','Food and drink',
        'Online boarding','Seat comfort','Inflight entertainment','On-board service','Leg room service',
        'Baggage handling','Checkin service','Inflight service','Cleanliness',
    ]].mean(axis=1),

    # Sum of the in-flight service ratings (plus online boarding)
    'Sum Inflight Services': (
        df_test_dummie['Inflight wifi service']
        + df_test_dummie['Inflight service']
        + df_test_dummie['Inflight entertainment']
        + df_test_dummie['Online boarding']
    ),

    # Leg-room rating weighted by the encoded class
    'Space Seat and Class': (df_test_dummie['Class'] * df_test_dummie['Leg room service']) / 5,

    # Basic services: encoded class plus the food/cleanliness ratings divided by 10
    'Weight Basic Services': df_test_dummie['Class'] + (df_test_dummie['Food and drink'] + df_test_dummie['Cleanliness'])/10,
})

df_test_new_features

Unnamed: 0,Age Cluster,Weight Comfort Seats,Mean Satisfaction Services,Sum Inflight Services,Space Seat and Class,Weight Basic Services
0,5,1.6,4.142857,19,1.0,1.8
1,3,1.0,3.428571,13,0.0,1.0
2,1,1.4,2.142857,8,0.2,1.4
3,4,0.8,1.785714,6,0.0,0.7
4,5,1.4,2.642857,7,0.4,1.8
...,...,...,...,...,...,...
25971,3,0.8,3.357143,15,0.0,0.8
25972,1,0.8,4.285714,17,0.0,0.8
25973,0,2.4,3.000000,9,0.6,1.4
25974,0,0.8,3.642857,16,0.0,0.8


In [8]:
# Place the selected raw columns and the engineered features side by side;
# concat along the column axis aligns the rows on the index.
df_test_combined = pd.concat(objs=[df_test_selected, df_test_new_features], axis='columns')

# If a column label appears more than once, keep only its first occurrence.
df_test_combined = df_test_combined.loc[:, ~df_test_combined.columns.duplicated(keep='first')]
df_test_combined

Unnamed: 0,Type of Travel,Class,Online boarding,satisfaction,Age Cluster,Weight Comfort Seats,Mean Satisfaction Services,Sum Inflight Services,Space Seat and Class,Weight Basic Services
0,0,1,4,1,5,1.6,4.142857,19,1.0,1.8
1,0,0,4,1,3,1.0,3.428571,13,0.0,1.0
2,0,1,2,0,1,1.4,2.142857,8,0.2,1.4
3,0,0,4,1,4,0.8,1.785714,6,0.0,0.7
4,0,1,1,1,5,1.4,2.642857,7,0.4,1.8
...,...,...,...,...,...,...,...,...,...,...
25971,0,0,3,0,3,0.8,3.357143,15,0.0,0.8
25972,0,0,4,1,1,0.8,4.285714,17,0.0,0.8
25973,1,1,1,0,0,2.4,3.000000,9,0.6,1.4
25974,0,0,4,1,0,0.8,3.642857,16,0.0,0.8


# Arquitectura 1
- Precisión (accuracy) anotada: 47.46% — la ejecución registrada más abajo obtiene 45.54%.

In [61]:
# Architecture 1: restore the saved Keras model and score the encoded test set.
model = tf.keras.models.load_model('../Modelos/Modelo1.h5')

# Every encoded column except the target is used as a feature.
# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie.drop('satisfaction', axis=1)
y_test = df_test_dummie.loc[:, 'satisfaction']

y_pred_prob = model.predict(X_test)
# Threshold the predicted probabilities at 0.5 to obtain class labels (0 or 1)
y_pred = np.greater(y_pred_prob, 0.5).astype("int32")

In [62]:
# Binary confusion matrix. labels=[0, 1] pins the result to a 2x2 layout so
# the four-way unpacking below also works when one class is missing from both
# y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy (share of correct predictions), not the
# precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9744830620325561
Especificidad: 0.04935297356828194
F1-Score: 0.6110176270999421
Precisión: 0.4450293337619545
Precisión del modelo: 45.54%


# Arquitectura 2
- Precisión (accuracy) anotada: 47.46% — la ejecución registrada más abajo obtiene 55.90%.

In [63]:
# Architecture 2: restore the saved Keras model and score the encoded test set.
model = tf.keras.models.load_model('../Modelos/Modelo2.h5')

# Every encoded column except the target is used as a feature.
# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie.drop(columns=['satisfaction'])
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.024373075230972284
Especificidad: 0.9772852422907489
F1-Score: 0.04627464082860007
Precisión: 0.45634266886326197
Precisión del modelo: 55.90%


# Arquitectura 3

In [65]:
# Architecture 3: restore the saved Keras model and score it on eight
# selected columns of the encoded test set.
model = tf.keras.models.load_model('../Modelos/Modelo3.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Type of Travel', 'Class','Online boarding', 'Seat comfort', 'Inflight entertainment','On-board service','Leg room service','Cleanliness']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9789705235371755
Especificidad: 0.06965859030837004
F1-Score: 0.6179909462049046
Precisión: 0.45150555961366773
Precisión del modelo: 46.88%


# Arquitectura 4

In [66]:
# Architecture 4: restore the saved Keras model and score it on eight
# selected columns of the encoded test set.
model = tf.keras.models.load_model('../Modelos/Modelo4.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Type of Travel', 'Class','Online boarding', 'Seat comfort', 'Inflight entertainment','On-board service','Leg room service','Cleanliness']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9906731192256929
Especificidad: 0.014386013215859032
F1-Score: 0.6095336058251902
Precisión: 0.44018296973962
Precisión del modelo: 44.29%


# Arquitectura 5

In [11]:
# Architecture 5: restore the saved Keras model and score it on nine
# selected columns of the encoded test set.
model = tf.keras.models.load_model('../Modelos/Modelo5.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Online boarding','Inflight wifi service','Class','Type of Travel','Inflight entertainment','Seat comfort','Leg room service','Customer Type','Ease of Online booking']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9930488341399032
Especificidad: 0.012802863436123349
F1-Score: 0.6101694915254238
Precisión: 0.44037771187763386
Precisión del modelo: 44.31%


# Arquitectura 6

In [13]:
# Architecture 6: restore the saved Keras model and score it on ten
# selected columns of the encoded test set.
model = tf.keras.models.load_model('../Modelos/Modelo6.h5')

# NOTE(review): the features are fed unscaled ('Flight Distance' is on a much
# larger scale than the 0-5 ratings); if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Type of Travel', 'Class','Flight Distance', 'Inflight wifi service', 'Online boarding','Seat comfort','Inflight entertainment','On-board service', 'Leg room service', 'Cleanliness']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 1.0
Especificidad: 0.0
F1-Score: 0.6100703204680874
Precisión: 0.4389217162939791
Precisión del modelo: 43.89%


# Arquitectura 7

In [67]:
# Architecture 7: restore the saved Keras model and score it on the six
# engineered features of the combined test frame.
model = tf.keras.models.load_model('../Modelos/Modelo7.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_combined[['Age Cluster', 'Weight Comfort Seats', 'Mean Satisfaction Services',
       'Sum Inflight Services', 'Space Seat and Class',
       'Weight Basic Services']]
y_test = df_test_combined['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9999120105587329
Especificidad: 0.0
F1-Score: 0.6100330139302681
Precisión: 0.4389000463463618
Precisión del modelo: 43.89%


# Arquitectura 8

In [69]:
# Architecture 8: restore the saved Keras model and score it on the six
# engineered features of the combined test frame.
model = tf.keras.models.load_model('../Modelos/Modelo8.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_combined[['Age Cluster', 'Weight Comfort Seats', 'Mean Satisfaction Services',
       'Sum Inflight Services', 'Space Seat and Class',
       'Weight Basic Services']]
y_test = df_test_combined['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9998240211174659
Especificidad: 0.0
F1-Score: 0.6099957053897359
Precisión: 0.43887837472480784
Precisión del modelo: 43.88%


# Arquitectura 9

In [15]:
# Architecture 9: restore the saved Keras model and score it on the encoded
# class plus the fourteen service-rating columns.
model = tf.keras.models.load_model('../Modelos/Modelo9.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Class','Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9167619885613726
Especificidad: 0.3039647577092511
F1-Score: 0.6533107599699023
Precisión: 0.5074764989528031
Precisión del modelo: 57.29%


In [18]:
# Model 10: restore the saved Keras model and score it on the encoded class
# plus the fourteen service-rating columns.
model = tf.keras.models.load_model('../Modelos/Modelo10.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Class','Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9418389793224813
Especificidad: 0.18715583700440527
F1-Score: 0.6319145167955605
Precisión: 0.47545862390618754
Precisión del modelo: 51.84%


In [20]:
# Model 11: restore the saved Keras model and score it on the encoded class
# plus the fourteen service-rating columns.
model = tf.keras.models.load_model('../Modelos/Modelo11.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Class','Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9274087109546855
Especificidad: 0.28937224669603523
F1-Score: 0.6540693164541252
Precisión: 0.5051763803680982
Precisión del modelo: 56.94%


In [21]:
# Model 12: restore the saved Keras model and score it on the encoded class
# plus the fourteen service-rating columns.
model = tf.keras.models.load_model('../Modelos/Modelo12.h5')

# NOTE(review): the features are fed unscaled; if the model was trained on
# standardized inputs the same fitted scaler must be applied here — confirm
# against the training notebook.
X_test = df_test_dummie[['Class','Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness']]
y_test = df_test_dummie['satisfaction']

y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # turn probabilities into classes (0 or 1)

# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below also
# works when one class is missing from both y_test and y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Flattened 2x2 matrix is ordered TN, FP, FN, TP
TN, FP, FN, TP = cm.ravel()

# Each ratio falls back to 0.0 when its denominator is zero instead of
# producing nan and a warning.

# Recall (sensitivity)
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0

# Specificity
specificity = TN / (TN + FP) if (TN + FP) > 0 else 0.0

# Precision
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0

# F1 score. Stored as `f1` so the `f1_score` function imported from
# sklearn.metrics is not overwritten by a float.
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

# Print the metrics
print("Recall:", recall)
print("Especificidad:", specificity)
print("F1-Score:", f1)
print('Precisión:', precision)

# This last figure is the accuracy, not the precision printed above.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy * 100:.2f}%')

Recall: 0.9088429388473384
Especificidad: 0.2657626651982379
F1-Score: 0.6383609900806527
Precisión: 0.4919508477805296
Precisión del modelo: 54.80%
