In [33]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import time
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, Callback


In [3]:
import zipfile
with zipfile.ZipFile('valoracion_aerolineas.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/dataset')

df_train = pd.read_csv('/content/dataset/train.csv')
df_test = pd.read_csv('/content/dataset/test.csv')

In [4]:
df_train = df_train.dropna()
df_train = df_train.drop(columns=['Unnamed: 0', 'id'])
df_train

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,Ease of Online booking,Gate location,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,Male,Loyal Customer,13,Personal Travel,Eco Plus,460,3,4,3,1,...,5,4,3,4,4,5,5,25,18.0,neutral or dissatisfied
1,Male,disloyal Customer,25,Business travel,Business,235,3,2,3,3,...,1,1,5,3,1,4,1,1,6.0,neutral or dissatisfied
2,Female,Loyal Customer,26,Business travel,Business,1142,2,2,2,2,...,5,4,3,4,4,4,5,0,0.0,satisfied
3,Female,Loyal Customer,25,Business travel,Business,562,2,5,5,5,...,2,2,5,3,1,4,2,11,9.0,neutral or dissatisfied
4,Male,Loyal Customer,61,Business travel,Business,214,3,3,3,3,...,3,3,4,4,3,3,3,0,0.0,satisfied
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103899,Female,disloyal Customer,23,Business travel,Eco,192,2,1,2,3,...,2,3,1,4,2,3,2,3,0.0,neutral or dissatisfied
103900,Male,Loyal Customer,49,Business travel,Business,2347,4,4,4,4,...,5,5,5,5,5,5,4,0,0.0,satisfied
103901,Male,disloyal Customer,30,Business travel,Business,1995,1,1,1,3,...,4,3,2,4,5,5,4,7,14.0,neutral or dissatisfied
103902,Female,disloyal Customer,22,Business travel,Eco,1000,1,1,1,5,...,1,4,5,1,5,4,1,0,0.0,neutral or dissatisfied


In [5]:
df_train_dummie = df_train.copy()

# Aplicar Label Encoding a las columnas categóricas
label_encoder = LabelEncoder()
for column in df_train_dummie.select_dtypes(include=['object']).columns:
    df_train_dummie[column] = label_encoder.fit_transform(df_train_dummie[column])

df_train_dummie

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,Ease of Online booking,Gate location,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,1,0,13,1,2,460,3,4,3,1,...,5,4,3,4,4,5,5,25,18.0,0
1,1,1,25,0,0,235,3,2,3,3,...,1,1,5,3,1,4,1,1,6.0,0
2,0,0,26,0,0,1142,2,2,2,2,...,5,4,3,4,4,4,5,0,0.0,1
3,0,0,25,0,0,562,2,5,5,5,...,2,2,5,3,1,4,2,11,9.0,0
4,1,0,61,0,0,214,3,3,3,3,...,3,3,4,4,3,3,3,0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103899,0,1,23,0,1,192,2,1,2,3,...,2,3,1,4,2,3,2,3,0.0,0
103900,1,0,49,0,0,2347,4,4,4,4,...,5,5,5,5,5,5,4,0,0.0,1
103901,1,1,30,0,0,1995,1,1,1,3,...,4,3,2,4,5,5,4,7,14.0,0
103902,0,1,22,0,1,1000,1,1,1,5,...,1,4,5,1,5,4,1,0,0.0,0


In [6]:
df_train_selected = df_train_dummie[['Type of Travel','Class','Online boarding','satisfaction']]
df_train_selected

Unnamed: 0,Type of Travel,Class,Online boarding,satisfaction
0,1,2,3,0
1,0,0,3,0
2,0,0,5,1
3,0,0,2,0
4,0,0,5,1
...,...,...,...,...
103899,0,1,2,0
103900,0,0,4,1
103901,0,0,1,0
103902,0,1,1,0


In [7]:
# Age cluster
df_train_new_features = pd.DataFrame()
age_bins = [7,18,26,33,40,47,54,63,86]
df_train_new_features['Age Cluster'] = pd.cut(df_train_dummie['Age'], bins=age_bins, labels=False, right=False)

# Weight Comfort Seats
df_train_new_features['Weight Comfort Seats'] = (df_train_dummie['Seat comfort']/5 + df_train_dummie['Class'] + df_train_dummie['Type of Travel'])

# Media de todos los servicios que tienen valor del 0-5
df_train_new_features['Mean Satisfaction Services'] = df_train_dummie[['Inflight wifi service','Departure/Arrival time convenient','Ease of Online booking','Gate location','Food and drink',
                                  'Online boarding','Seat comfort','Inflight entertainment','On-board service','Leg room service','Baggage handling','Checkin service','Inflight service','Cleanliness']].mean(axis=1)

# Suma del Servicio en Vuelo
df_train_new_features['Sum Inflight Services'] = df_train_dummie['Inflight wifi service'] + df_train_dummie['Inflight service'] + df_train_dummie['Inflight entertainment'] + df_train_dummie['Online boarding']

# Peso sobre el espacio de los pies según la clase
df_train_new_features['Space Seat and Class'] = (df_train_dummie['Class'] * df_train_dummie['Leg room service']) / 5

# suma de servicios básicos
df_train_new_features['Weight Basic Services'] = df_train_dummie['Class'] + (df_train_dummie['Food and drink'] + df_train_dummie['Cleanliness'])/10

df_train_new_features


Unnamed: 0,Age Cluster,Weight Comfort Seats,Mean Satisfaction Services,Sum Inflight Services,Space Seat and Class,Weight Basic Services
0,0,4.0,3.857143,16,1.2,3.0
1,1,0.2,2.285714,11,0.0,0.2
2,2,1.0,3.714286,16,0.0,1.0
3,1,0.4,3.000000,10,0.0,0.4
4,6,1.0,3.500000,14,0.0,0.7
...,...,...,...,...,...,...
103899,1,1.4,2.214286,9,0.2,1.4
103900,5,1.0,4.357143,18,0.0,0.6
103901,2,1.0,3.071429,11,0.0,0.8
103902,1,1.2,2.285714,7,1.0,1.2


In [8]:
df_train_combined = pd.concat([df_train_selected, df_train_new_features], axis=1)

# Eliminar las columnas duplicadas si es necesario
df_train_combined = df_train_combined.loc[:, ~df_train_combined.columns.duplicated()]
df_train_combined

Unnamed: 0,Type of Travel,Class,Online boarding,satisfaction,Age Cluster,Weight Comfort Seats,Mean Satisfaction Services,Sum Inflight Services,Space Seat and Class,Weight Basic Services
0,1,2,3,0,0,4.0,3.857143,16,1.2,3.0
1,0,0,3,0,1,0.2,2.285714,11,0.0,0.2
2,0,0,5,1,2,1.0,3.714286,16,0.0,1.0
3,0,0,2,0,1,0.4,3.000000,10,0.0,0.4
4,0,0,5,1,6,1.0,3.500000,14,0.0,0.7
...,...,...,...,...,...,...,...,...,...,...
103899,0,1,2,0,1,1.4,2.214286,9,0.2,1.4
103900,0,0,4,1,5,1.0,4.357143,18,0.0,0.6
103901,0,0,1,0,2,1.0,3.071429,11,0.0,0.8
103902,0,1,1,0,1,1.2,2.285714,7,1.0,1.2


# Callbacks

In [34]:
# Definir un callback personalizado para medir el tiempo de entrenamiento
class TimeHistory(Callback):
    def on_train_begin(self, logs=None):
        self.times = []
        self.train_start_time = time.time()

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        self.times.append(time.time() - self.epoch_start_time)

    def on_train_end(self, logs=None):
        self.total_train_time = time.time() - self.train_start_time
        
time_callback = TimeHistory()

early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min', restore_best_weights=True)

# Arquitectura 1: Valores sin análisis previo
- Inputs: Todo el data set (dummie)
- Epochs = 100
- 1ra capa = 64, relu
- 2da capa = 32 relu
- 3ra capa = 1 sigmoid
- learning_rate=0.001
- optimazador = Adam
- loss = 'binary_crossentropy'
- batch_size=32



In [43]:
X = df_train_dummie.drop('satisfaction', axis=1)
y = df_train_dummie['satisfaction']

In [44]:
# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [45]:
# Normalizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [46]:
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [47]:
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [48]:
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[time_callback, early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


In [49]:
model.save('../Modelos/Modelo1.h5')

loss = history.history['loss']
accuracy = history.history['accuracy']

print(f'Tiempo total de entrenamiento: {time_callback.total_train_time:.2f} segundos')
print(f'Pérdida: {loss[-1]}')
print(f'Accuracy: {accuracy[-1]*100}')


Tiempo total de entrenamiento: 51.59 segundos
Pérdida: 0.07897747308015823
Accuracy: 96.54660820960999


# Arquitectura 2
- Inputs: Todo el data set (dummie)
- Epochs = 100
- 1ra capa = 64, relu
- 2da capa = 32 relu
- 3ra capa = 16 relu 
- 4ta capa = 1 sigmoid
- learning_rate=0.001
- optimazador = Adam
- loss = 'binary_crossentropy'
- batch_size=32

In [51]:
X = df_train_dummie.drop('satisfaction', axis=1)
y = df_train_dummie['satisfaction']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Normalizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[time_callback, early_stopping])

# Guardar el modelo
model.save('../Modelos/Modelo2.h5')

loss = history.history['loss']
accuracy = history.history['accuracy']

print(f'Tiempo total de entrenamiento: {time_callback.total_train_time:.2f} segundos')
print(f'Pérdida: {loss[-1]}')
print(f'Accuracy: {accuracy[-1]*100}')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Tiempo total de entrenamiento: 58.75 segundos
Pérdida: 0.075148805975914
Accuracy: 96.71071171760559


# Arquitectura 3

In [39]:
X = df_train_combined.drop('satisfaction', axis=1)
y = df_train_combined['satisfaction']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[time_callback, early_stopping])

loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy*100:.2f}%')

model.save('Modelo2.h5')

loss = history.history['loss']
accuracy = history.history['accuracy']

print(f'Tiempo total de entrenamiento: {time_callback.total_train_time:.2f} segundos')
print(f'Pérdida: {loss[-1]}')
print(f'Accuracy: {accuracy[-1]*100}')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100

KeyboardInterrupt: 