In [2]:
import os
import re

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical
from keras.utils import set_random_seed
from keras.models import load_model

In [3]:
# Load the airline review spreadsheet into a DataFrame.
# The location defaults to the original local path, but can be overridden by
# setting the AIRLINE_REVIEWS_XLSX environment variable so the notebook can run
# on another machine without editing this cell.
DEFAULT_REVIEWS_XLSX = r'D:\Code\py_code\Multi-Layer-Perceptron\data\BA_AirlineReviews_CL_excel.xlsx'
reviews_xlsx = os.environ.get('AIRLINE_REVIEWS_XLSX', DEFAULT_REVIEWS_XLSX)
# header=0: the first spreadsheet row supplies the column names.
airline_df = pd.read_excel(reviews_xlsx, header=0)

In [4]:
# Work on an independent copy so the frame that was read from disk is left
# untouched by the in-place column edits made in later cells.
df = airline_df.copy(deep=True)

In [5]:
# Normalise the free-text reviews in 'ReviewBody': lowercase everything, then
# strip every character that is not a word character, digit, space or comma.
_DISALLOWED_CHARS = re.compile(r'[^\w\d ,]')


def _normalize_review(text):
    """Return ``text`` coerced to ``str``, lowercased, with disallowed characters removed."""
    lowered = str(text).lower()
    return _DISALLOWED_CHARS.sub('', lowered)


df['ReviewBody'] = df['ReviewBody'].map(_normalize_review)

In [6]:
# Features are the cleaned review text; the label to predict is the
# 'Satisfaction' column (not 'OverallRating').
X, y = df['ReviewBody'], df['Satisfaction']

In [7]:
# TF-IDF vectorization of the review text.
# binary=True clips each term count to 0/1 before the IDF weighting is applied.
# NOTE(review): the vectorizer is fit on every review before the
# train/validation split, so the IDF weights also see validation text.
tfidf = TfidfVectorizer(binary=True)
# .toarray() yields a plain 2-D numpy.ndarray; the previous .todense() returned
# the discouraged numpy.matrix type, which downstream libraries increasingly reject.
X_tfidf = tfidf.fit_transform(X).toarray()

In [8]:
# Map the raw labels to integer class ids, then expand those ids into one-hot
# rows matching the softmax output layer.
lb = LabelEncoder().fit(y)
y_encoded = lb.transform(y)
y_categorical = to_categorical(y_encoded)

In [9]:
# Hold out 20% of the rows for validation; the fixed seed keeps the partition
# identical from run to run.
seed = 29
X_train, X_val, y_train, y_val = train_test_split(
    X_tfidf,
    y_categorical,
    test_size=0.2,
    random_state=seed,
)

In [10]:
# Build and compile the MLP classifier.
# Seed the Python, NumPy and backend random generators first so weight
# initialisation and dropout are repeatable across runs; `seed` is the value
# already used for the train/validation split.
set_random_seed(seed)

# One hidden layer of 512 ReLU units over the TF-IDF features, 50% dropout,
# and a softmax output with one unit per one-hot label column.
MLP = Sequential([
    Dense(512, input_shape=(X_tfidf.shape[1],), activation='relu'),
    Dropout(0.5),
    Dense(y_categorical.shape[1], activation='softmax'),
])
MLP.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

MLP.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               7968768   
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 10)                5130      
                                                                 
Total params: 7973898 (30.42 MB)
Trainable params: 7973898 (30.42 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Train the network. The checkpoint callback writes the model to disk whenever
# validation accuracy reaches a new maximum; early stopping ends training after
# 5 consecutive epochs without such an improvement.
file_path = "mlp_airline.keras"

check_point = ModelCheckpoint(
    filepath=file_path,
    monitor="val_accuracy",
    verbose=1,
    save_best_only=True,
    mode="max",
)
early_stop = EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=5,
)

mlp_history = MLP.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=[check_point, early_stop],
)

Epoch 1/50


Epoch 1: val_accuracy improved from -inf to 0.24022, saving model to mlp_airline.keras
Epoch 2/50
Epoch 2: val_accuracy improved from 0.24022 to 0.28880, saving model to mlp_airline.keras
Epoch 3/50
Epoch 3: val_accuracy improved from 0.28880 to 0.34143, saving model to mlp_airline.keras
Epoch 4/50
Epoch 4: val_accuracy improved from 0.34143 to 0.34818, saving model to mlp_airline.keras
Epoch 5/50
Epoch 5: val_accuracy improved from 0.34818 to 0.34953, saving model to mlp_airline.keras
Epoch 6/50
Epoch 6: val_accuracy improved from 0.34953 to 0.35358, saving model to mlp_airline.keras
Epoch 7/50
Epoch 7: val_accuracy did not improve from 0.35358
Epoch 8/50
Epoch 8: val_accuracy did not improve from 0.35358
Epoch 9/50
Epoch 9: val_accuracy did not improve from 0.35358
Epoch 10/50
Epoch 10: val_accuracy did not improve from 0.35358
Epoch 11/50
Epoch 11: val_accuracy did not improve from 0.35358


In [12]:
# Restore the checkpoint written during training (the epoch with the highest
# validation accuracy) rather than using the weights from the final epoch.
mlp_best = load_model(filepath=file_path)

In [13]:
# Score the restored checkpoint on the validation split. The model was compiled
# with a loss plus a single 'accuracy' metric, so evaluate() returns
# [loss, accuracy] and index 1 is the accuracy.
val_metrics = mlp_best.evaluate(X_val, y_val, verbose=0)
accuracy = val_metrics[1]
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 35.36%


In [14]:
# Make predictions with the best checkpoint.
# The cells visible here never build a separate test set (the previously
# referenced `X_test_tfidf` was undefined and raised NameError), so predict on
# the held-out validation rows instead.
mlp_pred = mlp_best.predict(X_val)

# Each row of `mlp_pred` holds per-class softmax probabilities; take the most
# probable class and map its integer id back to the original label value.
mlp_pred_labels = lb.inverse_transform(np.argmax(mlp_pred, axis=1))