In [5]:

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Load your dataset
# Assuming your dataset is in CSV format
data = pd.read_csv('original github se copy wala csv.csv')

# Display the first few rows of the dataset to understand its structure
print(data.head())

# Data Preprocessing
# Assuming 'waitlisted' is the feature you want to predict, and other columns are your features
X = data.drop('labels', axis=1)
y = data['labels']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define categorical and numerical features
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(exclude=['object']).columns

# Create transformers for numerical and categorical features
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Create a column transformer to apply different transformers to different columns
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

print("Accuracy of a Random_Forest_classifier")


# Create a pipeline with preprocessing and the classifier
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Fit the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
# Save the model for future use
# You can use joblib or pickle to save the model
import joblib
joblib.dump(model, 'waitlist_model.joblib')



   Unnamed: 0 travelClass  bookingStatus  status1Day  status1Month  \
0           0          3A             21          28            12   
1           1          3A             14          63            -1   
2           2          3A             39          -1            -1   
3           3          3A             11          46             8   
4           4          3A             20          -1            -1   

   status1Week  status2Days  labels  
0           14           15       0  
1           -1           -1       0  
2           -1           18       0  
3           -1           -1       0  
4            7           -1       0  
Accuracy of a Random_Forest_classifier
Accuracy: 96.68%


['waitlist_model.joblib']

In [11]:
print("Accuracy of a XGBoost_classifier")

# Create a pipeline with preprocessing and the classifier (XGBoost)
model2 = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', XGBClassifier())
])

# Fit the model
model2.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model2.predict(X_test)

# Evaluate the model
accuracy2 = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy2 * 100:.2f}%')
import joblib
joblib.dump(model2, 'waitlist_model_xgboost.joblib')

Accuracy of a XGBoost_classifier


  if is_sparse(data):


Accuracy: 96.82%


['waitlist_model_xgboost.joblib']

In [12]:
print("Accuracy of a Logistic_Regression")
from sklearn.linear_model import LogisticRegression
# Create a pipeline with preprocessing and the classifier (XGBoost)
model3 = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression())
])

# Fit the model
model3.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model3.predict(X_test)

# Evaluate the model
accuracy3 = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy3 * 100:.2f}%')
import joblib
joblib.dump(model3, 'waitlist_model_logistic_Regression.joblib')

Accuracy of a Logistic_Regression
Accuracy: 95.33%


['waitlist_model_logistic_Regression.joblib']

In [15]:
print("Accuracy of a Support_Vector_Machine")
from sklearn.svm import SVC
# Create a pipeline with preprocessing and the classifier (XGBoost)
model4 = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', SVC())
])

# Fit the model
model4.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model4.predict(X_test)

# Evaluate the model
accuracy4 = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy4 * 100:.2f}%')
import joblib
joblib.dump(model4, 'waitlist_model_SVC.joblib')

Accuracy of a Support_Vector_Machine
Accuracy: 94.20%


['waitlist_model_SVC.joblib']

In [17]:
print("Accuracy of a K_Nearest_Neighbors(KNN)")
from sklearn.neighbors import KNeighborsClassifier
# Create a pipeline with preprocessing and the classifier (XGBoost)
model5 = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', KNeighborsClassifier())
])

# Fit the model
model5.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model5.predict(X_test)

# Evaluate the model
accuracy5 = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy5 * 100:.2f}%')
import joblib
joblib.dump(model5, 'waitlist_model_KNN.joblib')

Accuracy of a K_Nearest_Neighbors(KNN)
Accuracy: 94.76%


['waitlist_model_KNN.joblib']

In [20]:
print("Accuracy of a Neural Network(MLP classifier)")
from sklearn.neural_network import MLPClassifier
# Create a pipeline with preprocessing and the classifier (XGBoost)
model6 = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', MLPClassifier())
])

# Fit the model
model6.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model6.predict(X_test)

# Evaluate the model
accuracy6 = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy6 * 100:.2f}%')
import joblib
joblib.dump(model6, 'waitlist_model_KNN.joblib')

Accuracy of a Neural Network(MLP classifier)
Accuracy: 96.81%


['waitlist_model_KNN.joblib']

In [21]:
print("Accuracy of a Naive_Bayes")
from sklearn.naive_bayes import GaussianNB
# Create a pipeline with preprocessing and the classifier (XGBoost)
model7 = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', GaussianNB())
])

# Fit the model
model7.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model7.predict(X_test)

# Evaluate the model
accuracy7 = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy7 * 100:.2f}%')
import joblib
joblib.dump(model7, 'waitlist_model_KNN.joblib')

Accuracy of a Naive_Bayes
Accuracy: 72.89%


['waitlist_model_KNN.joblib']