In [1]:
#   Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
import datetime as dt
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas/sklearn deprecation warnings; consider
# narrowing the filter once the notebook is stable.
warnings.filterwarnings(action='ignore')

# Plot appearance: apply the base style sheet first, then the palette, then
# the figure-level defaults, so the later settings take precedence.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('viridis')
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

In [3]:
# Load the three source tables from CSV files in the working directory.
print("Loading datasets...")

# Fail fast: log the problem, then re-raise so execution stops here with the
# real I/O error instead of a NameError in the preview code below.
try:
    loyalty_history = pd.read_csv('Customer Loyalty History.csv')
    flight_activity = pd.read_csv('Customer Flight Activity.csv')
    calendar = pd.read_csv('Calendar.csv')
except Exception as e:
    print(f"Error loading data: {e}")
    raise

print("Data loaded successfully!")

# Display basic information about the datasets: shape and first five rows.
for title, frame in (
    ("Customer Loyalty History", loyalty_history),
    ("Customer Flight Activity", flight_activity),
    ("Calendar", calendar),
):
    print(f"\n--- {title} ---")
    print(f"Shape: {frame.shape}")
    print(frame.head())

Loading datasets...
Data loaded successfully!

--- Customer Loyalty History ---
Shape: (16737, 16)
   Loyalty Number Country          Province       City Postal Code  Gender  \
0          480934  Canada           Ontario    Toronto     M2Z 4K1  Female   
1          549612  Canada           Alberta   Edmonton     T3G 6Y6    Male   
2          429460  Canada  British Columbia  Vancouver     V6E 3D9    Male   
3          608370  Canada           Ontario    Toronto     P1W 1K4    Male   
4          530508  Canada            Quebec       Hull     J8Y 3Z5    Male   

  Education    Salary Marital Status Loyalty Card      CLV Enrollment Type  \
0  Bachelor   83236.0        Married         Star  3839.14        Standard   
1   College       NaN       Divorced         Star  3839.61        Standard   
2   College       NaN         Single         Star  3839.75        Standard   
3   College       NaN         Single         Star  3839.75        Standard   
4  Bachelor  103495.0        Married      

Data Preprocessing

In [4]:
# Count null entries per column in the loyalty and flight tables.
loyalty_null_counts = loyalty_history.isna().sum()
flight_null_counts = flight_activity.isna().sum()

print("\n--- Missing Values ---")
print("Loyalty History:")
print(loyalty_null_counts)
print("\nFlight Activity:")
print(flight_null_counts)


--- Missing Values ---
Loyalty History:
Loyalty Number            0
Country                   0
Province                  0
City                      0
Postal Code               0
Gender                    0
Education                 0
Salary                 4238
Marital Status            0
Loyalty Card              0
CLV                       0
Enrollment Type           0
Enrollment Year           0
Enrollment Month          0
Cancellation Year     14670
Cancellation Month    14670
dtype: int64

Flight Activity:
Loyalty Number                 0
Year                           0
Month                          0
Total Flights                  0
Distance                       0
Points Accumulated             0
Points Redeemed                0
Dollar Cost Points Redeemed    0
dtype: int64


In [5]:
# Fill missing cancellation fields in Customer Loyalty History.
# Sentinels: year 9999 means "not cancelled"; month 0 is the placeholder used
# alongside it.
# The filled column is assigned back rather than calling
# `df[col].fillna(..., inplace=True)`: that form operates on a temporary
# column selection, is deprecated in pandas 2.x, and leaves the frame
# unchanged under Copy-on-Write.
# NOTE: Salary also has missing values (see the null counts printed earlier);
# it is intentionally not touched in this cell.
cancellation_fill = {
    'Cancellation Year': 9999,  # 9999 means not cancelled
    'Cancellation Month': 0,
}
for column, sentinel in cancellation_fill.items():
    loyalty_history[column] = loyalty_history[column].fillna(sentinel)

# Check unique values of categorical variables
print("\n--- Unique Values in Categorical Variables ---")
for column in ('Gender', 'Education', 'Marital Status', 'Loyalty Card', 'Enrollment Type'):
    print(f"{column}: {loyalty_history[column].unique()}")


--- Unique Values in Categorical Variables ---
Gender: ['Female' 'Male']
Education: ['Bachelor' 'College' 'Master' 'High School or Below' 'Doctor']
Marital Status: ['Married' 'Divorced' 'Single']
Loyalty Card: ['Star' 'Aurora' 'Nova']
Enrollment Type: ['Standard' '2018 Promotion']
