In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler

# Seleccionar las características relevantes
df = pd.read_csv("../datasets/AirlineReviews.csv", encoding="latin-1")
df_select = df[['GroundServiceRating', 'EntertainmentRating', 'CabinType']]
df_select.dropna(inplace=True)
scaler = StandardScaler()
df_std = pd.DataFrame(scaler.fit_transform(df_select.iloc[:, 0:2]),
                      columns=['GroundServiceRating', 'EntertainmentRating'])
df_std['CabinType'] = df_select['CabinType']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_select.dropna(inplace=True)


In [32]:
color_dict = {'First Class': '#F65E5D', 'Economy Class': '#3AB4F2', 'Business Class': '#FFBC46', 'Premium Economy': '#2F9C95'}
label_dict = {0: 'First Class', 1: 'Economy Class', 2: 'Business Class', 3: 'Premium Economy'}

In [33]:
kmeans = KMeans(n_clusters=4, random_state=42, max_iter=1000).fit(df_std[['GroundServiceRating', 'EntertainmentRating']])
df_std['KMeans_Cluster'] = kmeans.labels_



In [34]:
dbscan = DBSCAN(eps=0.3, min_samples=5).fit(df_std[['GroundServiceRating', 'EntertainmentRating']])
df_std['DBSCAN_Cluster'] = dbscan.labels_

In [35]:
hierarchical = AgglomerativeClustering(n_clusters=4).fit(df_std[['GroundServiceRating', 'EntertainmentRating']])
df_std['Hierarchical_Cluster'] = hierarchical.labels_

In [36]:
sns.set(rc={'axes.facecolor': 'EFEFEF'})
plt.figure(figsize=(11, 9))

# Scatter plot for K-means
kmeans_plot = sns.scatterplot(data=df_std, x='GroundServiceRating', y='EntertainmentRating',
                              hue='KMeans_Cluster', palette=color_dict, s=36, linewidth=0, alpha=0.7)

ValueError: The palette dictionary is missing keys: {0, 1, 2, 3}

<Figure size 1100x900 with 0 Axes>

In [None]:
# Scatter plot for DBSCAN
dbscan_plot = sns.scatterplot(data=df_std, x='GroundServiceRating', y='EntertainmentRating',
                              hue='DBSCAN_Cluster', palette=color_dict, marker='x', s=100, linewidth=1.5)

# Scatter plot for Hierarchical clustering
hierarchical_plot = sns.scatterplot(data=df_std, x='GroundServiceRating', y='EntertainmentRating',
                                    hue='Hierarchical_Cluster', palette=color_dict, marker='s', s=50, linewidth=1)

# Set the plot limits
plt.xlim(-3, 3)
plt.ylim(-3, 3)

# Add legends
legend_handles = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=6) for color in color_dict.values()]
legend_labels = list(color_dict.keys())
kmeans_plot.legend(legend_handles, legend_labels, title='K-Means Clusters', loc='upper left')

legend_handles = [plt.Line2D([0], [0], marker='x', color='w', markeredgecolor=color, markersize=10, linestyle='None') for color in color_dict.values()]
legend_labels = list(color_dict.keys())
dbscan_plot.legend(legend_handles, legend_labels, title='DBSCAN Clusters', loc='upper left')

legend_handles = [plt.Line2D([0], [0], marker='s', color='w', markerfacecolor=color, markersize=6) for color in color_dict.values()]
legend_labels = list(color_dict.keys())
hierarchical_plot.legend(legend_handles, legend_labels, title='Hierarchical Clusters', loc='upper left')

# Save the plot
plt.savefig('scatter.png')

# Show the plot
plt.show()