In [74]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.metrics import silhouette_score
from sklearn.cluster import DBSCAN

In [75]:
data = px.data.tips()

data.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [76]:
cat_cols= list(data.select_dtypes(include='object').columns)

for i in cat_cols:
    encode=LabelEncoder()
    data[i]=encode.fit_transform(data[i])

data.head()   

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,0,0,2,0,2
1,10.34,1.66,1,0,2,0,3
2,21.01,3.5,1,0,2,0,3
3,23.68,3.31,1,0,2,0,2
4,24.59,3.61,0,0,2,0,4


In [91]:
#Scale the dataset
scaler=RobustScaler()
columns=data.columns

scaled_data=scaler.fit_transform(data)

data=pd.DataFrame(data=scaled_data, columns=columns)

data.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,-0.074675,-1.2096,-1.0,0.0,0.0,0.0,0.0
1,-0.691558,-0.7936,0.0,0.0,0.0,0.0,1.0
2,0.298237,0.384,0.0,0.0,0.0,0.0,1.0
3,0.545918,0.2624,0.0,0.0,0.0,0.0,0.0
4,0.630334,0.4544,-1.0,0.0,0.0,0.0,2.0


In [145]:
#Model Training
model=DBSCAN(eps=0.78, min_samples=4)
model.fit(data)
preds=model.labels_

preds

array([-1,  0,  0,  1, -1,  2,  1,  2,  1,  1,  1, -1,  1,  2,  3,  1, -1,
        0, -1,  0,  1,  3,  3, -1,  1,  2,  1,  1,  1,  3,  1,  2,  3, -1,
        1,  0,  0, -1,  0, -1,  0,  1,  1,  1,  2,  1,  1,  2, -1,  1,  1,
        3, -1,  1,  2,  1, -1, -1,  4, -1,  4,  4,  4, -1,  0,  0,  3, -1,
        1,  4,  1, -1,  5, -1,  3,  1,  4, -1,  6,  6,  7,  6, -1, -1,  6,
       -1,  6,  6, -1,  6,  4,  1,  5, -1,  3, -1, -1,  4,  4,  1,  5,  5,
       -1,  5,  3,  4,  4,  4,  1, -1,  1, -1, -1,  1, -1,  3,  2,  8,  8,
       -1,  6,  8,  6,  6,  8, -1,  6,  8,  8, -1,  6,  8,  8,  8,  8,  8,
        8,  8,  7,  8,  8, -1, -1, -1,  8,  8, -1,  8,  6,  6,  1,  1,  0,
        2,  2, -1, -1, -1,  3,  2,  2,  1, -1,  1,  5,  0,  1,  2,  5,  5,
       -1,  4, -1,  9,  4,  9,  4,  4, -1,  9, -1, -1, -1, -1,  9, -1, -1,
       -1, -1, -1,  4, -1, -1,  7,  7,  6,  7, -1, 10,  7, -1, 10, 10, 10,
       -1, -1, -1, -1,  4,  5, -1, -1, -1,  5, -1,  5, -1,  4,  4, -1, -1,
       -1, -1, -1, -1, -1

In [146]:
silhouette_score(X=data, labels=preds)

0.10725902590436488

In [147]:
px.scatter_3d(data_frame=data, x = 'sex', y='tip', 
              z = 'time', color = preds)