In [4]:
import numpy as np
from scipy.stats import boxcox
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

In [5]:
# Load the per-customer score table from df_relevant.csv (path is relative to
# the notebook's working directory) and preview its first rows.
data=pd.read_csv('df_relevant.csv')
data.head()

Unnamed: 0,CustomerID,RSCORE,F SCORE,M SCORE,RFM SCORE
0,12346,1,4,4,144
1,12347,4,1,4,414
2,12348,2,3,4,234
3,12349,3,2,4,324
4,12350,1,3,2,132


In [6]:
# Box-Cox-transform the three score columns and persist the result.
#
# The transform is fitted independently per column (scipy picks the lambda that
# maximises the log-likelihood). The fitted lambdas are kept in
# `boxcox_lambdas` so the transform can later be inverted or re-applied to new
# data instead of being silently discarded.

# Columns to transform; every other column is carried over unchanged.
columns_to_transform = ['RSCORE', 'F SCORE', 'M SCORE']

# Work on a copy so the original `data` frame stays intact for re-runs.
transformed_data = data.copy()

# column name -> lambda fitted by boxcox for that column
boxcox_lambdas = {}

for column in columns_to_transform:
    # Box-Cox requires strictly positive input. The +1 shift makes zeros valid;
    # it does NOT rescue values <= -1, for which boxcox raises ValueError.
    transformed_data[column], boxcox_lambdas[column] = boxcox(data[column] + 1)

# Save the transformed data to a new CSV file (index is not written).
transformed_data.to_csv('transformed_data.csv', index=False)

print("Transformed data has been saved to transformed_data.csv")

Transformed data has been saved to transformed_data.csv


In [7]:
# Read the transformed table back from the CSV written by the previous cell and
# preview it; df1 is the frame the clustering cell below operates on.
df1=pd.read_csv('transformed_data.csv')
df1.head()

Unnamed: 0,CustomerID,RSCORE,F SCORE,M SCORE,RFM SCORE
0,12346,0.879444,2.8804,2.898055,144
1,12347,2.872286,0.880423,2.898055,414
2,12348,1.6151,2.276011,2.898055,234
3,12349,2.270598,1.618069,2.898055,324
4,12350,0.879444,2.276011,1.624517,132


In [8]:
# (rows, columns) of the reloaded frame.
df1.shape

(4372, 5)

In [15]:
# X is a second name for the same DataFrame object as df1 (no copy is made), so
# columns later added to df1 are visible through X as well.
# NOTE(review): X is not referenced by any cell visible here — confirm it is
# used further down, otherwise this cell can be dropped.
X = df1

In [16]:
from mpl_toolkits.mplot3d import Axes3D

In [24]:
import plotly.graph_objects as go



# Cluster customers with K-Means on the three transformed score columns and
# show them in an interactive 3D scatter plot with the centroids overlaid.

# One seed drives both the K-Means initialisation and the plotting jitter, so
# cluster labels, colours and point positions are identical on every
# Restart & Run All.
SEED = 42

# Columns used as clustering features, in (x, y, z) plotting order.
feature_columns = ['RSCORE', 'F SCORE', 'M SCORE']

# Apply K-Means Clustering
n_clusters = 3  # Define the number of clusters
# n_init is set explicitly because its default differs between scikit-learn
# releases (10 in older versions, 'auto' in newer ones); pinning it keeps the
# result independent of the installed version.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=SEED)
# Adds/overwrites the 'Cluster' column on df1; only the feature columns are fed
# to the model, so re-running this cell does not leak labels into the fit.
df1['Cluster'] = kmeans.fit_predict(df1[feature_columns])

# Centroid coordinates, one row per cluster, columns ordered as feature_columns.
centroids = kmeans.cluster_centers_

# Standard deviation of the Gaussian noise added for display only. The scores
# take few distinct values, so without jitter many points would overlap exactly.
jitter_amount = 0.2

# Draw all noise from a local, seeded generator rather than the global
# np.random state, so the figure is reproducible and other cells are unaffected.
rng = np.random.default_rng(SEED)
jitter = rng.normal(0, jitter_amount, size=(len(df1), len(feature_columns)))
x_jitter = df1['RSCORE'] + jitter[:, 0]
y_jitter = df1['F SCORE'] + jitter[:, 1]
z_jitter = df1['M SCORE'] + jitter[:, 2]

# Creating an interactive 3D plot with Plotly
fig = go.Figure()

# Data points: jittered positions, coloured by assigned cluster label.
fig.add_trace(go.Scatter3d(
    x=x_jitter,
    y=y_jitter,
    z=z_jitter,
    mode='markers',
    marker=dict(
        size=5,
        color=df1['Cluster'],  # Color by cluster
        colorscale='Viridis',
        opacity=0.8
    ),
    name='Data Points'
))

# Centroids: plotted at their exact (un-jittered) coordinates.
fig.add_trace(go.Scatter3d(
    x=centroids[:, 0],
    y=centroids[:, 1],
    z=centroids[:, 2],
    mode='markers',
    marker=dict(
        size=10,
        color='red',
        symbol='x',
        opacity=1
    ),
    name='Centroids'
))

# Axis titles and figure size.
fig.update_layout(
    title='3D K-Means Clustering with Jitter and Centroids',
    scene=dict(
        xaxis_title='RSCORE',
        yaxis_title='F SCORE',
        zaxis_title='M SCORE'
    ),
    width=1200,  # Set the desired width
    height=800  # Set the desired height
)

fig.show()
