In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from sklearn.cluster import KMeans

In [None]:
# Load the creature table and keep a 10% random subsample to keep clustering fast.
# The sample is seeded so that "Restart & Run All" always clusters the same rows
# (otherwise the random_state passed to KMeans later cannot make results repeatable).
df = (
    pd.read_csv('pickles/biased_df.csv')
    .sample(frac=0.1, random_state=0)
    .reset_index(drop=True)  # discard the original row labels left over from sampling
)

In [None]:
# Elbow method: fit KMeans for k = 1..10 and record each model's inertia
# (within-cluster sum of squares) so the bend in the curve can be inspected.
feature_columns = ['lifeSpan', 'speed', 'maxEnergy', 'kidEnergy', 'sensors', 'nkids', 'pgmDeath']
X = df[feature_columns]

# Settings shared by every candidate model; only n_clusters varies.
kmeans_options = dict(init='k-means++', max_iter=300, n_init=10, random_state=0)
wcss = [
    KMeans(n_clusters=k, **kmeans_options).fit(X).inertia_
    for k in range(1, 11)
]

In [None]:
# Plot WCSS against the number of clusters; the bend ("elbow") suggests a good k.
# x is derived from wcss so the two can never fall out of step:
# wcss[j] holds the inertia of the model fitted with j + 1 clusters.
cluster_counts = list(range(1, len(wcss) + 1))
fig = px.line(
    x=cluster_counts,
    y=wcss,
    title='Elbow Method',
    # Without explicit labels the axes are titled just "x" and "y".
    labels={'x': 'Number of clusters', 'y': 'WCSS (inertia)'},
    template='plotly_dark',
    width=500,
)
fig.show()

In [None]:
# Final model: fit KMeans with 6 clusters on the feature matrix, attach each
# row's cluster label to the frame, and save the labelled table to disk.
kmeans = KMeans(
    n_clusters=6,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=0,
).fit(X)
y_kmeans = kmeans.labels_  # labels of the rows the model was fitted on
df['cluster'] = y_kmeans
df.to_csv('pickles/clustered_df.csv', index=False)

In [None]:
# Project the clustering features onto their first two principal components
# to obtain 2-D plot coordinates, then derive per-row marker styling columns.
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# Transposing yields one row per component: first -> 'x', second -> 'y'.
df['x'], df['y'] = X_pca.T

# 'created' normalised by its maximum, then divided by 10.
df['created_alpha'] = df['created'].div(df['created'].max()).div(10)

# One fixed colour per cluster label (0..5).
cluster_palette = {
    0: 'salmon',
    1: 'greenyellow',
    2: 'dodgerblue',
    3: 'darkorange',
    4: 'purple',
    5: 'cyan',
}
df['color'] = df['cluster'].map(cluster_palette)

# Marker size: lifeSpan relative to the largest lifeSpan, scaled by 10.
df['size'] = df['lifeSpan'].div(df['lifeSpan'].max()).mul(10)

In [None]:
# Preview the first five rows, including the newly added columns.
df.head()

In [None]:
# Bucket each row into an "era" of 1000 `created` units.
# Vectorised equivalent of int(x / 1000) per row: true division followed by an
# integer cast, which truncates toward zero exactly like int() does.
df['era'] = (df['created'] / 1000).astype('int64')

In [None]:
# Animate by era: one frame per era, played in chronological order.
# A 'Play' button with method='animate' only does something when the figure
# carries frames, so each era becomes a go.Frame instead of an overlaid trace.
def _era_trace(era):
    """Return the marker scatter for all rows whose 'era' equals `era`."""
    df_era = df[df['era'] == era]
    return go.Scatter(
        x=df_era['x'],
        y=df_era['y'],
        mode='markers',
        marker=dict(color=df_era['color'], size=df_era['size'], opacity=0.5),
        name=str(era),
    )


# unique() returns eras in order of first appearance; sort so playback runs
# from the earliest era to the latest.
era_traces = [_era_trace(era) for era in sorted(df['era'].unique())]

fig = go.Figure(
    data=era_traces[:1],  # initial view: the earliest era
    frames=[go.Frame(data=[trace], name=trace.name) for trace in era_traces],
)

# Fix the axis ranges over the whole data set (with a 5% margin); otherwise the
# axes are scaled to the first era only and later eras can fall out of view.
x_margin = (df['x'].max() - df['x'].min()) * 0.05
y_margin = (df['y'].max() - df['y'].min()) * 0.05

fig.update_layout(
    title='KMeans Clustering of Creatures',
    template='plotly_dark',
    xaxis=dict(title='PCA1', range=[df['x'].min() - x_margin, df['x'].max() + x_margin]),
    yaxis=dict(title='PCA2', range=[df['y'].min() - y_margin, df['y'].max() + y_margin]),
    showlegend=False,
    width=800,
    height=800,
    updatemenus=[
        dict(
            type='buttons',
            showactive=False,
            buttons=[dict(label='Play',
                          method='animate',
                          # args[0] = None plays every frame in sequence.
                          args=[None, dict(frame=dict(duration=500, redraw=True), fromcurrent=True, mode='immediate')])]
        )
    ]
)
fig.show()