In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from sklearn.cluster import KMeans
from utils.colors import *

In [3]:
# Load the table that the following cells extend with cluster, PCA and styling columns.
df = pd.read_csv(filepath_or_buffer='pickles/biased_df.csv')

In [None]:
# Feature matrix shared by the elbow search, the KMeans fit and the PCA projection.
# NOTE(review): the columns are used exactly as loaded - confirm they are on
# comparable scales, since nothing in this notebook rescales them.
X = df[['lifeSpan', 'speed', 'maxEnergy', 'kidEnergy', 'sensors', 'nkids', 'pgmDeath']]

# Use the elbow method to find the optimal number of clusters.
# The search is off by default; set RUN_ELBOW = True to fit k = 1..10 and plot
# the within-cluster sum of squares. A real flag is used instead of wrapping the
# code in a string literal, which the cell would echo back as its output.
RUN_ELBOW = False
if RUN_ELBOW:
    wcss = []
    for i in range(1, 11):
        # Separate name so the search never overwrites the final `kmeans` model.
        elbow_model = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
        elbow_model.fit(X)
        wcss.append(elbow_model.inertia_)
    elbow_fig = px.line(x=list(range(1, 11)), y=wcss, title='Elbow Method', template='plotly_dark', width=500)
    elbow_fig.show()

In [5]:
# Fit the final model on X and store each row's cluster id on the frame.
nclusters = 4
kmeans = KMeans(
    n_clusters=nclusters,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=0,
)
kmeans.fit(X)
y_kmeans = kmeans.labels_
df['cluster'] = y_kmeans

# Snapshot of the frame with the labels exactly as KMeans produced them.
# NOTE(review): the era-ordered relabeling of `cluster` happens in a later cell
# and is not written back to this file.
df.to_csv('pickles/clustered_df.csv', index=False)

In [6]:
# Determine x/y location from PCA:
from sklearn.decomposition import PCA
# Seeded like the KMeans fit so the 2-D layout is identical on every run, even
# when scikit-learn selects a randomized SVD solver for this input size.
pca = PCA(n_components=2, random_state=0)
X_pca = pca.fit_transform(X)
df['x'] = X_pca[:, 0]
df['y'] = X_pca[:, 1]
# `created` divided by its column maximum, then by 10.
df['created_alpha'] = (df['created'] / df['created'].max())/10
# `lifeSpan` divided by its column maximum, then multiplied by 10; used as marker size.
df['size'] = df['lifeSpan'] / df['lifeSpan'].max() * 10

In [7]:
# Bucket `created` into eras of width 2000, truncating toward zero.
df['era'] = (df['created'] / 2000).astype(int)
# Get the era rank of each cluster: cluster ids ordered by ascending mean era.
era_rank = df.groupby('cluster')['era'].mean().sort_values().index
# Re-name the clusters to match the era rank. The old-id -> rank lookup is built
# once instead of searching a freshly built list for every row.
cluster_relabel = {old_id: rank for rank, old_id in enumerate(era_rank)}
df['cluster'] = df['cluster'].map(cluster_relabel)

In [None]:
# Palette for the clusters; commented-out entries are colors currently left out.
colors = [
    #'rgb(95, 70, 144)',
    'rgb(56, 166, 165)',
    'rgb(115, 175, 72)',
    'rgb(237, 173, 8)',
    #'rgb(225, 124, 5)',
    'rgb(204, 80, 62)',
    'rgb(148, 52, 110)',
    'rgb(111, 64, 112)',
    'rgb(102, 102, 102)',
]
# Fail with an explicit message rather than a bare IndexError when more
# clusters are requested than the palette can color.
if nclusters > len(colors):
    raise ValueError(f'nclusters={nclusters} exceeds the {len(colors)} colors available in the palette')
# Cluster id -> color, for ids 0 .. nclusters-1.
cluster_colors = dict(enumerate(colors[:nclusters]))
cluster_colors

In [9]:
# Attach each row's plotting color by looking its cluster id up in cluster_colors.
df['color'] = df['cluster'].map(cluster_colors)

In [None]:
# Preview the first five rows with the columns added so far.
df.head()

In [11]:
# Plot a 10% sample of the rows. The draw is seeded (like the KMeans fit) so
# every figure built from df_sample is reproducible across runs.
df_sample = df.sample(frac=0.1, random_state=0).reset_index(drop=True)

In [12]:
# Columns listed in each point's hover text, in display order.
hover_fields = ['cluster', 'era', 'lifeSpan', 'speed', 'maxEnergy', 'kidEnergy', 'sensors', 'nkids', 'pgmDeath']


def _hover_label(row):
    """Format one row as hover text: a leading <br>, then one bold 'name: value' entry per field."""
    entries = [f'<b>{key}:</b> {value}</br>' for key, value in row[hover_fields].items()]
    return '<br>' + ''.join(entries)


df_sample['label'] = df_sample.apply(_hover_label, axis=1)

In [None]:
import plotly.graph_objs as go

fig = go.Figure()

# One animation step per era, including eras that have no sampled rows.
eras = range(df_sample['era'].min(), df_sample['era'].max() + 1)


def _era_scatter(era, visible):
    """Return the marker trace for the sampled rows of one era."""
    df_era = df_sample[df_sample['era'] == era]
    return go.Scatter(
        x=df_era['x'],
        y=df_era['y'],
        text=df_era['label'],
        hoverinfo='text',
        mode='markers',
        marker=dict(color=df_era['color'], size=df_era['size'], opacity=0.5),
        name=str(era),
        visible=visible)


def _padded_range(values, pad_fraction=0.1):
    """Return [min - pad, max + pad], where pad is a fraction of the value span.

    Padding is added outward on both sides, so it widens the range whatever the
    sign of the bounds. A zero span falls back to a pad of 1.0.
    """
    low, high = values.min(), values.max()
    pad = (high - low) * pad_fraction or 1.0
    return [low - pad, high + pad]


# Add one trace per era. Only the first era starts visible, so the figure is
# not blank before Play is pressed and matches the slider's first step.
for era in eras:
    fig.add_trace(_era_scatter(era, visible=(era == eras.start)))

# Create frames for each step of the animation: every frame carries all era
# traces and shows only the one belonging to that frame.
frames = [
    go.Frame(
        data=[_era_scatter(era_inner, visible=(era_inner == era)) for era_inner in eras],
        name=str(era),
        layout=go.Layout(title_text=f'Era: {era}'))
    for era in eras
]

fig.update(frames=frames)

# Determine the axis ranges once so they stay fixed across frames.
x_range = _padded_range(df_sample['x'])
y_range = _padded_range(df_sample['y'])

fig.update_layout(
    updatemenus=[
        dict(
            type='buttons',
            showactive=False,
            buttons=[
                dict(
                    label='Play',
                    method='animate',
                    args=[None, dict(frame=dict(duration=500, redraw=True), fromcurrent=True, mode='immediate')]
                ),
                dict(
                    label='Pause',
                    method='animate',
                    args=[[None], dict(frame=dict(duration=0, redraw=False), mode='immediate')]
                )
            ]
        )
    ],
    sliders=[
        dict(
            steps=[dict(method='animate', args=[[str(era)], dict(mode='immediate', frame=dict(duration=500, redraw=True))], label=str(era)) for era in eras],
            transition=dict(duration=300),
            x=0.1,  # Position of the slider
            len=0.9  # Length of the slider
        )
    ]
)

# Legend for the marker encodings. Opacity is a constant 0.5 on every trace,
# so it is not listed as an encoding.
fig.add_annotation(
    x=0.2,
    y=0.8,
    xref='paper',
    yref='paper',
    text='<b>Color:</b> Cluster<br><b>Size:</b> Lifespan',
    # align text left:
    align='left',
    showarrow=False,
)

fig.update_layout(
    title='KMeans Clustering of Creatures',
    template='plotly_dark',
    xaxis=dict(title='PCA1', range=x_range),
    yaxis=dict(title='PCA2', range=y_range),
    showlegend=False,
    width=800,
    height=800
)

fig.show()
fig.write_html('figs/cluster_animation.html')


In [None]:
# Static view of the sampled rows in PCA space, colored by cluster.
fig = go.Figure(go.Scatter(
    x=df_sample['x'],
    y=df_sample['y'],
    text=df_sample['label'],
    hoverinfo='text',
    mode='markers',
    marker=dict(color=df_sample['color'], size=df_sample['size'], opacity=0.5)))
# Legend for the marker encodings. Opacity is a constant 0.5 here, so it is
# not listed as an encoding.
fig.add_annotation(
    x=0.2,
    y=0.8,
    xref='paper',
    yref='paper',
    text='<b>Color:</b> Cluster<br><b>Size:</b> Lifespan',
    # align text left:
    align='left',
    showarrow=False,
)
fig.update_layout(
    title='KMeans Clustering of Creatures',
    template='plotly_dark',
    xaxis=dict(title='PCA1'),
    yaxis=dict(title='PCA2'),
    showlegend=False,
    width=800,
    height=800)
# Display first so the figure still renders if one of the file exports fails.
# NOTE(review): write_image may need an extra image-export backend - confirm it is installed.
fig.show()
fig.write_html(f'figs/cluster_static_categorical_n{nclusters}.html')
fig.write_image(f'figs/cluster_static_categorical_n{nclusters}.svg')

In [None]:
# Create a heatmap comparing variables for each cluster:
import numpy as np
# Per-cluster means of every characteristic plus era. After the transpose,
# rows are variables and columns are cluster ids.
heatmap_columns = ['lifeSpan', 'speed', 'maxEnergy', 'kidEnergy', 'sensors', 'nkids', 'pgmDeath', 'cluster', 'era']
df_cluster = df.filter(heatmap_columns).groupby('cluster').mean().T

# Min-max normalize each row (one variable across clusters) so variables share
# a 0-1 scale. A variable that is identical in every cluster has zero span; its
# divisor is replaced by 1 so the row becomes 0 instead of NaN.
row_min = df_cluster.min(axis=1)
row_span = (df_cluster.max(axis=1) - row_min).replace(0, 1)
df_cluster = df_cluster.sub(row_min, axis=0).div(row_span, axis=0)

fig = px.imshow(df_cluster, title='Cluster Averages', template='plotly_dark', width=800, height=800, color_continuous_scale='Gray')
fig.update_coloraxes(showscale=False)

# Outline each heatmap column in its cluster's color. The outline height
# follows the number of rows, and the color is looked up from the column's own
# cluster id rather than from a separate counter.
n_rows = len(df_cluster.index)
for position, cluster in enumerate(df_cluster.columns):
    fig.add_shape(
        type='rect',
        x0=position - 0.5,
        x1=position + 0.5,
        y0=-0.5,
        y1=n_rows - 0.5,
        line=dict(color=cluster_colors[int(cluster)], width=5)
    )
fig.show()

In [15]:
# Order the sample by creation time ahead of the time-series plots below.
df_sample = df_sample.sort_values(by='created')

In [None]:
# Inspect the traces plotly express builds for a scatter with a rolling trendline.
rolling_preview = px.scatter(
    df_sample,
    x='created',
    y='lifeSpan',
    trendline="rolling",
    trendline_options=dict(window=5),
)
rolling_preview.data

In [None]:
import plotly.express as px
import plotly.graph_objs as go

# Define a function to create traces for a given characteristic
def create_traces(characteristic):
    """Build a scatter trace and a rolling-trendline trace per cluster.

    Reads the notebook globals ``df_sample`` and ``cluster_colors``. For every
    cluster id in ``cluster_colors`` the matching rows are plotted as
    ``characteristic`` against ``created`` with a 100-point rolling trendline,
    and both traces are styled in the cluster's color.

    Args:
        characteristic: name of the ``df_sample`` column to put on the y axis.

    Returns:
        A flat list ordered [scatter, trendline, scatter, trendline, ...],
        one pair per cluster in ``cluster_colors`` order.
    """
    styled = []
    for cluster, color in cluster_colors.items():
        subset = df_sample[df_sample['cluster'] == cluster]
        figure = px.scatter(
            subset,
            x='created',
            y=characteristic,
            trendline="rolling",
            trendline_options=dict(window=100),
        )
        # Plotly express emits the points first and the trendline second.
        points, rolling = figure.data[0], figure.data[1]
        points.update(
            name=f'{cluster} {characteristic} Scatter',
            marker=dict(color=color, size=2, opacity=0.2),
        )
        rolling.update(
            name=f'{cluster} {characteristic} Trendline',
            line=dict(color=color, width=2),
        )
        styled.extend([points, rolling])
    return styled

# Create initial figure
fig = go.Figure()

# Single source of truth for the dropdown entries; the first one is shown initially.
characteristics = ['lifeSpan', 'speed', 'maxEnergy', 'kidEnergy', 'sensors', 'nkids', 'pgmDeath']
initial_characteristic = characteristics[0]

# Add every characteristic's traces to the figure and record, position by
# position, which characteristic each trace in fig.data belongs to.
trace_owner = []
for characteristic in characteristics:
    for trace in create_traces(characteristic):
        trace.visible = (characteristic == initial_characteristic)  # Only the initial characteristic is visible
        fig.add_trace(trace)
        trace_owner.append(characteristic)

# Create dropdown menu. Each button's visibility mask is derived from
# trace_owner, so it stays correct however many traces a characteristic has.
dropdown_buttons = [
    dict(
        label=name,
        method='update',
        args=[{'visible': [owner == name for owner in trace_owner]},
              {'title': f'{name} Over Time', 'yaxis': {'title': name}}]
    )
    for name in characteristics
]

# Update layout with the dropdown
fig.update_layout(
    updatemenus=[dict(
        type='dropdown',
        showactive=True,
        buttons=dropdown_buttons
    )],
    title=f'{initial_characteristic} Over Time',
    template='plotly_dark',
    xaxis=dict(title='Created'),
    yaxis=dict(title=initial_characteristic),
    showlegend=True,
    width=800,
    height=800
)
fig.write_html('figs/cluster_trendlines.html')
fig.show()


In [29]:
def make_2d_continuous_scatter(df, x, y, color, size, opacity, label, weight, stroke_color, title='KMeans Clustering of Creatures'):
    """Display an 800x800 dark-themed scatter with colors from ``continuous_color``.

    Args:
        df: DataFrame containing every column named by the arguments below.
        x: column for the x axis; also used as the x-axis title.
        y: column for the y axis; also used as the y-axis title.
        color: column passed as the first argument to ``continuous_color``.
        size: column used for the marker sizes.
        opacity: column passed as the second argument to ``continuous_color``
            (presumably a per-point alpha - confirm against the helper, which
            is not defined in this notebook).
        label: column holding the hover text.
        weight: width of the marker outline.
        stroke_color: color of the marker outline.
        title: figure title; the default keeps the title existing calls get.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    marker_style = dict(
        color=continuous_color(df[color], df[opacity]),
        size=df[size],
        line=dict(width=weight, color=stroke_color),
    )
    fig = go.Figure(go.Scatter(
        x=df[x],
        y=df[y],
        text=df[label],
        hoverinfo='text',
        mode='markers',
        marker=marker_style))
    # Legend mapping each visual encoding to the column that drives it; the
    # third entry names the opacity encoding, matching the first two.
    fig.add_annotation(
        x=0.2,
        y=0.8,
        xref='paper',
        yref='paper',
        text=f'<b>Color:</b> {color}<br><b>Size:</b> {size}<br><b>Opacity:</b> {opacity}',
        # align text left:
        align='left',
        showarrow=False,
    )
    fig.update_layout(title=title, template='plotly_dark', xaxis=dict(title=x), yaxis=dict(title=y), showlegend=False, width=800, height=800)
    fig.show()

In [None]:
# Sampled rows in PCA space: color from `created`, opacity column `created_alpha`,
# thin white marker outlines.
scatter_encoding = dict(
    x='x',
    y='y',
    color='created',
    size='size',
    opacity='created_alpha',
    label='label',
    weight=0.5,
    stroke_color='white',
)
make_2d_continuous_scatter(df_sample, **scatter_encoding)