### Assessment of worm personality based on early behavior

In [12]:
import personality as prs
import load_data as ld
import helpers as hp
import os
import pandas as pd

In [17]:
# Load data
# The optogenetics folder is addressed relative to the repository root
# returned by `hp.find_repo_root()`; `repo_root` is reused by later cells.
repo_root = hp.find_repo_root()
optogenetics_subdirs = ('Data', 'Optogenetics')
optogenetics_path = os.path.join(repo_root, *optogenetics_subdirs)

# NOTE(review): a later cell calls `.values()` on the result, so it is presumably
# a dict of per-worm tables — confirm against `load_data.load_optogenetics`.
optogenetics_data = ld.load_optogenetics(optogenetics_path)

Loading file: c:\Users\msgar\OneDrive\Documents\EPFL\Cours\MA1\Machine_Learning\ML_project2\ML-Project-2\Data\Optogenetics\ATR-\speeds_and_coordinates_02piworm16_2_updated.csv
File loaded: c:\Users\msgar\OneDrive\Documents\EPFL\Cours\MA1\Machine_Learning\ML_project2\ML-Project-2\Data\Optogenetics\ATR-\speeds_and_coordinates_02piworm16_2_updated.csv
   Frame  Speed   X   Y  Changed Pixels  Light_Pulse
0      1    0.0 NaN NaN             156            0
1      2    0.0 NaN NaN               0            0
2      3    0.0 NaN NaN               0            0
3      4    0.0 NaN NaN               0            0
4      5    0.0 NaN NaN               0            0
Loading file: c:\Users\msgar\OneDrive\Documents\EPFL\Cours\MA1\Machine_Learning\ML_project2\ML-Project-2\Data\Optogenetics\ATR-\speeds_and_coordinates_02piworm16_4_updated.csv
File loaded: c:\Users\msgar\OneDrive\Documents\EPFL\Cours\MA1\Machine_Learning\ML_project2\ML-Project-2\Data\Optogenetics\ATR-\speeds_and_coordinates_02piw

In [14]:
# Preprocessing


In [None]:
# Assumes `optogenetics_data` is a dictionary of preprocessed worm data.
# Only its values are used: each one is wrapped in its own DataFrame.
worms = list(map(pd.DataFrame, optogenetics_data.values()))

# Build the per-worm feature matrix from the list of DataFrames.
# NOTE(review): the meaning of `percentile=50` is defined in `personality.create_feature_matrix` — confirm there.
feature_matrix = prs.create_feature_matrix(worms, percentile=50)

# Show a header line followed by the first rows of the feature matrix
print("Feature Matrix:", feature_matrix.head(), sep="\n")


In [None]:
# Hierarchical clustering: the dendrogram is written to
# <repo_root>/Data/Plots/personality/dendrogram.png

dendrogram_path = os.path.join(repo_root, 'Data', 'Plots', 'personality')
os.makedirs(dendrogram_path, exist_ok=True)  # create the output folder if it is missing

# Build the target file path once and reuse it for the call and the message
dendrogram_file = os.path.join(dendrogram_path, 'dendrogram.png')

# Clustering input: every column except the worm identifier
# (the remaining columns are assumed to be numeric features)
numeric_features = feature_matrix.drop(columns=['worm_name'])

prs.perform_hierarchical_clustering(
    numeric_features,
    method='ward',
    output_file=dendrogram_file,
)

print(f"Dendrogram saved at: {dendrogram_file}")

In [None]:
# Define the number of clusters based on the dendrogram
num_clusters = 4  # Example: adjust this after inspecting the dendrogram

# Columns that are identifiers or that this notebook derives later (cluster
# labels, personality labels, PCA coordinates). They must never be fed back
# into the clustering: without this, re-running the cell once `Cluster` exists
# would use the previous labels as a feature, and the string `Personality`
# column would break the fit. `errors='ignore'` skips columns not present yet,
# so the first run behaves exactly like dropping only `worm_name`.
non_feature_columns = ['worm_name', 'Cluster', 'Personality', 'PCA1', 'PCA2']
clustering_features = feature_matrix.drop(columns=non_feature_columns, errors='ignore')

# Perform K-Means clustering on the feature columns only
cluster_labels = prs.perform_kmeans_clustering(
    clustering_features,
    num_clusters=num_clusters
)

# Add the cluster labels to the feature matrix
feature_matrix['Cluster'] = cluster_labels

# Display the feature matrix with cluster labels
print("Feature Matrix with Clusters:")
print(feature_matrix.head())

In [None]:
# Cluster id -> personality type (example mapping).
# The position of each label in the list is the cluster id it is assigned to.
personality_labels = ["Active", "Exploratory", "Fast", "Inactive"]
personality_map = dict(enumerate(personality_labels))

# Translate every worm's cluster id into its personality label;
# ids that are missing from the mapping become NaN.
feature_matrix['Personality'] = feature_matrix['Cluster'].map(personality_map)

# Show the identifier together with both labels for the first rows
labeled_columns = ['worm_name', 'Cluster', 'Personality']
print("Feature Matrix with Personality Labels:")
print(feature_matrix[labeled_columns].head())

#---------------------------
from sklearn.cluster import KMeans

# Define the number of clusters (choose based on the dendrogram or domain knowledge)
num_clusters = 4  # Example: Adjust based on your dendrogram analysis

# Clustering input. `X` was previously never defined in this notebook, so this
# cell raised NameError on a fresh kernel. It is built here from the feature
# matrix without the identifier and without any columns the notebook derives
# (labels, PCA coordinates); `errors='ignore'` skips the ones not present yet.
X = feature_matrix.drop(
    columns=['worm_name', 'Cluster', 'Personality', 'PCA1', 'PCA2'],
    errors='ignore',
)

# Perform K-Means clustering (fixed seed so the assignment is reproducible)
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
feature_matrix['Cluster'] = kmeans.fit_predict(X)

# `Cluster` has just been overwritten, so any personality labels derived from
# the previous cluster ids are stale; recompute them so the saved CSV is
# internally consistent.
if 'Personality' in feature_matrix.columns:
    feature_matrix['Personality'] = feature_matrix['Cluster'].map(personality_map)

# Display the resulting clusters
print("Clusters Assigned to Worms:")
print(feature_matrix[['worm_name', 'Cluster']])

# Save the feature matrix with clusters to a CSV file.
# The folder is created here as well so this cell does not rely on another
# cell having created it.
clustered_output_dir = os.path.join(repo_root, 'Data', 'Plots', 'personality')
os.makedirs(clustered_output_dir, exist_ok=True)
clustered_output_path = os.path.join(clustered_output_dir, 'clustered_feature_matrix.csv')
feature_matrix.to_csv(clustered_output_path, index=False)

print(f"Clustered feature matrix saved at: {clustered_output_path}")


In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

# PCA input. `X` was previously never defined in this notebook, so this cell
# raised NameError on a fresh kernel. Use the feature columns only: drop the
# identifier, the cluster/personality labels, and any PCA coordinates left by
# a previous run of this cell (`errors='ignore'` skips absent columns).
X = feature_matrix.drop(
    columns=['worm_name', 'Cluster', 'Personality', 'PCA1', 'PCA2'],
    errors='ignore',
)

# Reduce dimensions to 2D using PCA
pca = PCA(n_components=2)
reduced_features = pca.fit_transform(X)

# Add PCA results to the feature matrix for visualization
feature_matrix['PCA1'] = reduced_features[:, 0]
feature_matrix['PCA2'] = reduced_features[:, 1]

# Plot clusters in 2D, coloured by the `Cluster` column
plt.figure(figsize=(10, 6))
sns.scatterplot(
    x='PCA1', y='PCA2', hue='Cluster', data=feature_matrix,
    palette='viridis', s=100, alpha=0.7
)
plt.title("K-Means Clustering Visualization (PCA Reduced)")
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.legend(title="Cluster")

# Make sure the plot folder exists before saving, so this cell does not rely
# on another cell having created it. Save before `plt.show()`, which may
# clear the current figure.
pca_plot_dir = os.path.join(repo_root, 'Data', 'Plots', 'personality')
os.makedirs(pca_plot_dir, exist_ok=True)
plt.savefig(os.path.join(pca_plot_dir, 'kmeans_clusters_pca.png'))
plt.show()
