# BlackBox:Ghost - Cluster Analysis Notebook

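This notebook demonstrates a basic unsupervised learning approach to identifying anomalous behavior patterns in synthetic user log data. It encodes and standardizes per-event features, groups the events with K-Means, and plots the resulting clusters by hour of day and action duration so that unusual groups can be inspected visually.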

In [None]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

# Read the activity log, parse the event timestamps, and derive the
# hour-of-day of each event from the parsed values.
df = pd.read_csv("user_activity_logs.csv")
event_times = pd.to_datetime(df['timestamp'])
df['timestamp'] = event_times
df['hour'] = event_times.dt.hour

# Encode categorical features as numbers on a copy, leaving `df` untouched.
df_encoded = df.copy()

# Validate the status labels before mapping them: Series.map() silently turns
# any value that is absent from the mapping (including missing values) into
# NaN, and that NaN would otherwise only surface later as an opaque error in
# the clustering step.
status_codes = {'success': 1, 'denied': 0}
unknown_status = df_encoded.loc[
    ~df_encoded['status'].isin(list(status_codes)), 'status'
]
if not unknown_status.empty:
    raise ValueError(
        "Unexpected values in 'status' column: "
        f"{sorted(unknown_status.astype(str).unique())}; "
        f"expected one of {sorted(status_codes)}"
    )
df_encoded['status'] = df_encoded['status'].map(status_codes)

# Replace each distinct action / user with an integer category code
# (pandas assigns -1 to missing values).
# NOTE(review): these codes are arbitrary ordinals, so distances between them
# carry no meaning of their own -- confirm this is acceptable for clustering.
df_encoded['action'] = df_encoded['action'].astype('category').cat.codes
df_encoded['user_id'] = df_encoded['user_id'].astype('category').cat.codes

# Feature selection: the encoded columns that describe each log event.
feature_columns = ['user_id', 'action', 'status', 'duration_ms', 'hour']
features = df_encoded[feature_columns]

# Standardize the features so that each column has zero mean and unit
# variance; otherwise the column with the largest numeric range would
# dominate the Euclidean distances used by KMeans.
scaler = StandardScaler()
X = scaler.fit_transform(features)

# Apply KMeans clustering.
# n_init is set explicitly: its default changed from 10 to 'auto' in
# scikit-learn 1.4, so leaving it implicit makes the cluster assignments
# depend on the installed version even with a fixed random_state (and emits
# a FutureWarning on 1.2-1.3).
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)

# Store each event's cluster label on the original (un-encoded) frame so the
# plot and any later inspection can use the readable columns.
df['cluster'] = kmeans.fit_predict(X)

# Visualization: scatter every event by hour of day and duration, coloured
# by the cluster label assigned above.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='hour',
    y='duration_ms',
    hue='cluster',
    palette='tab10',
    ax=ax,
)
ax.set_title("User Activity Clusters by Hour and Duration")
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Action Duration (ms)")
ax.grid(True)
plt.show()
