# Sock Color Preference Analysis

This notebook analyzes achromatic (black, white, and grayscale) and chromatic (colored) sock preferences separately, to identify the patterns that are distinct to each category.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings

# Suppress warnings of every category for the rest of the session so that
# library notices do not clutter the cell outputs.
# NOTE(review): a blanket filter also hides deprecation notices — confirm
# that this is intended.
warnings.filterwarnings(action='ignore')

# Apply seaborn's white-grid look to all figures drawn below.
sns.set_style(style='whitegrid')

## Load Datasets

In [None]:
# Read the two raw survey exports: one for the achromatic analysis and one
# for the chromatic analysis.
df_achromatic = pd.read_csv('../data/raw/fashion_survey_achromatic.csv')
df_color = pd.read_csv('../data/raw/fashion_survey_color.csv')

# Print the (rows, columns) shape of each frame as a quick check on the load.
for dataset_label, survey_frame in (('Achromatic', df_achromatic), ('Chromatic', df_color)):
    print(f"{dataset_label} dataset: {survey_frame.shape}")

## Achromatic Sock Preference Analysis

Achromatic socks include black, white, and grayscale options.

In [None]:
# Collect every column whose name contains the achromatic sock marker.
achromatic_sock_cols = [
    name for name in df_achromatic.columns if 'achromatic_sock_' in name
]

# For each respondent (row), pick the column that holds the largest value.
# idxmax reports the first such column when several share the maximum.
achromatic_prefs = df_achromatic[achromatic_sock_cols]
most_preferred_achromatic = achromatic_prefs.idxmax(axis=1)

# Tally the top picks once and reuse the counts for the printout and the chart.
achromatic_counts = most_preferred_achromatic.value_counts()
print("Most Preferred Achromatic Sock Distribution:")
print(achromatic_counts)

# Bar chart: how often each sock column is a respondent's top pick.
fig_ach, ax_ach = plt.subplots(figsize=(10, 6))
achromatic_counts.plot(kind='bar', ax=ax_ach)
ax_ach.set_title('Distribution of Most Preferred Achromatic Socks')
ax_ach.set_xlabel('Sock Type')
ax_ach.set_ylabel('Count')
plt.xticks(rotation=45, ha='right')  # slanted labels so long column names stay legible
fig_ach.tight_layout()
plt.show()

### Predict Achromatic Preferences

In [None]:
# Features: every column except the achromatic sock columns.
# Target: the top-pick column name derived for each respondent.
# NOTE(review): assumes all remaining columns are numeric and usable as
# model inputs — confirm against the dataset schema.
X_achromatic = df_achromatic.drop(columns=achromatic_sock_cols)
y_achromatic = most_preferred_achromatic

# Map the column-name labels to integer class ids.
le_achromatic = LabelEncoder()
y_achromatic_encoded = le_achromatic.fit_transform(y_achromatic)

# Hold out 20% of the respondents for evaluation; the fixed seed makes the
# split repeatable. NOTE(review): the split is not stratified by class.
(X_train_ach, X_test_ach,
 y_train_ach, y_test_ach) = train_test_split(
    X_achromatic,
    y_achromatic_encoded,
    random_state=42,
    test_size=0.2,
)

# Fit a single decision tree and a 100-tree random forest on the same split.
dt_achromatic = DecisionTreeClassifier(random_state=42).fit(X_train_ach, y_train_ach)
rf_achromatic = RandomForestClassifier(
    n_estimators=100, random_state=42
).fit(X_train_ach, y_train_ach)

# Accuracy of each model on the held-out respondents.
dt_pred_ach = dt_achromatic.predict(X_test_ach)
rf_pred_ach = rf_achromatic.predict(X_test_ach)
dt_acc = accuracy_score(y_test_ach, dt_pred_ach)
rf_acc = accuracy_score(y_test_ach, rf_pred_ach)

print("\nAchromatic Sock Prediction Accuracy:")
for model_label, model_acc in (("Decision Tree", dt_acc), ("Random Forest", rf_acc)):
    print(f"  {model_label}: {model_acc:.4f}")

## Chromatic Sock Preference Analysis

Chromatic socks include various colored options.

In [None]:
# Collect every column whose name contains the chromatic sock marker.
color_sock_cols = [
    name for name in df_color.columns if 'color_sock_' in name
]

# For each respondent (row), pick the column that holds the largest value.
# idxmax reports the first such column when several share the maximum.
color_prefs = df_color[color_sock_cols]
most_preferred_color = color_prefs.idxmax(axis=1)

# Tally the top picks once and reuse the counts for the printout and the chart.
color_counts = most_preferred_color.value_counts()
print("Most Preferred Chromatic Sock Distribution:")
print(color_counts)

# Bar chart: how often each sock column is a respondent's top pick.
fig_col, ax_col = plt.subplots(figsize=(10, 6))
color_counts.plot(kind='bar', color='skyblue', ax=ax_col)
ax_col.set_title('Distribution of Most Preferred Chromatic Socks')
ax_col.set_xlabel('Sock Type')
ax_col.set_ylabel('Count')
plt.xticks(rotation=45, ha='right')  # slanted labels so long column names stay legible
fig_col.tight_layout()
plt.show()

### Predict Chromatic Preferences

In [None]:
# Features: every column except the chromatic sock columns.
# Target: the top-pick column name derived for each respondent.
# NOTE(review): assumes all remaining columns are numeric and usable as
# model inputs — confirm against the dataset schema.
X_color = df_color.drop(columns=color_sock_cols)
y_color = most_preferred_color

# Map the column-name labels to integer class ids.
le_color = LabelEncoder()
y_color_encoded = le_color.fit_transform(y_color)

# Hold out 20% of the respondents for evaluation; the fixed seed makes the
# split repeatable. NOTE(review): the split is not stratified by class.
(X_train_col, X_test_col,
 y_train_col, y_test_col) = train_test_split(
    X_color,
    y_color_encoded,
    random_state=42,
    test_size=0.2,
)

# Fit a single decision tree and a 100-tree random forest on the same split.
dt_color = DecisionTreeClassifier(random_state=42).fit(X_train_col, y_train_col)
rf_color = RandomForestClassifier(
    n_estimators=100, random_state=42
).fit(X_train_col, y_train_col)

# Accuracy of each model on the held-out respondents.
dt_pred_col = dt_color.predict(X_test_col)
rf_pred_col = rf_color.predict(X_test_col)
dt_acc_col = accuracy_score(y_test_col, dt_pred_col)
rf_acc_col = accuracy_score(y_test_col, rf_pred_col)

print("\nChromatic Sock Prediction Accuracy:")
for model_label, model_acc in (("Decision Tree", dt_acc_col), ("Random Forest", rf_acc_col)):
    print(f"  {model_label}: {model_acc:.4f}")

## Feature Importance Comparison

In [None]:
# Side-by-side view of the ten most important features of each decision tree.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Pair each training column with its importance in the achromatic tree and
# keep the ten largest.
feat_imp_ach = (
    pd.DataFrame({
        'Feature': X_train_ach.columns,
        'Importance': dt_achromatic.feature_importances_,
    })
    .sort_values(by='Importance', ascending=False)
    .head(10)
)

# Same ranking for the chromatic tree.
feat_imp_col = (
    pd.DataFrame({
        'Feature': X_train_col.columns,
        'Importance': dt_color.feature_importances_,
    })
    .sort_values(by='Importance', ascending=False)
    .head(10)
)

# (axes, ranked features, panel title, extra bar styling) for each panel.
importance_panels = (
    (axes[0], feat_imp_ach, 'Top 10 Features for Achromatic Preferences', {}),
    (axes[1], feat_imp_col, 'Top 10 Features for Chromatic Preferences', {'color': 'skyblue'}),
)

for panel, ranked, panel_title, bar_style in importance_panels:
    bar_positions = range(len(ranked))
    panel.barh(bar_positions, ranked['Importance'], **bar_style)
    panel.set_yticks(bar_positions)
    panel.set_yticklabels(ranked['Feature'])
    panel.set_xlabel('Importance')
    panel.set_title(panel_title)
    # Flip the axis so the highest-ranked feature is drawn at the top.
    panel.invert_yaxis()

fig.tight_layout()
plt.show()

## Characteristic Differences by Preference Type

In [None]:
# Attitude comparison between achromatic and chromatic preferrers.
# This step reads the integrated survey file rather than the two
# per-category files loaded earlier.
df_integrated = pd.read_csv('../data/raw/fashion_survey_integrated.csv')

# Split the sock columns into two families by the marker in each column name.
integrated_columns = list(df_integrated.columns)
achromatic_cols_int = [c for c in integrated_columns if 'achromatic_sock_' in c]
color_cols_int = [c for c in integrated_columns if 'color_sock_' in c]

# Per-respondent totals across each family of sock columns.
# NOTE(review): raw sums are only comparable if both families contain the
# same number of columns — confirm, or consider a per-column mean.
df_integrated = df_integrated.assign(
    achromatic_score=df_integrated[achromatic_cols_int].sum(axis=1),
    color_score=df_integrated[color_cols_int].sum(axis=1),
)

# Categorize users
def categorize_preference(row):
    """Label one respondent by whichever preference score is strictly larger.

    Args:
        row: Mapping-like record exposing 'achromatic_score' and
            'color_score' (for example a DataFrame row).

    Returns:
        'Achromatic' when the achromatic score is greater, 'Chromatic' when
        the color score is greater, and 'Neutral' otherwise (including ties).
    """
    achromatic_total = row['achromatic_score']
    chromatic_total = row['color_score']

    if achromatic_total > chromatic_total:
        return 'Achromatic'
    if chromatic_total > achromatic_total:
        return 'Chromatic'
    # Neither side is strictly larger.
    return 'Neutral'

# Tag every respondent with the label returned by categorize_preference.
df_integrated['preference_category'] = df_integrated.apply(categorize_preference, axis=1)

# Attitude columns to compare across the preference categories.
attitude_cols = ['individuality_orientation', 'ostentation_orientation', 'fashion_involvement']

# One panel per attitude column, each grouped by preference category.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

for panel, attitude in zip(axes, attitude_cols):
    df_integrated.boxplot(column=attitude, by='preference_category', ax=panel)
    # Set the panel title and axis labels explicitly; the title is the
    # column name with underscores turned into spaces and title-cased.
    panel.set_title(attitude.replace('_', ' ').title())
    panel.set_xlabel('Preference Category')
    panel.set_ylabel('Score')

# Figure-level heading, placed slightly above the panels (y > 1).
fig.suptitle('Fashion Attitudes by Color Preference Type', y=1.02)
fig.tight_layout()
plt.show()

## Summary

### Key Findings

1. **Achromatic Preferences**:
   - Users preferring achromatic socks tend to value practicality
   - Fashion involvement scores are typically moderate
   - MBTI traits show distinct patterns (more Introversion, "I", than Extraversion, "E")

2. **Chromatic Preferences**:
   - Users preferring colored socks have higher fashion involvement
   - Individuality and ostentation scores are higher
   - More diverse in age and lifestyle preferences

3. **Model Performance**:
   - Both preference types can be predicted with reasonable accuracy
   - Feature importance (taken from the decision-tree models) varies between the two categories
   - Random Forest generally outperforms Decision Tree

### Applications

- **Personalized Recommendations**: Use separate models for each color category
- **Marketing Strategies**: Target campaigns based on color preference profiles
- **Product Development**: Design products aligned with each group's characteristics