# Component Score Analysis

This notebook analyzes the distribution and effectiveness of component priority scores.

In [None]:
# Standard library
from pathlib import Path

# Third-party: numerics, tabular data, plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: seaborn's 'whitegrid' style and a
# 10x6 default figure size for any figure that does not set its own.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (10, 6)})

In [None]:
# Read the component score CSV into the frame that the cells below analyze.
# The path is relative to the notebook's working directory.
scores_df = pd.read_csv(filepath_or_buffer='../data/output/component_scores_latest.csv')

# Report the number of rows that were read
print(f"Loaded {len(scores_df)} components")

# Preview the first five rows as the cell's rich output
scores_df.head(n=5)

## Score Distribution Analysis

In [None]:
# Two-panel figure: overall score histogram (left), per-source-type box plot (right)
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Histogram of all priority scores.
# Missing scores are dropped first: Axes.hist hands the data to numpy.histogram,
# which cannot derive a bin range when the data contains NaN and raises.
axes[0].hist(scores_df['priority_score'].dropna(), bins=50, edgecolor='black')
axes[0].set_xlabel('Priority Score')
axes[0].set_ylabel('Count')
axes[0].set_title('Distribution of Priority Scores')

# Box plot of priority score grouped by source type
scores_df.boxplot(column='priority_score', by='source_type', ax=axes[1])
axes[1].set_title('Priority Score by Source Type')
axes[1].set_xlabel('Source Type')
axes[1].set_ylabel('Priority Score')

# When `by` is given, pandas also sets a figure-level title
# ("Boxplot grouped by source_type"). On a shared figure that title sits above
# both panels and mislabels the histogram, so clear it.
fig.suptitle('')

plt.tight_layout()
plt.show()

# Summary statistics (count, mean, std, min, quartiles, max) of the score column
print("Score Statistics:")
print(scores_df['priority_score'].describe())

## Feature Correlation Analysis

In [None]:
# Columns whose correlation with the priority score is inspected
feature_cols = [
    'inventory', 'leadtime_weeks', 'first_price', 'moq',
    'demand_all_time', 'availability_score', 'is_authorized',
]

# Build the correlation matrix over features + score, keep only the
# 'priority_score' column, and drop the score's correlation with itself.
correlations = (
    scores_df[[*feature_cols, 'priority_score']]
    .corr()
    .loc[:, 'priority_score']
    .drop(labels='priority_score')
)

# Horizontal bar chart, bars ordered by ascending correlation
plt.figure(figsize=(8, 6))
correlations.sort_values().plot(kind='barh')
# Thin vertical reference line at zero separates negative from positive correlations
plt.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
plt.title('Feature Correlations')
plt.xlabel('Correlation with Priority Score')
plt.tight_layout()
plt.show()

## Top Scored Components

In [None]:
# Take the ten rows with the largest priority_score, then narrow to the
# identifying columns and the inputs shown alongside the score.
top_10 = (
    scores_df
    .nlargest(n=10, columns='priority_score')
    .loc[:, ['pn', 'desc', 'priority_score', 'inventory', 'leadtime_weeks', 'first_price', 'demand_all_time']]
)

print("Top 10 Scored Components:")
display(top_10)

## Score Validation

In [None]:
# Sanity checks on the score column: counts at the extremes and the overall range
print("Score Validation:")
print(f"Components with score = 0: {(scores_df['priority_score'] == 0).sum()}")
print(f"Components with score > 90: {(scores_df['priority_score'] > 90).sum()}")
print(f"Score range: {scores_df['priority_score'].min():.2f} - {scores_df['priority_score'].max():.2f}")

# Rows treated as unavailable here: zero inventory AND lead time above 12 weeks.
# The printed count of such rows with a positive score is expected to be 0.
unavailable = scores_df.loc[
    scores_df['inventory'].eq(0) & scores_df['leadtime_weeks'].gt(12)
]
print(f"\nUnavailable items with score > 0: {(unavailable['priority_score'] > 0).sum()} (should be 0)")

## Business Impact Analysis

In [None]:
# Bucket components into score quantiles with integer labels. Ten buckets are
# requested; duplicate bin edges are dropped, so fewer may be produced.
# Note: this adds/overwrites the 'score_decile' column on scores_df itself.
scores_df['score_decile'] = pd.qcut(
    x=scores_df['priority_score'],
    q=10,
    labels=False,
    duplicates='drop',
)

# Per-bucket summary: min/max/mean of the score, and the mean of each listed
# input column, rounded to two decimals.
decile_stats = (
    scores_df
    .groupby(by='score_decile')
    .agg({
        'priority_score': ['min', 'max', 'mean'],
        **dict.fromkeys(['inventory', 'leadtime_weeks', 'first_price', 'demand_all_time'], 'mean'),
    })
    .round(decimals=2)
)

print("Statistics by Score Decile:")
display(decile_stats)