# 🤝 Supplier / Vendor Analysis — Sourcing, Performance & Risk
This notebook performs exploratory data analysis (EDA), contract-expiry checks, supplier scoring and segmentation, and exports actionable supplier lists.

In [None]:
# STEP 1: Imports
# Data handling (pandas, numpy) and plotting (matplotlib, seaborn) libraries.
# NOTE(review): numpy and datetime are not referenced by the cells visible in
# this notebook — confirm whether they are still needed.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
# Notebook-wide display settings: seaborn's 'whitegrid' plot style, and no cap
# on the number of DataFrame columns pandas will display.
sns.set(style='whitegrid')
pd.set_option('display.max_columns', None)


In [None]:
# STEP 2: Load data
# Read the supplier extract, parsing both contract date columns as datetimes
# at load time, then show the frame's dimensions and its first rows.
date_columns = ['contract_start', 'contract_end']
df = pd.read_csv('supplier_data.csv', parse_dates=date_columns)
print('Shape:', df.shape)
df.head()


In [None]:
# STEP 3: Data quality
# Null count per column, number of fully duplicated rows, and a transposed
# summary covering every column (include='all').
missing_per_column = df.isnull().sum()
duplicate_row_count = df.duplicated().sum()
print('Missing values:\n', missing_per_column)
print('Duplicate rows:', duplicate_row_count)
df.describe(include='all').T


In [None]:
# STEP 4: Performance distributions
# One histogram (20 bins with a KDE overlay) per performance metric; missing
# values are dropped before plotting. Each entry is (column, title, x-label).
metric_plots = [
    ('on_time_delivery_rate', 'On-time Delivery Rate Distribution', 'On-time Delivery Rate'),
    ('quality_score', 'Quality Score Distribution', 'Quality Score'),
]
for metric_column, plot_title, axis_label in metric_plots:
    plt.figure(figsize=(8, 4))
    sns.histplot(df[metric_column].dropna(), bins=20, kde=True)
    plt.title(plot_title)
    plt.xlabel(axis_label)
    plt.show()


In [None]:
# STEP 5: Regional & country counts
# Number of supplier rows per region (all regions shown).
region_counts = df['region'].value_counts()
plt.figure(figsize=(8, 4))
region_counts.plot.bar()
plt.title('Suppliers by Region')
plt.show()

# Same count per country, limited to the ten most frequent countries.
top_country_counts = df['country'].value_counts().head(10)
plt.figure(figsize=(8, 4))
top_country_counts.plot.bar(color='teal')
plt.title('Top 10 Supplier Countries')
plt.show()


In [None]:
# STEP 6: Contract expiry & risk
# Days left on each contract, measured from today's date at midnight so the
# result is in whole days. Negative values mean the contract has already ended;
# a missing contract_end gives NaN and is excluded from both lists below.
EXPIRY_WINDOW_DAYS = 90
today = pd.Timestamp('today').normalize()
df['days_to_contract_end'] = (df['contract_end'] - today).dt.days

# A plain `<= 90` filter also matches every negative value, so contracts that
# lapsed in the past were being counted as "expiring in next 90 days". Split
# the two groups: `expiring_90` holds only contracts ending today through
# EXPIRY_WINDOW_DAYS from now (both ends inclusive); lapsed contracts are
# reported separately so they are not silently dropped.
already_expired = df[df['days_to_contract_end'] < 0].sort_values('days_to_contract_end')
expiring_90 = (
    df[df['days_to_contract_end'].between(0, EXPIRY_WINDOW_DAYS)]
    .sort_values('days_to_contract_end')
)
print('Suppliers with contracts already expired:', already_expired.shape[0])
print('Suppliers with contracts expiring in next 90 days:', expiring_90.shape[0])
expiring_90[['supplier_id','supplier_name','contract_end','days_to_contract_end']].head(20)


In [None]:
# STEP 7: Supplier scoring (simple weighted score)
# score = 0.5 * on-time delivery rate + 0.4 * quality score + 0.1 * normalised
# order volume, rounded to 3 decimals. Works on a copy so `df` is left untouched.
# NOTE(review): the weights sum to 1 and STEP 8 bins scores on a 0-1 scale, so
# this presumably expects on_time_delivery_rate and quality_score to already be
# in [0, 1] — confirm against the data.
WEIGHT_ON_TIME = 0.5
WEIGHT_QUALITY = 0.4
WEIGHT_ORDERS = 0.1

df_sc = df.copy()
# A supplier with a missing metric is scored as 0 on that metric.
df_sc['on_time_delivery_rate'] = df_sc['on_time_delivery_rate'].fillna(0)
df_sc['quality_score'] = df_sc['quality_score'].fillna(0)

# Min-max scale order volume to [0, 1].
order_counts = df_sc['total_orders_supplied']
order_range = order_counts.max() - order_counts.min()
if order_range > 0:
    # Missing order counts are treated as 0, consistent with the other two
    # metrics, instead of propagating NaN into the final score.
    df_sc['order_norm'] = ((order_counts - order_counts.min()) / order_range).fillna(0)
else:
    # All suppliers share one order count (or none is known): dividing by the
    # zero/NaN range would make every score NaN, so the volume term is 0.
    df_sc['order_norm'] = 0.0

df_sc['score'] = (
    (WEIGHT_ON_TIME * df_sc['on_time_delivery_rate'])
    + (WEIGHT_QUALITY * df_sc['quality_score'])
    + (WEIGHT_ORDERS * df_sc['order_norm'])
)
df_sc['score'] = df_sc['score'].round(3)
df_sc.sort_values('score', ascending=False).head(20)


In [None]:
# STEP 8: Segment suppliers by score
# Score bands: [0, 0.5] Poor, (0.5, 0.7] Average, (0.7, 0.85] Good,
# (0.85, 1.0] Excellent. Scores outside 0-1 get no segment and are left out of
# the bar chart.
labels = ['Poor','Average','Good','Excellent']
bins = [0, 0.5, 0.7, 0.85, 1.0]
df_sc['segment'] = pd.cut(df_sc['score'], bins, labels=labels, include_lowest=True)
segment_counts = df_sc['segment'].value_counts()
segment_counts.plot.bar(color='coral')
plt.title('Supplier Segments by Score')
plt.show()


In [None]:
# STEP 9: Correlation between metrics
# Pairwise correlation of the three input metrics and the composite score,
# drawn as an annotated heatmap with two-decimal labels.
corr_columns = [
    'on_time_delivery_rate',
    'quality_score',
    'total_orders_supplied',
    'score',
]
corr = df_sc[corr_columns].corr()
plt.figure(figsize=(6, 4))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
plt.show()


In [None]:
# STEP 10: Export action lists
# Write the scored supplier table and the contract-expiry list to CSV,
# without the DataFrame index column.
exports = {
    'supplier_scored.csv': df_sc,
    'suppliers_expiring_90_days.csv': expiring_90,
}
for csv_name, frame in exports.items():
    frame.to_csv(csv_name, index=False)
print('Exported supplier_scored.csv and suppliers_expiring_90_days.csv')


## Next steps / Recommendations
- Start contract renewal outreach for suppliers in `suppliers_expiring_90_days.csv`
- Prioritize `Excellent` segment suppliers for strategic partnerships
- Investigate `Poor` segment suppliers for quality or delivery issues
