# In [None]:
import pandas as pd
from datetime import datetime, timedelta

def _scale_by_max(series):
    """Divide *series* by its maximum, yielding zeros when that maximum is 0."""
    peak = series.max()
    if peak == 0:
        # 0 / 0 would fill the column with NaN; treat it as "no contribution".
        return series * 0.0
    return series / peak


def calculate_vendor_metrics(df):
    """Compute per-channel activity metrics and a composite lending score.

    Args:
        df: DataFrame with the columns ``channel``, ``message_id``, ``views``,
            ``date`` (datetime-like, so that ``max() - min()`` has ``.days``)
            and ``extracted_price`` (per the original note, prices produced
            by an NER model).

    Returns:
        DataFrame indexed by ``channel`` with the columns ``total_posts``,
        ``avg_views``, ``days_active``, ``posts_per_week``, ``avg_price`` and
        ``lending_score``, sorted by ``lending_score`` in descending order.
    """
    # Calculate metrics per vendor
    metrics = df.groupby("channel").agg({
        "message_id": "count",
        "views": "mean",
        "date": lambda x: (x.max() - x.min()).days
    }).rename(columns={
        "message_id": "total_posts",
        "views": "avg_views",
        "date": "days_active"
    })

    # Posting frequency (posts per week).
    # A vendor whose posts all fall on one day has days_active == 0; dividing
    # by that gives inf, which then poisons the max-normalisation below for
    # every vendor. Count the observation window as at least one day.
    weeks_active = metrics["days_active"].clip(lower=1) / 7
    metrics["posts_per_week"] = metrics["total_posts"] / weeks_active

    # Average price per vendor (mean of extracted_price)
    metrics["avg_price"] = df.groupby("channel")["extracted_price"].mean()

    # Lending score: weighted sum of max-normalised components. The price
    # term is inverted, so a lower average price yields a higher score.
    metrics["lending_score"] = (
        0.5 * _scale_by_max(metrics["avg_views"])
        + 0.3 * _scale_by_max(metrics["posts_per_week"])
        + 0.2 * (1 - _scale_by_max(metrics["avg_price"]))
    )
    return metrics.sort_values("lending_score", ascending=False)

# Read the NER-enriched message export and parse its timestamps
df = pd.read_csv("data/processed/with_ner_results.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

# Score every vendor, then show the headline columns
vendor_metrics = calculate_vendor_metrics(df)
report_columns = ["avg_views", "posts_per_week", "avg_price", "lending_score"]
print(vendor_metrics[report_columns])