# Task 6: FinTech Vendor Scorecard for Micro-Lending

In [1]:
import pandas as pd
import numpy as np
import os
import sys
import re
from datetime import datetime

SRC_PATH = os.path.abspath(os.path.join(os.getcwd(), '../src'))
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)
    
from model_training.ner_trainer import FinalNERPredictor

# Load NER model
predictor = FinalNERPredictor('../models/distilbert_ner')

# Load CSV data from 5 channels
csv_files = {
    'AwasMart': '../data/processed/AwasMart_20250621_051236.csv',
    'ethio_market_place': '../data/processed/ethio_market_place_20250621_050022.csv',
    'ethiomarketo': '../data/processed/ethiomarketo_20250621_050735.csv',
    'helloomarketethiopia': '../data/processed/helloomarketethiopia_20250621_050538.csv',
    'jijietcom': '../data/processed/jijietcom_20250621_050636.csv'
}

vendor_data = {}
for vendor, file_path in csv_files.items():
    vendor_data[vendor] = pd.read_csv(file_path)
    print(f"{vendor}: {len(vendor_data[vendor])} posts")

  from .autonotebook import tqdm as notebook_tqdm


AwasMart: 100 posts
ethio_market_place: 100 posts
ethiomarketo: 99 posts
helloomarketethiopia: 100 posts
jijietcom: 100 posts


In [2]:
def extract_prices(text):
    """Extract prices using NER + regex"""
    prices = []
    entities = predictor.predict_simple(text)
    for entity in entities:
        if entity['label'] == 'PRICE':
            nums = re.findall(r'\d+', entity['text'])
            if nums:
                prices.append(int(nums[0]))
    
    # Fallback regex
    if not prices:
        for pattern in [r'(\d+)\s*ብር', r'(\d+)\s*ETB', r'(\d+)\s*birr']:
            prices.extend([int(m) for m in re.findall(pattern, text, re.IGNORECASE)])
    
    return prices

def analyze_vendor(df, vendor_name):
    """Analyze vendor metrics"""
    total_posts = len(df)
    avg_views = df['views'].mean() if 'views' in df.columns else 0
    
    # Calculate posts per week
    if 'date' in df.columns:
        dates = pd.to_datetime(df['date'], errors='coerce').dropna()
        if len(dates) > 1:
            days = (dates.max() - dates.min()).days
            posts_per_week = (total_posts * 7) / max(days, 1)
        else:
            posts_per_week = 0
    else:
        posts_per_week = 0
    
    # Extract prices
    all_prices = []
    for text in df['text'].fillna(''):
        all_prices.extend(extract_prices(str(text)))
    
    avg_price = np.mean(all_prices) if all_prices else 0
    
    # Top post
    if 'views' in df.columns:
        top_idx = df['views'].idxmax()
        top_views = df.loc[top_idx, 'views']
        top_text = str(df.loc[top_idx, 'text'])[:100] + '...'
    else:
        top_views = 0
        top_text = ''
    
    return {
        'vendor': vendor_name,
        'total_posts': total_posts,
        'avg_views': avg_views,
        'posts_per_week': posts_per_week,
        'avg_price': avg_price,
        'top_views': top_views,
        'top_text': top_text,
        'price_count': len(all_prices)
    }

# Analyze all vendors
vendor_metrics = {}
for vendor, df in vendor_data.items():
    vendor_metrics[vendor] = analyze_vendor(df, vendor)
    print(f"{vendor}: {vendor_metrics[vendor]['avg_views']:.1f} avg views, {vendor_metrics[vendor]['posts_per_week']:.1f} posts/week")

AwasMart: 2740.7 avg views, 53.8 posts/week
ethio_market_place: 34.3 avg views, 19.4 posts/week
ethiomarketo: 150749.0 avg views, 99.0 posts/week
helloomarketethiopia: 3040.7 avg views, 7.8 posts/week
jijietcom: 223.8 avg views, 700.0 posts/week


In [3]:
def calculate_lending_score(metrics):
    """Calculate lending score"""
    view_score = min(metrics['avg_views'] / 100, 100)
    frequency_score = min(metrics['posts_per_week'] * 10, 100)
    price_score = min(metrics['avg_price'] / 100, 100) if metrics['avg_price'] > 0 else 0
    
    return round(view_score * 0.5 + frequency_score * 0.3 + price_score * 0.2, 2)

# Create scorecard
scorecard = []
for vendor, metrics in vendor_metrics.items():
    lending_score = calculate_lending_score(metrics)
    scorecard.append({
        'Vendor': vendor,
        'Avg Views/Post': round(metrics['avg_views'], 1),
        'Posts/Week': round(metrics['posts_per_week'], 1),
        'Avg Price (ETB)': round(metrics['avg_price'], 0) if metrics['avg_price'] > 0 else 'N/A',
        'Total Posts': metrics['total_posts'],
        'Lending Score': lending_score
    })

scorecard_df = pd.DataFrame(scorecard).sort_values('Lending Score', ascending=False)

print("=== VENDOR SCORECARD ===")
print(scorecard_df.to_string(index=False))

=== VENDOR SCORECARD ===
              Vendor  Avg Views/Post  Posts/Week  Avg Price (ETB)  Total Posts  Lending Score
        ethiomarketo        150749.0        99.0     2.266486e+11           99         100.00
            AwasMart          2740.7        53.8     2.119144e+11          100          63.70
helloomarketethiopia          3040.7         7.8     2.260847e+11          100          58.54
           jijietcom           223.8       700.0     4.354030e+09          100          51.12
  ethio_market_place            34.3        19.4     1.845044e+08          100          50.17


In [4]:
# Lending recommendations
recommendations = []
for _, row in scorecard_df.iterrows():
    score = row['Lending Score']
    if score >= 70:
        risk, loan = "LOW RISK", "50,000-100,000 ETB"
    elif score >= 50:
        risk, loan = "MEDIUM RISK", "20,000-50,000 ETB"
    elif score >= 30:
        risk, loan = "HIGH RISK", "5,000-20,000 ETB"
    else:
        risk, loan = "REJECT", "Not Recommended"
    
    recommendations.append({
        'Vendor': row['Vendor'],
        'Risk Level': risk,
        'Recommended Loan': loan,
        'Score': score
    })

rec_df = pd.DataFrame(recommendations)

print("\n=== LENDING RECOMMENDATIONS ===")
print(rec_df.to_string(index=False))

print("\n=== SUMMARY ===")
print(f"Total Vendors: {len(scorecard_df)}")
print(f"Low Risk: {len(rec_df[rec_df['Risk Level'] == 'LOW RISK'])}")
print(f"Medium Risk: {len(rec_df[rec_df['Risk Level'] == 'MEDIUM RISK'])}")
print(f"High Risk: {len(rec_df[rec_df['Risk Level'] == 'HIGH RISK'])}")
print(f"Rejected: {len(rec_df[rec_df['Risk Level'] == 'REJECT'])}")
print(f"Average Score: {scorecard_df['Lending Score'].mean():.2f}")


=== LENDING RECOMMENDATIONS ===
              Vendor  Risk Level   Recommended Loan  Score
        ethiomarketo    LOW RISK 50,000-100,000 ETB 100.00
            AwasMart MEDIUM RISK  20,000-50,000 ETB  63.70
helloomarketethiopia MEDIUM RISK  20,000-50,000 ETB  58.54
           jijietcom MEDIUM RISK  20,000-50,000 ETB  51.12
  ethio_market_place MEDIUM RISK  20,000-50,000 ETB  50.17

=== SUMMARY ===
Total Vendors: 5
Low Risk: 1
Medium Risk: 4
High Risk: 0
Rejected: 0
Average Score: 64.71


In [5]:
# Top performers analysis
print("=== TOP PERFORMERS ===")
for _, vendor in scorecard_df.head(3).iterrows():
    vendor_name = vendor['Vendor']
    metrics = vendor_metrics[vendor_name]
    print(f"\n{vendor_name}:")
    print(f"  Score: {vendor['Lending Score']}")
    print(f"  Views: {vendor['Avg Views/Post']} avg/post")
    print(f"  Activity: {vendor['Posts/Week']} posts/week")
    print(f"  Top Post: {metrics['top_views']} views")
    print(f"  Sample: {metrics['top_text']}")

# Export results
os.makedirs('../results', exist_ok=True)
scorecard_df.to_csv('../results/vendor_scorecard.csv', index=False)
rec_df.to_csv('../results/lending_recommendations.csv', index=False)
print("\nResults exported to ../results/")

=== TOP PERFORMERS ===

ethiomarketo:
  Score: 100.0
  Views: 150749.0 avg/post
  Activity: 99.0 posts/week
  Top Post: 368087 views
  Sample: #New_Max_Man_Enlargement
🌔🌹🌺የስንፈተ ወሲብ መፍትሄን💯 በሁሉም አማራጭ አቀረብንሎት🙏🙏
☎️☎️ 📞 +251977013966
❤️በወሲብ (sex...

AwasMart:
  Score: 63.7
  Views: 2740.7 avg/post
  Activity: 53.8 posts/week
  Top Post: 3763 views
  Sample: nan...

helloomarketethiopia:
  Score: 58.54
  Views: 3040.7 avg/post
  Activity: 7.8 posts/week
  Top Post: 4809 views
  Sample: nan...

Results exported to ../results/
