# Lead Scraper + Smart Scoring Tool

In [1]:
# 1. Install dependencies
!pip install pandas faker streamlit pyngrok plotly

Collecting faker
  Downloading faker-37.5.3-py3-none-any.whl.metadata (15 kB)
Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.12-py3-none-any.whl.metadata (9.4 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading faker-37.5.3-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m34.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading streamlit-1.47.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m46.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.12-py3-none-any.

In [2]:
%%writefile app.py
import streamlit as st
import pandas as pd
import math
from faker import Faker
import random
import plotly.express as px

# 2. Generate Mock Data
def generate_mock_data(n=50, path='leads.csv', seed=None):
    """Write a CSV of randomly generated mock leads.

    Args:
        n: Number of lead rows to generate.
        path: Destination CSV file. Defaults to 'leads.csv', the file that
            load_and_score() reads.
        seed: Optional seed. When given, the same leads are produced on
            every call; when None (the default) each call produces new
            random leads.

    Returns:
        None. The leads are written to ``path`` (without the index column).
    """
    # Local generators keep seeding from leaking into global random state.
    fake = Faker()
    rng = random.Random(seed)
    if seed is not None:
        fake.seed_instance(seed)

    # The empty strings deliberately simulate leads with a missing
    # industry / business type.
    industries = ['Software', 'Healthcare', 'Retail', 'Finance', 'Technology', '']
    business_types = ['B2B', 'B2C', '']

    records = [
        {
            'company': fake.company(),
            'industry': rng.choice(industries),
            'location': fake.city(),
            'employees': rng.randint(10, 1000),
            'revenue': rng.randint(10_000, 50_000_000),
            'founded_year': rng.randint(2000, 2024),
            'business_type': rng.choice(business_types),
            'email': fake.email(),
            'website': fake.url(),
            'phone': fake.phone_number(),
        }
        for _ in range(n)
    ]
    pd.DataFrame(records).to_csv(path, index=False)

# Streamlit re-executes this whole script on every widget interaction, so an
# unconditional call here would replace the dataset on each rerun (filters
# would act on brand-new random leads every time). Only create the file when
# it does not exist yet; the "Generate Mock Data" button refreshes it on demand.
import os

if not os.path.exists('leads.csv'):
    generate_mock_data()

# 3. Define Lead Scoring
def compute_raw_score(row):
    """Compute a 0–100 lead score for one lead.

    Args:
        row: A mapping-like record (e.g. a DataFrame row or dict) with the
            keys 'revenue', 'employees', 'industry', 'business_type' and
            'founded_year'; 'email', 'website' and 'phone' are read via
            ``row.get`` and may be absent.

    Returns:
        The weighted score rounded to two decimals. Weights: revenue 0.2,
        employees 0.2, industry 0.15, business type 0.15, recency 0.15,
        contact completeness 0.15.
    """
    # Log-scaled revenue, capped at 50M
    rev_score = min(math.log1p(row['revenue']), math.log1p(50_000_000)) / math.log1p(50_000_000)

    # Log-scaled headcount, capped at 1000
    emp_score = min(math.log1p(row['employees']), math.log1p(1000)) / math.log1p(1000)

    # Industry fit
    ind_score = 1.0 if row['industry'] in ['Software', 'Technology'] else 0.0

    # Business type
    b2b_score = 1.0 if row['business_type'] == 'B2B' else 0.0

    # Founding recency (newer is better, 2000–2025 → normalized)
    recency = 2025 - row['founded_year']
    recency_score = max(0, min((25 - recency) / 25, 1.0))  # scaled to 0–1

    # Completeness: how many of email, website, phone are filled.
    # Blank CSV cells come back from pandas as NaN and bool(NaN) is True,
    # so missing values must be detected explicitly or they count as filled.
    filled = 0
    for field in ('email', 'website', 'phone'):
        value = row.get(field)
        if value is None or pd.isna(value):
            continue
        if str(value).strip():
            filled += 1
    complete_score = filled / 3

    # Weighted sum of the six factors (weights add up to 1.0)
    final_score = (
        0.2 * rev_score +
        0.2 * emp_score +
        0.15 * ind_score +
        0.15 * b2b_score +
        0.15 * recency_score +
        0.15 * complete_score
    ) * 100  # Scale to 0–100

    return round(final_score, 2)

def assign_potential(score):
    """Map a numeric lead score to its potential label.

    Scores of 75 and above are High, 50 and above are Medium, and
    everything else is Low.
    """
    tiers = (
        (75, '🔥 High'),
        (50, '⚠️ Medium'),
    )
    # Tiers are ordered from highest threshold down; first match wins.
    for threshold, label in tiers:
        if score >= threshold:
            return label
    return '🧊 Low'

def recommend_action(score):
    """Return the suggested follow-up action for a lead score.

    Uses the same cut-offs as the potential tiers: 75 and above, 50 and
    above, and everything below 50.
    """
    if score >= 75:
        return "Reach out immediately with personalized messaging. High-value target."
    if score >= 50:
        return "Warm lead. Consider outreach via email or LinkedIn within the week."
    # Anything under 50 falls through to the low-priority advice.
    return "Low potential. Monitor occasionally or nurture via newsletter."

# 4. Load + Score Data
def load_and_score():
    """Load leads.csv and add the scoring columns.

    Returns:
        A DataFrame with the CSV's columns plus:
        'score' (compute_raw_score applied to each row),
        'potential' (assign_potential of the score) and
        'recommendation' (recommend_action of the score).
    """
    df = pd.read_csv('leads.csv')
    df['score'] = df.apply(compute_raw_score, axis=1)
    df['potential'] = df['score'].apply(assign_potential)
    df['recommendation'] = df['score'].apply(recommend_action)
    return df

# 5. Chart Score
def breakdown_factors(row):
    """Break a lead's score into its six weighted contributions.

    Args:
        row: A mapping-like record (e.g. a DataFrame row or dict) with the
            keys 'revenue', 'employees', 'industry', 'business_type' and
            'founded_year'; 'email', 'website' and 'phone' are read via
            ``row.get`` and may be absent.

    Returns:
        A dict mapping each factor name to its contribution in score
        points, rounded to two decimals. The maximum contributions are
        20 (Revenue), 20 (Employees) and 15 for each of the other four.
    """
    # Normalized Revenue (log-scaled, capped at 50M)
    rev_score = min(math.log1p(row['revenue']), math.log1p(50_000_000)) / math.log1p(50_000_000)
    rev_score *= 0.2 * 100

    # Normalized Employees (log-scaled, capped at 1000)
    emp_score = min(math.log1p(row['employees']), math.log1p(1000)) / math.log1p(1000)
    emp_score *= 0.2 * 100

    # Industry Boost
    ind_score = (1.0 if row['industry'] in ['Software', 'Technology'] else 0.0) * 0.15 * 100

    # B2B Boost
    b2b_score = (1.0 if row['business_type'] == 'B2B' else 0.0) * 0.15 * 100

    # Recency Score
    recency = 2025 - row['founded_year']
    recency_score = max(0, min((25 - recency) / 25, 1.0)) * 0.15 * 100

    # Completeness: blank CSV cells come back from pandas as NaN and
    # bool(NaN) is True, so missing values are detected explicitly instead
    # of being counted as filled.
    filled = 0
    for field in ('email', 'website', 'phone'):
        value = row.get(field)
        if value is None or pd.isna(value):
            continue
        if str(value).strip():
            filled += 1
    complete_score = filled / 3
    complete_score *= 0.15 * 100

    return {
        'Revenue': round(rev_score, 2),
        'Employees': round(emp_score, 2),
        'Industry Boost': round(ind_score, 2),
        'B2B Boost': round(b2b_score, 2),
        'Recency': round(recency_score, 2),
        'Completeness': round(complete_score, 2)
    }

# 6. Streamlit UI
import streamlit as st
st.set_page_config(page_title="Lead Scoring Dashboard", layout="wide")

st.title("🚀 Smart Lead Scoring Dashboard")

# Sidebar filters. An empty multiselect means "no restriction" for that field.
with st.sidebar:
    st.header("🔍 Filters")
    # "Technology" is one of the generated industries (and one of the two
    # that earn the industry boost), so it must be selectable here.
    industry_filter = st.multiselect(
        "Industry", ["Software", "Healthcare", "Retail", "Finance", "Technology"]
    )
    business_type_filter = st.multiselect("Business Type", ["B2B", "B2C"])
    min_score = st.slider("Minimum Score", 0.0, 100.0, 5.0, step=0.5)

# Regenerate the CSV on demand, then load and score whatever is on disk
if st.button("Generate Mock Data"):
    generate_mock_data()
    st.success("Mock data generated!")

df = load_and_score()

# Combine the score threshold and the optional sidebar selections into a
# single row mask; an empty selection leaves that dimension unfiltered.
keep = df['score'] >= min_score
if industry_filter:
    keep &= df['industry'].isin(industry_filter)
if business_type_filter:
    keep &= df['business_type'].isin(business_type_filter)
df = df[keep]

# Leads table, best score first
st.subheader(f"Top {len(df)} Leads")
ranked = df.sort_values('score', ascending=False)
st.dataframe(ranked, use_container_width=True)

# Export the filtered leads (in their unsorted order) as CSV
csv_payload = df.to_csv(index=False)
st.download_button("📥 Download CSV", csv_payload, file_name="scored_leads.csv")

# Score breakdown chart for one selected company
st.markdown("### 📊 Company Score Breakdown Chart")
if df.empty:
    # With no rows left after filtering there is nothing to select, and
    # indexing the first matching row would raise an IndexError.
    st.info("No leads match the current filters, so there is no score breakdown to show.")
else:
    lead_name = st.selectbox("Select a company to view score breakdown", df['company'].unique())
    # Company names are not guaranteed unique; the first matching row is used.
    lead_row = df[df['company'] == lead_name].iloc[0]
    factors = breakdown_factors(lead_row)
    factor_df = pd.DataFrame(dict(Feature=list(factors.keys()), Score=list(factors.values())))

    fig = px.line_polar(factor_df, r='Score', theta='Feature', line_close=True, title="Lead Score Breakdown")
    st.plotly_chart(fig)

Writing app.py


In [3]:
# 7. Run Streamlit UI using Ngrok
import os
from getpass import getpass

from pyngrok import ngrok

# Kill any existing tunnels
ngrok.kill()

# Set your ngrok authtoken.
# Never hard-code the token in the notebook: anyone who can read the file can
# use it. Export NGROK_AUTHTOKEN before starting the kernel, or paste the
# token at the prompt (https://dashboard.ngrok.com/get-started/your-authtoken).
# NOTE: a token that was previously committed in this cell should be revoked.
authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(authtoken)


# Run Streamlit in background, pinned to the port the tunnel points at
get_ipython().system_raw('streamlit run app.py --server.port 8501 &')

# Get public URL
## Note: If this fails, re-run this cell
url = ngrok.connect("8501") # Specify port as a string
print(f"🌐 Your Streamlit app is live at: {url}")

🌐 Your Streamlit app is live at: NgrokTunnel: "https://2d6f31413657.ngrok-free.app" -> "http://localhost:8501"
