In [26]:
# Install necessary libraries
%pip install streamlit email-validator pandas



In [32]:
!pip install pyngrok



In [27]:
# preprocess_leads.py

import pandas as pd
from email_validator import validate_email, EmailNotValidError

# Read the raw lead export into the working DataFrame.
RAW_LEADS_CSV = "raw_leads_100.csv"
df = pd.read_csv(RAW_LEADS_CSV)

# Email validation
def validate_emails(email):
    """Report whether a single e-mail address passes `validate_email`.

    Args:
        email: The raw cell value from the "Email" column. Missing cells
            arrive from pandas as float NaN (or None) rather than a string.

    Returns:
        True when `validate_email` accepts the address; False when it raises
        `EmailNotValidError` or when the value is not a string at all.
    """
    # Guard first: handing a non-string (NaN/None) to the validator fails with
    # an error other than EmailNotValidError, which would abort the whole
    # `.apply` instead of marking just this lead as invalid.
    if not isinstance(email, str):
        return False

    # NOTE(review): email_validator may also perform a DNS deliverability
    # lookup by default -- confirm that network access is intended here.
    try:
        validate_email(email)
    except EmailNotValidError:
        return False
    return True

# Flag every lead whose address passes validation.
df["Email Valid"] = df["Email"].map(validate_emails)

# Title scoring
def score_title(title):
    """Score a job title by seniority.

    Args:
        title: The raw cell value from the "Job Title" column.

    Returns:
        2 for founder/executive/head/lead titles, 1 for manager/director
        titles, and -1 for anything else (including missing, non-string
        values).
    """
    # Missing titles come through as float NaN; treat them like an
    # unrecognised title instead of crashing on `.lower()`.
    if not isinstance(title, str):
        return -1

    # "cofounder" and "leader" are listed explicitly because the previous
    # substring test matched them through "founder" and "lead".
    senior_words = {"founder", "cofounder", "ceo", "cto", "head", "lead", "leader"}
    mid_words = {"manager", "director"}

    # Compare whole words, not substrings: "director" contains the letters
    # "cto", so a substring test scored every director as an executive.
    # Any non-letter (space, "-", "/", "&", ...) acts as a word separator.
    letters_only = "".join(ch if ch.isalpha() else " " for ch in title.lower())
    words = set(letters_only.split())

    if words & senior_words:
        return 2
    if words & mid_words:
        return 1
    return -1

# Domain scoring
def score_domain(domain):
    """Score an e-mail domain: free webmail is penalised, anything else rewarded.

    Args:
        domain: The raw cell value from the "Domain" column.

    Returns:
        -1 when the domain is one of the free webmail providers or is
        missing/blank, otherwise 1.
    """
    free_domains = {"gmail.com", "yahoo.com", "hotmail.com"}

    # A missing domain (NaN/None) is not evidence of a company address, so it
    # must not earn the +1 that the plain membership test used to give it.
    if not isinstance(domain, str):
        return -1

    # Domains are case-insensitive and CSV cells often carry stray spaces.
    normalized = domain.strip().lower()
    if not normalized or normalized in free_domains:
        return -1
    return 1

# LinkedIn scoring
def valid_linkedin(url):
    """Report whether a value looks like a personal LinkedIn profile URL.

    Args:
        url: The raw cell value from the "LinkedIn" column.

    Returns:
        True when the text contains "linkedin.com/in/" (compared
        case-insensitively); False otherwise, including for missing or
        non-string values.
    """
    # Missing cells are float NaN; `in` on a float raises TypeError.
    if not isinstance(url, str):
        return False
    return "linkedin.com/in/" in url.lower()

# Derive the per-signal columns from the raw lead fields.
df["Title Score"] = df["Job Title"].map(score_title)
df["Domain Score"] = df["Domain"].map(score_domain)
df["LinkedIn Valid"] = df["LinkedIn"].map(valid_linkedin)


def _flag_to_score(is_valid):
    """Translate a validity flag into +1 (truthy) or -1 (falsy)."""
    if is_valid:
        return 1
    return -1


df["LinkedIn Score"] = df["LinkedIn Valid"].map(_flag_to_score)
df["Email Score"] = df["Email Valid"].map(_flag_to_score)

# The overall lead score is the plain sum of the four component scores.
df["Lead Score"] = (
    df["Title Score"]
    + df["Domain Score"]
    + df["LinkedIn Score"]
    + df["Email Score"]
)

# Persist the enriched table (without the index column) for the dashboard.
PROCESSED_LEADS_CSV = "processed_leads.csv"
df.to_csv(PROCESSED_LEADS_CSV, index=False)

In [28]:
%%writefile streamlit_app.py
import streamlit as st
import pandas as pd

# Load the actual data file
try:
    df = pd.read_csv("/content/processed_leads.csv")
    df.columns = df.columns.str.strip()

    st.title("Lead Scoring Dashboard")

    # Slider for filtering
    min_score = st.slider("Minimum Lead Score", min_value=int(df['Lead Score'].min()), max_value=int(df['Lead Score'].max()), value=0)
    filtered_df = df[df['Lead Score'] >= min_score]

    st.write("Filtered Leads:")
    st.dataframe(filtered_df)

    st.download_button("Download Filtered Leads as CSV", data=filtered_df.to_csv(index=False), file_name="filtered_leads.csv", mime="text/csv")

except FileNotFoundError:
    st.error("Error: processed_leads.csv not found.")
except KeyError:
    st.error("Error: 'Lead Score' column not found.")
except Exception as e:
    st.error(f"Unexpected error: {e}")


Overwriting streamlit_app.py


In [35]:
%%writefile streamlit_app.py
import streamlit as st
import pandas as pd
import altair as alt

st.set_page_config(page_title="Lead Dashboard", layout="wide")

# Load processed data - make sure this file exists and is accessible
try:
    df = pd.read_csv("/content/processed_leads.csv")
    df.columns = df.columns.str.strip()

    st.title("🚀 Lead Scoring Dashboard")

    # 📊 Quick Stats
    col1, col2, col3 = st.columns(3)
    col1.metric("Total Leads", len(df))
    col2.metric("Avg Score", round(df["Lead Score"].mean(), 2))
    col3.metric("Top Leads", len(df[df["Lead Score"] >= 5]))

    st.markdown("---")

    # 🎯 Minimum Score Filter
    st.markdown("### 🎯 Filter by Minimum Lead Score")
    min_score = st.slider("Minimum Lead Score",
                          min_value=int(df['Lead Score'].min()),
                          max_value=int(df['Lead Score'].max()),
                          value=0)

    filtered_df = df[df['Lead Score'] >= min_score]

    # 📈 Chart
    st.markdown("### 📈 Lead Score Distribution")
    chart = alt.Chart(filtered_df).mark_bar().encode(
        x=alt.X("Lead Score:Q", bin=alt.Bin(maxbins=20)),
        y='count()',
        tooltip=['Lead Score']
    ).properties(
        width=700,
        height=300
    )

    st.altair_chart(chart, use_container_width=True)

    # 📋 Table
    st.markdown("### 📋 Filtered Leads")
    st.dataframe(filtered_df.style.background_gradient(cmap="YlGnBu", subset=["Lead Score"]))

    # 📥 Download
    st.download_button("📥 Download Filtered Leads as CSV", data=filtered_df.to_csv(index=False), file_name="filtered_leads.csv", mime="text/csv")

except FileNotFoundError:
    st.error("Error: processed_leads.csv not found.")
except KeyError:
    st.error("Error: 'Lead Score' column missing.")
except Exception as e:
    st.error(f"Unexpected error: {e}")


Overwriting streamlit_app.py


In [None]:
from pyngrok import ngrok
import subprocess
import os
import sys
import time  # unused in this cell; kept in case other cells expect it
from getpass import getpass

# Port shared by the ngrok tunnel and the Streamlit server (Streamlit's default).
STREAMLIT_PORT = 8501

# Terminate open tunnels if any
print("Terminating existing ngrok tunnels")
ngrok.kill()

# Never store the ngrok token in the notebook: read it from the environment,
# or prompt for it so it is not written into the saved file.
# NOTE(review): the token that used to be hard-coded here has been exposed
# and should be revoked and regenerated in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)

# Open a HTTP tunnel to the port Streamlit will listen on
print("Opening ngrok tunnel")
public_url = ngrok.connect(addr=str(STREAMLIT_PORT), proto="http")
print(f"Streamlit app will be available at: {public_url}")

# Run the Streamlit app in the background
print("Running Streamlit app")
# Launch through the kernel's own interpreter so the Streamlit installed by
# %pip is the one that runs, and pin the port so it matches the tunnel.
# Make sure the script exists in the current directory or provide the correct path
streamlit_process = subprocess.Popen(
    [
        sys.executable, "-m", "streamlit", "run", "streamlit_app.py",
        "--server.port", str(STREAMLIT_PORT),
        "--server.headless", "true",
    ]
)

# Keep the cell alive while the Streamlit app is running
# You will need to manually stop this cell when you are done
try:
    streamlit_process.wait()
except KeyboardInterrupt:
    streamlit_process.terminate()
    print("Streamlit process terminated.")
finally:
    # Whatever ended the wait, do not leave the server or the public tunnel behind.
    if streamlit_process.poll() is None:
        streamlit_process.terminate()
    ngrok.kill()

Terminating existing ngrok tunnels
Opening ngrok tunnel
Streamlit app will be available at: NgrokTunnel: "https://1ab0-34-31-91-148.ngrok-free.app" -> "http://localhost:8501"
Running Streamlit app
