In [1]:
!pip install streamlit pandas numpy matplotlib seaborn plotly pyngrok

Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m66.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.wh

In [2]:
from google.colab import files
uploaded = files.upload()  # This opens a file picker to upload customers-100.csv

Saving customers-100.csv to customers-100.csv


In [3]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(/content/customers-100.csv):
    df = pd.read_csv(file_path)
    # Remove duplicates
    df = df.drop_duplicates()
    # Drop rows with missing critical values
    df = df.dropna(subset=['First Name', 'Email'])
    # Drop unnecessary columns
    df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
    return df

# File path
file_path = "customers-100.csv"
try:
    df = load_and_clean_data(file_path)
except FileNotFoundError:
    st.error("customers-100.csv not found! Please upload the file.")
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")

    # Summary stats
    total_customers = len(df)
    unique_countries = df['Country'].nunique()
    most_common_country = df['Country'].mode()[0]
    top_3_cities = df['City'].value_counts().head(3).index.tolist()
    top_5_companies = df['Company'].value_counts().head(5).index.tolist()

    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Customers", total_customers)
        st.metric("Unique Countries", unique_countries)
    with col2:
        st.metric("Most Common Country", most_common_country)
        st.write("Top 3 Cities:", ", ".join(top_3_cities))
        st.write("Top 5 Companies:", ", ".join(top_5_companies))

# Table View
elif view == "Table":
    st.title("Editable Customer Table")
    st.write("Edit the dataset below and download the updated version.")

    # Editable table
    edited_df = st.data_editor(df, num_rows="dynamic")

    # Export button
    if st.button("Export Edited Data"):
        edited_df.to_csv("edited_customers.csv", index=False)
        st.download_button(
            label="Download Edited CSV",
            data=Path("edited_customers.csv").read_bytes(),
            file_name="edited_customers.csv",
            mime="text/csv"
        )

# Stats View
elif view == "Stats":
    st.title("Customer Data Visualizations")
    st.write("Graphical insights into customer distribution.")

    # Bar chart: Top 5 countries
    top_5_countries = df['Country'].value_counts().head(5)
    fig1 = px.bar(
        x=top_5_countries.index,
        y=top_5_countries.values,
        labels={'x': 'Country', 'y': 'Customer Count'},
        title="Top 5 Countries by Customer Count"
    )
    st.plotly_chart(fig1)

    # Bar chart: Top 5 cities
    top_5_cities = df['City'].value_counts().head(5)
    fig2 = px.bar(
        x=top_5_cities.index,
        y=top_5_cities.values,
        labels={'x': 'City', 'y': 'Customer Count'},
        title="Top 5 Cities by Customer Count"
    )
    st.plotly_chart(fig2)

    # Pie chart: Country distribution (Top 5 + Others)
    country_counts = df['Country'].value_counts()
    top_n = 5
    top_countries = country_counts.head(top_n)
    others_count = country_counts.iloc[top_n:].sum()
    pie_data = pd.concat([
        top_countries,
        pd.Series([others_count], index=['Others'])
    ])
    fig3 = px.pie(
        names=pie_data.index,
        values=pie_data.values,
        title=f"Customer Distribution by Country (Top {top_n} + Others)"
    )
    st.plotly_chart(fig3)

    # Pie chart: Email domains
    df['Email Domain'] = df['Email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else 'Unknown')
    email_domain_counts = df['Email Domain'].value_counts().head(5)
    fig4 = px.pie(
        names=email_domain_counts.index,
        values=email_domain_counts.values,
        title="Customer Distribution by Email Domain"
    )
    st.plotly_chart(fig4)

    # Horizontal bar: Top 10 companies
    top_10_companies = df['Company'].value_counts().head(10)
    fig5 = px.bar(
        y=top_10_companies.index,
        x=top_10_companies.values,
        orientation='h',
        labels={'x': 'Customer Count', 'y': 'Company'},
        title="Top 10 Companies by Customer Count"
    )
    st.plotly_chart(fig5)


Writing app.py


In [12]:
from pyngrok import ngrok
import subprocess

# Set your ngrok authtoken
!2zarR0Ln43mhGvAsMJ0EhKy9P1c_6jj4ELrgSiLKCKtrfB6CK  # Replace with your actual authtoken

# Start Streamlit server
process = subprocess.Popen(['streamlit', 'run', 'app.py', '--server.port', '8501'])

# Create a public URL with ngrok
public_url = ngrok.connect(8501)
print(f"Streamlit app is running at: {public_url}")

/bin/bash: line 1: 2zarR0Ln43mhGvAsMJ0EhKy9P1c_6jj4ELrgSiLKCKtrfB6CK: command not found


ERROR:pyngrok.process.ngrok:t=2025-07-08T13:08:35+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-07-08T13:08:35+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"
ERROR:pyngrok.process.ngrok:t=2025-07-08T13:08:35+0000 lvl=eror msg="terminating with error" obj=app err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your aut

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.

In [13]:
# Install required libraries
!pip install streamlit pandas numpy matplotlib seaborn plotly pyngrok

# Set ngrok authtoken
!ngrok authtoken 2zarR0Ln43mhGvAsMJ0EhKy9P1c_6jj4ELrgSiLKCKtrfB6CK

# Write the Streamlit app code (app.py)
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    df = pd.read_csv(file_path)
    df = df.drop_duplicates()
    df = df.dropna(subset=['First Name', 'Email'])
    df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
    return df

# File path
file_path = "/content/customers-100.csv"
try:
    df = load_and_clean_data(file_path)
except FileNotFoundError:
    st.error("customers-100.csv not found! Please upload the file.")
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")
    total_customers = len(df)
    unique_countries = df['Country'].nunique()
    most_common_country = df['Country'].mode()[0]
    top_3_cities = df['City'].value_counts().head(3).index.tolist()
    top_5_companies = df['Company'].value_counts().head(5).index.tolist()
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Customers", total_customers)
        st.metric("Unique Countries", unique_countries)
    with col2:
        st.metric("Most Common Country", most_common_country)
        st.write("Top 3 Cities:", ", ".join(top_3_cities))
        st.write("Top 5 Companies:", ", ".join(top_5_companies))

# Table View
elif view == "Table":
    st.title("Editable Customer Table")
    st.write("Edit the dataset below and download the updated version.")
    edited_df = st.data_editor(df, num_rows="dynamic")
    if st.button("Export Edited Data"):
        edited_df.to_csv("edited_customers.csv", index=False)
        st.download_button(
            label="Download Edited CSV",
            data=Path("edited_customers.csv").read_bytes(),
            file_name="edited_customers.csv",
            mime="text/csv"
        )

# Stats View
elif view == "Stats":
    st.title("Customer Data Visualizations")
    st.write("Graphical insights into customer distribution.")
    top_5_countries = df['Country'].value_counts().head(5)
    fig1 = px.bar(x=top_5_countries.index, y=top_5_countries.values,
                  labels={'x': 'Country', 'y': 'Customer Count'}, title="Top 5 Countries by Customer Count")
    st.plotly_chart(fig1)
    top_5_cities = df['City'].value_counts().head(5)
    fig2 = px.bar(x=top_5_cities.index, y=top_5_cities.values,
                  labels={'x': 'City', 'y': 'Customer Count'}, title="Top 5 Cities by Customer Count")
    st.plotly_chart(fig2)
    country_counts = df['Country'].value_counts()
    top_n = 5
    top_countries = country_counts.head(top_n)
    others_count = country_counts.iloc[top_n:].sum()
    pie_data = pd.concat([top_countries, pd.Series([others_count], index=['Others'])])
    fig3 = px.pie(names=pie_data.index, values=pie_data.values,
                  title=f"Customer Distribution by Country (Top {top_n} + Others)")
    st.plotly_chart(fig3)
    df['Email Domain'] = df['Email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else 'Unknown')
    email_domain_counts = df['Email Domain'].value_counts().head(5)
    fig4 = px.pie(names=email_domain_counts.index, values=email_domain_counts.values,
                  title="Customer Distribution by Email Domain")
    st.plotly_chart(fig4)
    top_10_companies = df['Company'].value_counts().head(10)
    fig5 = px.bar(y=top_10_companies.index, x=top_10_companies.values, orientation='h',
                  labels={'x': 'Customer Count', 'y': 'Company'}, title="Top 10 Companies by Customer Count")
    st.plotly_chart(fig5)

# Start Streamlit server and ngrok
from pyngrok import ngrok
import subprocess

# Start Streamlit server
process = subprocess.Popen(['streamlit', 'run', 'app.py', '--server.port', '8501'])

# Create a public URL with ngrok
public_url = ngrok.connect(8501)
print(f"Streamlit app is running at: {public_url}")

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


UsageError: Line magic function `%%writefile` not found.


In [19]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    df = pd.read_csv(file_path)
    df = df.drop_duplicates()
    df = df.dropna(subset=['First Name', 'Email'])
    df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
    return df

# File path
file_path = "customers-100.csv"
try:
    df = load_and_clean_data(file_path)
except FileNotFoundError:
    st.error("customers-100.csv not found! Please upload the file.")
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")
    total_customers = len(df)
    unique_countries = df['Country'].nunique()
    most_common_country = df['Country'].mode()[0]
    top_3_cities = df['City'].value_counts().head(3).index.tolist()
    top_5_companies = df['Company'].value_counts().head(5).index.tolist()
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Customers", total_customers)
        st.metric("Unique Countries", unique_countries)
    with col2:
        st.metric("Most Common Country", most_common_country)
        st.write("Top 3 Cities:", ", ".join(top_3_cities))
        st.write("Top 5 Companies:", ", ".join(top_5_companies))

# Table View
elif view == "Table":
    st.title("Editable Customer Table")
    st.write("Edit the dataset below and download the updated version.")
    edited_df = st.data_editor(df, num_rows="dynamic")
    if st.button("Export Edited Data"):
        edited_df.to_csv("edited_customers.csv", index=False)
        st.download_button(
            label="Download Edited CSV",
            data=Path("edited_customers.csv").read_bytes(),
            file_name="edited_customers.csv",
            mime="text/csv"
        )

# Stats View
elif view == "Stats":
    st.title("Customer Data Visualizations")
    st.write("Graphical insights into customer distribution.")

    # Bar chart: Top 5 countries
    top_5_countries = df['Country'].value_counts().head(5)
    fig1 = px.bar(x=top_5_countries.index, y=top_5_countries.values,
                  labels={'x': 'Country', 'y': 'Customer Count'}, title="Top 5 Countries by Customer Count")
    st.plotly_chart(fig1)

    # Bar chart: Top 5 cities
    top_5_cities = df['City'].value_counts().head(5)
    fig2 = px.bar(x=top_5_cities.index, y=top_5_cities.values,
                  labels={'x': 'City', 'y': 'Customer Count'}, title="Top 5 Cities by Customer Count")
    st.plotly_chart(fig2)

    # Pie chart: Country distribution (Top 5 + Others)
    country_counts = df['Country'].value_counts()
    top_n = 5
    top_countries = country_counts.head(top_n)
    others_count = country_counts.iloc[top_n:].sum()
    pie_data = pd.concat([top_countries, pd.Series([others_count], index=['Others'])])
    fig3 = px.pie(names=pie_data.index, values=pie_data.values,
                  title=f"Customer Distribution by Country (Top {top_n} + Others)")
    st.plotly_chart(fig3)

    # Pie chart: Email domains
    df['Email Domain'] = df['Email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else 'Unknown')
    email_domain_counts = df['Email Domain'].value_counts().head(5)
    fig4 = px.pie(names=email_domain_counts.index, values=email_domain_counts.values,
                  title="Customer Distribution by Email Domain")
    st.plotly_chart(fig4)

    # Horizontal bar: Top 10 companies
    top_10_companies = df['Company'].value_counts().head(10)
    fig5 = px.bar(y=top_10_companies.index, x=top_10_companies.values, orientation='h',
                  labels={'x': 'Customer Count', 'y': 'Company'}, title="Top 10 Companies by Customer Count")
    st.plotly_chart(fig5)

    # Optional: Stacked bar chart (Country + Email Availability)
    email_availability = df.copy()
    email_availability['Email Status'] = email_availability['Email'].isna().map({True: 'Missing Email', False: 'Has Email'})
    stacked_data = email_availability.groupby(['Country', 'Email Status']).size().unstack(fill_value=0)
    fig6 = px.bar(stacked_data, barmode='stack',
                  title="Customers by Country and Email Availability",
                  labels={'value': 'Customer Count', 'Country': 'Country'})
    st.plotly_chart(fig6)

    # Optional: Box plot (Company Name Length)
    df['Company Name Length'] = df['Company'].str.len()
    fig7 = px.box(df, y='Company Name Length',
                  title="Distribution of Company Name Lengths")
    st.plotly_chart(fig7)

Writing app.py


In [20]:
from pyngrok import ngrok
import subprocess

# Start Streamlit server in the background
process = subprocess.Popen(['streamlit', 'run', 'app.py', '--server.port', '8501'])

# Create a public URL with ngrok
public_url = ngrok.connect(8501)
print(f"Streamlit app is running at: {public_url}")

Streamlit app is running at: NgrokTunnel: "https://302d3ab5781d.ngrok-free.app" -> "http://localhost:8501"


In [21]:
!cat app.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    df = pd.read_csv(file_path)
    df = df.drop_duplicates()
    df = df.dropna(subset=['First Name', 'Email'])
    df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
    return df

# File path
file_path = "customers-100.csv"
try:
    df = load_and_clean_data(file_path)
except FileNotFoundError:
    st.error("customers-100.csv not found! Please upload the file.")
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")
    total_customers = len(df)
    unique_

In [22]:
import pandas as pd
df = pd.read_csv('customers-100.csv')
print("Dataset Info:")
print(df.info())
print("\nMissing Emails:", df['Email'].isna().sum())
print("\nCompany Column Sample:", df['Company'].head())

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Index              102 non-null    int64 
 1   Customer Id        102 non-null    object
 2   First Name         101 non-null    object
 3   Last Name          102 non-null    object
 4   Company            101 non-null    object
 5   City               102 non-null    object
 6   Country            102 non-null    object
 7   Phone 1            102 non-null    object
 8   Phone 2            102 non-null    object
 9   Email              101 non-null    object
 10  Subscription Date  102 non-null    object
 11  Website            102 non-null    object
dtypes: int64(1), object(11)
memory usage: 9.7+ KB
None

Missing Emails: 1

Company Column Sample: 0                    Rasmussen Group
1                        Vega-Gentry
2                      Murillo-Perry
3    Domingue

In [23]:
!rm -rf ~/.streamlit/cache

In [25]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    try:
        df = pd.read_csv(file_path)
        df = df.drop_duplicates()
        df = df.dropna(subset=['First Name', 'Email'])
        df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
        st.write("Data loaded and cleaned successfully.")
        return df
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return None

# File path
file_path = "customers-100.csv"
df = load_and_clean_data(file_path)
if df is None:
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Clear cache button
if st.sidebar.button("Clear Cache"):
    st.cache_data.clear()
    st.write("Cache cleared. Please refresh the page.")

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")
    total_customers = len(df)
    unique_countries = df['Country'].nunique()
    most_common_country = df['Country'].mode()[0]
    top_3_cities = df['City'].value_counts().head(3).index.tolist()
    top_5_companies = df['Company'].value_counts().head(5).index.tolist()
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Customers", total_customers)
        st.metric("Unique Countries", unique_countries)
    with col2:
        st.metric("Most Common Country", most_common_country)
        st.write("Top 3 Cities:", ", ".join(top_3_cities))
        st.write("Top 5 Companies:", ", ".join(top_5_companies))

# Table View
elif view == "Table":
    st.title("Editable Customer Table")
    st.write("Edit the dataset below and download the updated version.")
    edited_df = st.data_editor(df, num_rows="dynamic")
    if st.button("Export Edited Data"):
        edited_df.to_csv("edited_customers.csv", index=False)
        st.download_button(
            label="Download Edited CSV",
            data=Path("edited_customers.csv").read_bytes(),
            file_name="edited_customers.csv",
            mime="text/csv"
        )

# Stats View
elif view == "Stats":
    st.title("Customer Data Visualizations")
    st.write("Graphical insights into customer distribution.")

    try:
        # Bar chart: Top 5 countries
        st.write("Rendering: Top 5 Countries Bar Chart")
        top_5_countries = df['Country'].value_counts().head(5)
        fig1 = px.bar(x=top_5_countries.index, y=top_5_countries.values,
                      labels={'x': 'Country', 'y': 'Customer Count'}, title="Top 5 Countries by Customer Count")
        st.plotly_chart(fig1)

        # Bar chart: Top 5 cities
        st.write("Rendering: Top 5 Cities Bar Chart")
        top_5_cities = df['City'].value_counts().head(5)
        fig2 = px.bar(x=top_5_cities.index, y=top_5_cities.values,
                      labels={'x': 'City', 'y': 'Customer Count'}, title="Top 5 Cities by Customer Count")
        st.plotly_chart(fig2)

        # Pie chart: Country distribution (Top 5 + Others)
        st.write("Rendering: Country Distribution Pie Chart")
        country_counts = df['Country'].value_counts()
        top_n = 5
        top_countries = country_counts.head(top_n)
        others_count = country_counts.iloc[top_n:].sum()
        pie_data = pd.concat([top_countries, pd.Series([others_count], index=['Others'])])
        fig3 = px.pie(names=pie_data.index, values=pie_data.values,
                      title=f"Customer Distribution by Country (Top {top_n} + Others)")
        st.plotly_chart(fig3)

        # Pie chart: Email domains
        st.write("Rendering: Email Domains Pie Chart")
        df['Email Domain'] = df['Email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else 'Unknown')
        email_domain_counts = df['Email Domain'].value_counts().head(5)
        fig4 = px.pie(names=email_domain_counts.index, values=email_domain_counts.values,
                      title="Customer Distribution by Email Domain")
        st.plotly_chart(fig4)

        # Horizontal bar: Top 10 companies
        st.write("Rendering: Top 10 Companies Horizontal Bar Chart")
        top_10_companies = df['Company'].value_counts().head(10)
        fig5 = px.bar(y=top_10_companies.index, x=top_10_companies.values, orientation='h',
                      labels={'x': 'Customer Count', 'y': 'Company'}, title="Top 10 Companies by Customer Count")
        st.plotly_chart(fig5)

        # Optional: Stacked bar chart (Country + Email Availability)
        st.write("Rendering: Stacked Bar Chart (Country + Email Availability)")
        email_availability = df.copy()
        email_availability['Email Status'] = email_availability['Email'].isna().map({True: 'Missing Email', False: 'Has Email'})
        stacked_data = email_availability.groupby(['Country', 'Email Status']).size().unstack(fill_value=0)
        if stacked_data.empty:
            st.warning("No data available for Stacked Bar Chart. Check 'Email' column.")
        else:
            fig6 = px.bar(stacked_data, barmode='stack',
                          title="Customers by Country and Email Availability",
                          labels={'value': 'Customer Count', 'Country': 'Country'})
            st.plotly_chart(fig6)

        # Optional: Box plot (Company Name Length)
        st.write("Rendering: Box Plot (Company Name Length)")
        df['Company Name Length'] = df['Company'].str.len()
        if df['Company Name Length'].isna().all():
            st.warning("No valid company name lengths for Box Plot. Check 'Company' column.")
        else:
            fig7 = px.box(df, y='Company Name Length',
                          title="Distribution of Company Name Lengths")
            st.plotly_chart(fig7)

    except Exception as e:
        st.error(f"Error rendering visualizations: {e}")

2025-07-08 15:42:25.945 No runtime found, using MemoryCacheStorageManager


In [26]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    try:
        df = pd.read_csv(file_path)
        df = df.drop_duplicates()
        df = df.dropna(subset=['First Name', 'Email'])
        df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
        st.write("Data loaded and cleaned successfully.")
        return df
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return None

# File path
file_path = "customers-100.csv"
df = load_and_clean_data(file_path)
if df is None:
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Clear cache button
if st.sidebar.button("Clear Cache"):
    st.cache_data.clear()
    st.write("Cache cleared. Please refresh the page.")

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")
    total_customers = len(df)
    unique_countries = df['Country'].nunique()
    most_common_country = df['Country'].mode()[0]
    top_3_cities = df['City'].value_counts().head(3).index.tolist()
    top_5_companies = df['Company'].value_counts().head(5).index.tolist()
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Customers", total_customers)
        st.metric("Unique Countries", unique_countries)
    with col2:
        st.metric("Most Common Country", most_common_country)
        st.write("Top 3 Cities:", ", ".join(top_3_cities))
        st.write("Top 5 Companies:", ", ".join(top_5_companies))

# Table View
elif view == "Table":
    st.title("Editable Customer Table")
    st.write("Edit the dataset below and download the updated version.")
    edited_df = st.data_editor(df, num_rows="dynamic")
    if st.button("Export Edited Data"):
        edited_df.to_csv("edited_customers.csv", index=False)
        st.download_button(
            label="Download Edited CSV",
            data=Path("edited_customers.csv").read_bytes(),
            file_name="edited_customers.csv",
            mime="text/csv"
        )

# Stats View
elif view == "Stats":
    st.title("Customer Data Visualizations")
    st.write("Graphical insights into customer distribution.")

    try:
        # Bar chart: Top 5 countries
        st.write("Rendering: Top 5 Countries Bar Chart")
        top_5_countries = df['Country'].value_counts().head(5)
        fig1 = px.bar(x=top_5_countries.index, y=top_5_countries.values,
                      labels={'x': 'Country', 'y': 'Customer Count'}, title="Top 5 Countries by Customer Count")
        st.plotly_chart(fig1)

        # Bar chart: Top 5 cities
        st.write("Rendering: Top 5 Cities Bar Chart")
        top_5_cities = df['City'].value_counts().head(5)
        fig2 = px.bar(x=top_5_cities.index, y=top_5_cities.values,
                      labels={'x': 'City', 'y': 'Customer Count'}, title="Top 5 Cities by Customer Count")
        st.plotly_chart(fig2)

        # Pie chart: Country distribution (Top 5 + Others)
        st.write("Rendering: Country Distribution Pie Chart")
        country_counts = df['Country'].value_counts()
        top_n = 5
        top_countries = country_counts.head(top_n)
        others_count = country_counts.iloc[top_n:].sum()
        pie_data = pd.concat([top_countries, pd.Series([others_count], index=['Others'])])
        fig3 = px.pie(names=pie_data.index, values=pie_data.values,
                      title=f"Customer Distribution by Country (Top {top_n} + Others)")
        st.plotly_chart(fig3)

        # Pie chart: Email domains
        st.write("Rendering: Email Domains Pie Chart")
        df['Email Domain'] = df['Email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else 'Unknown')
        email_domain_counts = df['Email Domain'].value_counts().head(5)
        fig4 = px.pie(names=email_domain_counts.index, values=email_domain_counts.values,
                      title="Customer Distribution by Email Domain")
        st.plotly_chart(fig4)

        # Horizontal bar: Top 10 companies
        st.write("Rendering: Top 10 Companies Horizontal Bar Chart")
        top_10_companies = df['Company'].value_counts().head(10)
        fig5 = px.bar(y=top_10_companies.index, x=top_10_companies.values, orientation='h',
                      labels={'x': 'Customer Count', 'y': 'Company'}, title="Top 10 Companies by Customer Count")
        st.plotly_chart(fig5)

        # Optional: Stacked bar chart (Country + Email Availability)
        st.write("Rendering: Stacked Bar Chart (Country + Email Availability)")
        email_availability = df.copy()
        email_availability['Email Status'] = email_availability['Email'].isna().map({True: 'Missing Email', False: 'Has Email'})
        stacked_data = email_availability.groupby(['Country', 'Email Status']).size().unstack(fill_value=0)
        if stacked_data.empty:
            st.warning("No data available for Stacked Bar Chart. Check 'Email' column.")
        else:
            fig6 = px.bar(stacked_data, barmode='stack',
                          title="Customers by Country and Email Availability",
                          labels={'value': 'Customer Count', 'Country': 'Country'})
            st.plotly_chart(fig6)

        # Optional: Box plot (Company Name Length)
        st.write("Rendering: Box Plot (Company Name Length)")
        df['Company Name Length'] = df['Company'].str.len()
        if df['Company Name Length'].isna().all():
            st.warning("No valid company name lengths for Box Plot. Check 'Company' column.")
        else:
            fig7 = px.box(df, y='Company Name Length',
                          title="Distribution of Company Name Lengths")
            st.plotly_chart(fig7)

    except Exception as e:
        st.error(f"Error rendering visualizations: {e}")

2025-07-08 15:42:50.344 No runtime found, using MemoryCacheStorageManager


In [27]:
!rm -rf ~/.streamlit/cache

In [28]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    try:
        df = pd.read_csv(file_path)
        df = df.drop_duplicates()
        df = df.dropna(subset=['First Name', 'Email'])
        df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
        st.write("Data loaded and cleaned successfully.")
        return df
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return None

# File path
file_path = "customers-100.csv"
df = load_and_clean_data(file_path)
if df is None:
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Clear cache button
if st.sidebar.button("Clear Cache"):
    st.cache_data.clear()
    st.write("Cache cleared. Please refresh the page.")

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")
    total_customers = len(df)
    unique_countries = df['Country'].nunique()
    most_common_country = df['Country'].mode()[0]
    top_3_cities = df['City'].value_counts().head(3).index.tolist()
    top_5_companies = df['Company'].value_counts().head(5).index.tolist()
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Customers", total_customers)
        st.metric("Unique Countries", unique_countries)
    with col2:
        st.metric("Most Common Country", most_common_country)
        st.write("Top 3 Cities:", ", ".join(top_3_cities))
        st.write("Top 5 Companies:", ", ".join(top_5_companies))

# Table View
elif view == "Table":
    st.title("Editable Customer Table")
    st.write("Edit the dataset below and download the updated version.")
    edited_df = st.data_editor(df, num_rows="dynamic")
    if st.button("Export Edited Data"):
        edited_df.to_csv("edited_customers.csv", index=False)
        st.download_button(
            label="Download Edited CSV",
            data=Path("edited_customers.csv").read_bytes(),
            file_name="edited_customers.csv",
            mime="text/csv"
        )

# Stats View
elif view == "Stats":
    st.title("Customer Data Visualizations")
    st.write("Graphical insights into customer distribution.")

    try:
        # Bar chart: Top 5 countries
        st.write("Rendering: Top 5 Countries Bar Chart")
        top_5_countries = df['Country'].value_counts().head(5)
        fig1 = px.bar(x=top_5_countries.index, y=top_5_countries.values,
                      labels={'x': 'Country', 'y': 'Customer Count'}, title="Top 5 Countries by Customer Count")
        st.plotly_chart(fig1)

        # Bar chart: Top 5 cities
        st.write("Rendering: Top 5 Cities Bar Chart")
        top_5_cities = df['City'].value_counts().head(5)
        fig2 = px.bar(x=top_5_cities.index, y=top_5_cities.values,
                      labels={'x': 'City', 'y': 'Customer Count'}, title="Top 5 Cities by Customer Count")
        st.plotly_chart(fig2)

        # Pie chart: Country distribution (Top 5 + Others)
        st.write("Rendering: Country Distribution Pie Chart")
        country_counts = df['Country'].value_counts()
        top_n = 5
        top_countries = country_counts.head(top_n)
        others_count = country_counts.iloc[top_n:].sum()
        pie_data = pd.concat([top_countries, pd.Series([others_count], index=['Others'])])
        fig3 = px.pie(names=pie_data.index, values=pie_data.values,
                      title=f"Customer Distribution by Country (Top {top_n} + Others)")
        st.plotly_chart(fig3)

        # Pie chart: Email domains
        st.write("Rendering: Email Domains Pie Chart")
        df['Email Domain'] = df['Email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else 'Unknown')
        email_domain_counts = df['Email Domain'].value_counts().head(5)
        fig4 = px.pie(names=email_domain_counts.index, values=email_domain_counts.values,
                      title="Customer Distribution by Email Domain")
        st.plotly_chart(fig4)

        # Horizontal bar: Top 10 companies
        st.write("Rendering: Top 10 Companies Horizontal Bar Chart")
        top_10_companies = df['Company'].value_counts().head(10)
        fig5 = px.bar(y=top_10_companies.index, x=top_10_companies.values, orientation='h',
                      labels={'x': 'Customer Count', 'y': 'Company'}, title="Top 10 Companies by Customer Count")
        st.plotly_chart(fig5)

        # Optional: Stacked bar chart (Country + Email Availability)
        st.write("Rendering: Stacked Bar Chart (Country + Email Availability)")
        email_availability = df.copy()
        email_availability['Email Status'] = email_availability['Email'].isna().map({True: 'Missing Email', False: 'Has Email'})
        stacked_data = email_availability.groupby(['Country', 'Email Status']).size().unstack(fill_value=0)
        if stacked_data.empty:
            st.warning("No data available for Stacked Bar Chart. Check 'Email' column.")
        else:
            fig6 = px.bar(stacked_data, barmode='stack',
                          title="Customers by Country and Email Availability",
                          labels={'value': 'Customer Count', 'Country': 'Country'})
            st.plotly_chart(fig6)

        # Optional: Box plot (Company Name Length)
        st.write("Rendering: Box Plot (Company Name Length)")
        df['Company Name Length'] = df['Company'].str.len()
        if df['Company Name Length'].isna().all():
            st.warning("No valid company name lengths for Box Plot. Check 'Company' column.")
        else:
            fig7 = px.box(df, y='Company Name Length',
                          title="Distribution of Company Name Lengths")
            st.plotly_chart(fig7)

    except Exception as e:
        st.error(f"Error rendering visualizations: {e}")

2025-07-08 15:43:46.051 No runtime found, using MemoryCacheStorageManager


In [29]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path

# Set page config
st.set_page_config(page_title="Customer Manager & Analyzer", layout="wide")

# Load and clean data
@st.cache_data
def load_and_clean_data(file_path):
    try:
        df = pd.read_csv(file_path)
        df = df.drop_duplicates()
        df = df.dropna(subset=['First Name', 'Email'])
        df = df.drop(columns=['Index', 'Subscription Date'], errors='ignore')
        st.write("Data loaded and cleaned successfully.")
        return df
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return None

# File path
file_path = "customers-100.csv"
df = load_and_clean_data(file_path)
if df is None:
    st.stop()

# Sidebar for navigation
st.sidebar.title("Navigation")
view = st.sidebar.radio("Select View", ["Overview", "Table", "Stats"])

# Clear cache button
if st.sidebar.button("Clear Cache"):
    st.cache_data.clear()
    st.write("Cache cleared. Please refresh the page.")

# Overview View
if view == "Overview":
    st.title("Customer Data Overview")
    st.write("Summary statistics of the cleaned customer dataset.")
    total_customers = len(df)
    unique_countries = df['Country'].nunique()
    most_common_country = df['Country'].mode()[0]
    top_3_cities = df['City'].value_counts().head(3).index.tolist()
    top_5_companies = df['Company'].value_counts().head(5).index.tolist()
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Customers", total_customers)
        st.metric("Unique Countries", unique_countries)
    with col2:
        st.metric("Most Common Country", most_common_country)
        st.write("Top 3 Cities:", ", ".join(top_3_cities))
        st.write("Top 5 Companies:", ", ".join(top_5_companies))

# Table View
elif view == "Table":
    st.title("Editable Customer Table")
    st.write("Edit the dataset below and download the updated version.")
    edited_df = st.data_editor(df, num_rows="dynamic")
    if st.button("Export Edited Data"):
        edited_df.to_csv("edited_customers.csv", index=False)
        st.download_button(
            label="Download Edited CSV",
            data=Path("edited_customers.csv").read_bytes(),
            file_name="edited_customers.csv",
            mime="text/csv"
        )

# Stats View
elif view == "Stats":
    st.title("Customer Data Visualizations")
    st.write("Graphical insights into customer distribution.")

    try:
        # Bar chart: Top 5 countries
        st.write("Rendering: Top 5 Countries Bar Chart")
        top_5_countries = df['Country'].value_counts().head(5)
        fig1 = px.bar(x=top_5_countries.index, y=top_5_countries.values,
                      labels={'x': 'Country', 'y': 'Customer Count'}, title="Top 5 Countries by Customer Count")
        st.plotly_chart(fig1)

        # Bar chart: Top 5 cities
        st.write("Rendering: Top 5 Cities Bar Chart")
        top_5_cities = df['City'].value_counts().head(5)
        fig2 = px.bar(x=top_5_cities.index, y=top_5_cities.values,
                      labels={'x': 'City', 'y': 'Customer Count'}, title="Top 5 Cities by Customer Count")
        st.plotly_chart(fig2)

        # Pie chart: Country distribution (Top 5 + Others)
        st.write("Rendering: Country Distribution Pie Chart")
        country_counts = df['Country'].value_counts()
        top_n = 5
        top_countries = country_counts.head(top_n)
        others_count = country_counts.iloc[top_n:].sum()
        pie_data = pd.concat([top_countries, pd.Series([others_count], index=['Others'])])
        fig3 = px.pie(names=pie_data.index, values=pie_data.values,
                      title=f"Customer Distribution by Country (Top {top_n} + Others)")
        st.plotly_chart(fig3)

        # Pie chart: Email domains
        st.write("Rendering: Email Domains Pie Chart")
        df['Email Domain'] = df['Email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else 'Unknown')
        email_domain_counts = df['Email Domain'].value_counts().head(5)
        fig4 = px.pie(names=email_domain_counts.index, values=email_domain_counts.values,
                      title="Customer Distribution by Email Domain")
        st.plotly_chart(fig4)

        # Horizontal bar: Top 10 companies
        st.write("Rendering: Top 10 Companies Horizontal Bar Chart")
        top_10_companies = df['Company'].value_counts().head(10)
        fig5 = px.bar(y=top_10_companies.index, x=top_10_companies.values, orientation='h',
                      labels={'x': 'Customer Count', 'y': 'Company'}, title="Top 10 Companies by Customer Count")
        st.plotly_chart(fig5)

        # Optional: Stacked bar chart (Country + Email Availability)
        st.write("Rendering: Stacked Bar Chart (Country + Email Availability)")
        email_availability = df.copy()
        email_availability['Email Status'] = email_availability['Email'].isna().map({True: 'Missing Email', False: 'Has Email'})
        stacked_data = email_availability.groupby(['Country', 'Email Status']).size().unstack(fill_value=0)
        if stacked_data.empty:
            st.warning("No data available for Stacked Bar Chart. Check 'Email' column.")
        else:
            fig6 = px.bar(stacked_data, barmode='stack',
                          title="Customers by Country and Email Availability",
                          labels={'value': 'Customer Count', 'Country': 'Country'})
            st.plotly_chart(fig6)

        # Optional: Box plot (Company Name Length)
        st.write("Rendering: Box Plot (Company Name Length)")
        df['Company Name Length'] = df['Company'].str.len()
        if df['Company Name Length'].isna().all():
            st.warning("No valid company name lengths for Box Plot. Check 'Company' column.")
        else:
            fig7 = px.box(df, y='Company Name Length',
                          title="Distribution of Company Name Lengths")
            st.plotly_chart(fig7)

    except Exception as e:
        st.error(f"Error rendering visualizations: {e}")

Writing app.py


In [30]:
from pyngrok import ngrok
import subprocess

# Start Streamlit server
process = subprocess.Popen(['streamlit', 'run', 'app.py', '--server.port', '8501'])

# Create a public URL with ngrok
public_url = ngrok.connect(8501)
print(f"Streamlit app is running at: {public_url}")



PyngrokNgrokHTTPError: ngrok client exception, API returned 502: {"error_code":103,"status_code":502,"msg":"failed to start tunnel","details":{"err":"failed to start tunnel: Your account may not run more than 3 tunnels over a single ngrok agent session.\nThe tunnels already running on this session are:\ntn_2zawY0V1CBR4wJnDL6qIL4YdlTS, tn_2zayKgadmizJIhnhxdhA7RBufEC, tn_2zb9BcOkI8JmXdKKOiaoQkXGJti\n\r\n\r\nERR_NGROK_324\r\n"}}


In [31]:
!pkill ngrok

In [32]:
!pkill streamlit

In [33]:
!ps aux | grep ngrok
!ps aux | grep streamlit

root       95033  0.0  0.0   7376  3572 ?        S    15:48   0:00 /bin/bash -c ps aux | grep ngrok
root       95035  0.0  0.0   6484  2280 ?        S    15:48   0:00 grep ngrok
root       37626  0.0  0.0      0     0 ?        Z    13:07   0:00 [streamlit] <defunct>
root       94016  0.4  0.0      0     0 ?        Z    15:45   0:00 [streamlit] <defunct>
root       95037  0.0  0.0   7376  3444 ?        S    15:48   0:00 /bin/bash -c ps aux | grep streamlit
root       95039  0.0  0.0   6484  2300 ?        S    15:48   0:00 grep streamlit


In [34]:
!rm -rf ~/.streamlit/cache

In [35]:
from pyngrok import ngrok
import subprocess
import time

# Terminate existing ngrok tunnels
!pkill ngrok
!pkill streamlit

# Start Streamlit server
process = subprocess.Popen(['streamlit', 'run', 'app.py', '--server.port', '8501'])

# Wait briefly to ensure Streamlit starts
time.sleep(5)

# Create a public URL with ngrok
try:
    public_url = ngrok.connect(8501)
    print(f"Streamlit app is running at: {public_url}")
except Exception as e:
    print(f"Error creating ngrok tunnel: {e}")

Streamlit app is running at: NgrokTunnel: "https://b14c715850b3.ngrok-free.app" -> "http://localhost:8501"
