In [1]:
pip install altair vega_datasets altair_saver

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import re
import altair as alt
import ipywidgets as widgets
from IPython.display import display, clear_output

# Read the CSV and normalise its headers: trimmed, lower-cased, spaces -> underscores
df = pd.read_csv("data.csv", encoding="ISO-8859-1").rename(
    columns=lambda header: header.strip().lower().replace(" ", "_")
)

# Missing text fields become empty strings so later string handling is uniform
df = df.assign(
    market=df["market"].fillna("").astype(str),
    category_list=df["category_list"].fillna("").astype(str),
)

# Per-round funding columns (seed, round_a .. round_h); values that are missing
# or cannot be parsed as numbers count as 0 in the per-company total
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(lambda column: pd.to_numeric(column, errors="coerce"))
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis="columns")

# Define cluster keywords
cluster_keywords_raw = {
    "Health & Life Sciences": ["Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", "Aerospace", "Chemicals", "Testing"],
    "Software & IT": ["Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", "Web Development", "Development Platforms", "Health Care Information Technology", "Application Performance Monitoring", "Browser Extensions", "Information Services", "Privacy", "VoIP", "Information Technology", "Social CRM", "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"],
    "Cloud & Data": ["Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"],
    "Internet & Social": ["Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", "Private Social Networking"],
    "Commerce & Retail": ["E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", "Groceries", "Auctions", "Storage", "Toys", "Distribution"],
    "Finance & Enterprise": ["Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", "Financial services", "Local businesses", "Stock exchanges", "Online rental", "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", "Business Development", "Social Business", "Investment Management", "Franchises", "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", "Wealth Management", "Billing", "Optimization", "Logistics"],
    "CleanTech, Energy & Infrastructure": ["Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", "Electrical Distribution"],
    "Travel & Lifestyle": ["Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", "Women", "Craft Beer", "Sex", "Twin-Tip Skis"],
    "Hardware & Devices": ["Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"],
    "Media, Games, & Entertainment": ["Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", "Contests", "Opinions"],
    "Art & Design": ["Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", "Graphics", "CAD", "Visualization", "Human Computer Interaction"],
    "Education & Employment": ["Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", "Employer Benefits Programs"],
}

# Classify rows into clusters
def clean_text(text):
    """Normalize free text for keyword matching.

    Lower-cases ``text`` and collapses every run of characters outside
    ``a-z0-9`` into a single space, so punctuation and delimiters separate
    words instead of gluing neighbouring words into one token.

    Args:
        text: Any value; it is converted with ``str()`` first.

    Returns:
        str: Lower-case alphanumeric words joined by single spaces, with no
        leading or trailing space.
    """
    return re.sub(r"[^a-z0-9]+", " ", str(text).lower()).strip()


# Raw keyword -> normalized phrase wrapped in single spaces. Memoized so the
# regex runs once per keyword instead of once per keyword per classified row.
_padded_keyword_cache = {}


def _padded_keyword(keyword):
    """Return ``keyword`` normalized by clean_text and wrapped in single spaces."""
    padded = _padded_keyword_cache.get(keyword)
    if padded is None:
        padded = _padded_keyword_cache[keyword] = f" {clean_text(keyword)} "
    return padded


def classify_cluster(market, category_list):
    """Pick the keyword cluster that best matches a market and category list.

    The two inputs are combined and normalized with ``clean_text``. Each
    cluster in ``cluster_keywords_raw`` scores one point per distinct keyword
    (normalized the same way) that occurs in the text as a whole word or whole
    phrase. Matching phrases rather than single tokens is what lets multi-word
    and hyphenated keywords such as "Health care" or "E-commerce" count.

    Args:
        market: Market label of the row (formatted with ``str``).
        category_list: Category text of the row (formatted with ``str``).

    Returns:
        str: The highest-scoring cluster name; on ties the cluster defined
        first in ``cluster_keywords_raw`` wins. ``"Unclassified"`` when no
        keyword matches or there is nothing to match against.
    """
    words = clean_text(f"{market} {category_list}")
    if not words:
        return "Unclassified"

    # Padding both sides with a space turns substring search into whole-word /
    # whole-phrase search. An empty keyword pads to two spaces, which can never
    # occur in the single-spaced haystack, so it never scores.
    haystack = f" {words} "
    scores = {
        cluster: sum(
            phrase in haystack
            for phrase in {_padded_keyword(keyword) for keyword in keywords}
        )
        for cluster, keywords in cluster_keywords_raw.items()
    }
    if not scores:
        return "Unclassified"

    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Tag every company with its best-matching cluster
df["cluster"] = df.apply(lambda row: classify_cluster(row["market"], row["category_list"]), axis=1)

# Top 10 markets by funding per cluster.
# Missing markets were filled with "" when the data was loaded, and groupby
# treats "" as an ordinary key. Without this filter a cluster can contain a
# blank-named "market" that takes a top-10 slot and renders as an unlabeled bar.
df_grouped = (
    df[df["market"].str.strip() != ""]
    .groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
# method="first" breaks funding ties by row order, so ranks are unique per cluster
df_grouped["rank"] = df_grouped.groupby("cluster")["total_funding"].rank(method="first", ascending=False)
df_top10 = df_grouped[(df_grouped["rank"] <= 10) & (df_grouped["cluster"] != "Unclassified")]

# Output area that will host the rendered chart
output = widgets.Output()

# Cluster picker; only clusters present in the top-10 table are offered
dropdown = widgets.Dropdown(
    description="Select Cluster:",
    options=sorted(set(df_top10["cluster"])),
    style=dict(description_width="initial"),
    layout=widgets.Layout(width="60%"),
)

# Display chart on selection
def show_chart(change):
    """Render the top-10 markets bar chart for one cluster into ``output``.

    Serves both as the dropdown's ``value`` observer and as the function used
    for the initial render.

    Args:
        change: Mapping whose ``"new"`` entry is the cluster name to plot.

    Returns:
        The Altair chart that was displayed, so a caller that stores the
        result can keep working with it (for example to save it). ``None`` if
        the chart could not be built.
    """
    # Pre-bind so the return below is safe even if the Output context captures
    # an error raised while building the chart instead of propagating it
    # (TODO confirm Output's exception handling).
    chart = None
    with output:
        clear_output(wait=True)
        cluster = change["new"]
        data = df_top10[df_top10["cluster"] == cluster]
        chart = (
            alt.Chart(data)
            .mark_bar(color="steelblue")
            .encode(
                # sort="-x" orders the market bars by descending funding
                y=alt.Y("market:N", sort="-x", title="Market"),
                x=alt.X("total_funding:Q", title="Total Funding (USD)"),
                tooltip=[
                    alt.Tooltip("market:N", title="Market"),
                    alt.Tooltip("total_funding:Q", title="Total Funding", format="~s"),
                    alt.Tooltip("cluster:N", title="Cluster")
                ]
            )
            .properties(width=750, height=400, title=f"Top 10 Markets in {cluster}")
        )
        display(chart)
    return chart

# Re-render on every change of the dropdown's value, then show both widgets
dropdown.observe(show_chart, names="value")
display(dropdown, output)

# Render once up front for the dropdown's default selection
initial_chart = show_chart({"new": dropdown.value})

Dropdown(description='Select Cluster:', layout=Layout(width='60%'), options=('Art & Design', 'CleanTech, Energ…

Output()

AttributeError: 'NoneType' object has no attribute 'save'

In [None]:
import pandas as pd
import re
import altair as alt

# Read the CSV and normalise its headers: trimmed, lower-cased, spaces -> underscores
df = pd.read_csv("data.csv", encoding="ISO-8859-1").rename(
    columns=lambda header: header.strip().lower().replace(" ", "_")
)

# Missing text fields become empty strings so later string handling is uniform
df = df.assign(
    market=df["market"].fillna("").astype(str),
    category_list=df["category_list"].fillna("").astype(str),
)

# Per-round funding columns (seed, round_a .. round_h); values that are missing
# or cannot be parsed as numbers count as 0 in the per-company total
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(lambda column: pd.to_numeric(column, errors="coerce"))
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis="columns")

# Define cluster keywords
cluster_keywords_raw = {
    "Health & Life Sciences": [
        "Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", 
        "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", 
        "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", 
        "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", 
        "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", 
        "Aerospace", "Chemicals", "Testing"
    ],
    "Software & IT": [
        "Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", 
        "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", 
        "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", 
        "Web Development", "Development Platforms", "Health Care Information Technology", 
        "Application Performance Monitoring", "Browser Extensions", "Information Services", 
        "Privacy", "VoIP", "Information Technology", "Social CRM", 
        "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", 
        "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"
    ],
    "Cloud & Data": [
        "Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", 
        "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", 
        "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", 
        "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"
    ],
    "Internet & Social": [
        "Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", 
        "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", 
        "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", 
        "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", 
        "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", 
        "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", 
        "Private Social Networking"
    ],
    "Commerce & Retail": [
        "E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", 
        "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", 
        "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", 
        "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", 
        "Groceries", "Auctions", "Storage", "Toys", "Distribution"
    ],
    "Finance & Enterprise": [
        "Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", 
        "Financial services", "Local businesses", "Stock exchanges", "Online rental", 
        "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", 
        "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", 
        "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", 
        "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", 
        "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", 
        "Business Development", "Social Business", "Investment Management", "Franchises", 
        "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", 
        "Wealth Management", "Billing", "Optimization", "Logistics"
    ],
    "CleanTech, Energy & Infrastructure": [
        "Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", 
        "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", 
        "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", 
        "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", 
        "Electrical Distribution"
    ],
    "Travel & Lifestyle": [
        "Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", 
        "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", 
        "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", 
        "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", 
        "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", 
        "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", 
        "Women", "Craft Beer", "Sex", "Twin-Tip Skis"
    ],
    "Hardware & Devices": [
        "Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", 
        "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"
    ],
    "Media, Games, & Entertainment": [
        "Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", 
        "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", 
        "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", 
        "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", 
        "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", 
        "Contests", "Opinions"
    ],
    "Art & Design": [
        "Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", 
        "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", 
        "Graphics", "CAD", "Visualization", "Human Computer Interaction"
    ],
    "Education & Employment": [
        "Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", 
        "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", 
        "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", 
        "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", 
        "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", 
        "Employer Benefits Programs"
    ],
}

def clean_text(text):
    """Normalize free text for keyword matching.

    Lower-cases ``text`` and collapses every run of characters outside
    ``a-z0-9`` into a single space, so punctuation and delimiters separate
    words instead of gluing neighbouring words into one token.

    Args:
        text: Any value; it is converted with ``str()`` first.

    Returns:
        str: Lower-case alphanumeric words joined by single spaces, with no
        leading or trailing space.
    """
    return re.sub(r"[^a-z0-9]+", " ", str(text).lower()).strip()


# Raw keyword -> normalized phrase wrapped in single spaces. Memoized so the
# regex runs once per keyword instead of once per keyword per classified row.
_padded_keyword_cache = {}


def _padded_keyword(keyword):
    """Return ``keyword`` normalized by clean_text and wrapped in single spaces."""
    padded = _padded_keyword_cache.get(keyword)
    if padded is None:
        padded = _padded_keyword_cache[keyword] = f" {clean_text(keyword)} "
    return padded


def classify_cluster(market, category_list):
    """Pick the keyword cluster that best matches a market and category list.

    The two inputs are combined and normalized with ``clean_text``. Each
    cluster in ``cluster_keywords_raw`` scores one point per distinct keyword
    (normalized the same way) that occurs in the text as a whole word or whole
    phrase. Matching phrases rather than single tokens is what lets multi-word
    and hyphenated keywords such as "Health care" or "E-commerce" count.

    Args:
        market: Market label of the row (formatted with ``str``).
        category_list: Category text of the row (formatted with ``str``).

    Returns:
        str: The highest-scoring cluster name; on ties the cluster defined
        first in ``cluster_keywords_raw`` wins. ``"Unclassified"`` when no
        keyword matches or there is nothing to match against.
    """
    words = clean_text(f"{market} {category_list}")
    if not words:
        return "Unclassified"

    # Padding both sides with a space turns substring search into whole-word /
    # whole-phrase search. An empty keyword pads to two spaces, which can never
    # occur in the single-spaced haystack, so it never scores.
    haystack = f" {words} "
    scores = {
        cluster: sum(
            phrase in haystack
            for phrase in {_padded_keyword(keyword) for keyword in keywords}
        )
        for cluster, keywords in cluster_keywords_raw.items()
    }
    if not scores:
        return "Unclassified"

    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Tag every company with its best-matching cluster
df["cluster"] = df.apply(
    lambda row: classify_cluster(row["market"], row["category_list"]), axis=1
)

# Top 10 markets by funding per cluster.
# Missing markets were filled with "" when the data was loaded, and groupby
# treats "" as an ordinary key. Without this filter a cluster can contain a
# blank-named "market" that takes a top-10 slot and renders as an unlabeled bar.
df_grouped = (
    df[df["market"].str.strip() != ""]
    .groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
# method="first" breaks funding ties by row order, so ranks are unique per cluster
df_grouped["rank"] = df_grouped.groupby("cluster")["total_funding"].rank(
    method="first", ascending=False
)
df_top10 = df_grouped[
    (df_grouped["rank"] <= 10) & (df_grouped["cluster"] != "Unclassified")
]

# Dropdown bound to a point selection on the "cluster" field; the
# alphabetically first cluster is passed as the initial value.
input_dropdown = alt.binding_select(
    name="Select Cluster: ",
    options=sorted(df_top10["cluster"].drop_duplicates()),
)
selection = alt.selection_point(
    bind=input_dropdown,
    fields=["cluster"],
    value=sorted(df_top10["cluster"].drop_duplicates())[0],
)

# Horizontal bars of total funding per market, filtered to the selected cluster.
# sort="-x" orders the bars by descending funding.
interactive_chart = (
    alt.Chart(df_top10)
    .mark_bar(color="steelblue")
    .encode(
        x=alt.X("total_funding:Q", title="Total Funding (USD)"),
        y=alt.Y(
            "market:N",
            sort="-x",
            axis=alt.Axis(
                title="Market",
                titlePadding=10,
                # Raised label limit so long market names are not cut short
                labelLimit=200,
                labelOverlap=False,
                labelSeparation=5,
            ),
        ),
        tooltip=[
            alt.Tooltip("market:N", title="Market"),
            alt.Tooltip("total_funding:Q", format="~s", title="Total Funding"),
            alt.Tooltip("cluster:N", title="Cluster"),
        ],
    )
    # Fixed width; the step height gives each market bar a 40px band
    .properties(width=800, height=alt.Step(40))
    .add_params(selection)
    .transform_filter(selection)
)

# One row per cluster: the data behind the selection-driven title
cluster_df = pd.DataFrame(
    sorted(df_top10["cluster"].drop_duplicates()), columns=["cluster"]
)

# Text-only chart acting as a dynamic title: the same selection filters it
# down to the chosen cluster's row, whose computed label is then drawn.
title_chart = (
    alt.Chart(cluster_df)
    .mark_text(dy=-10, fontSize=20)
    .encode(
        text=alt.Text("dynamic_title:N"),
        # Fixed pixel position: half of the 800px bar-chart width
        x=alt.value(400),
    )
    .add_params(selection)
    .transform_calculate(dynamic_title="('Top 10 Markets in ' + datum.cluster)")
    .transform_filter(selection)
)

# Stack the title above the bars
final_chart = alt.vconcat(title_chart, interactive_chart, spacing=5)

# Render in the notebook, then export a standalone HTML file
final_chart.display()
final_chart.save("top10_markets_by_industry.html")

In [None]:
import pandas as pd
import re
import altair as alt

# Read the CSV and normalise its headers: trimmed, lower-cased, spaces -> underscores
df = pd.read_csv("data.csv", encoding="ISO-8859-1").rename(
    columns=lambda header: header.strip().lower().replace(" ", "_")
)

# Missing text fields become empty strings so later string handling is uniform
df = df.assign(
    market=df["market"].fillna("").astype(str),
    category_list=df["category_list"].fillna("").astype(str),
)

# Per-round funding columns (seed, round_a .. round_h); values that are missing
# or cannot be parsed as numbers count as 0 in the per-company total
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(lambda column: pd.to_numeric(column, errors="coerce"))
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis="columns")

# Define cluster keywords
cluster_keywords_raw = {
    "Health & Life Sciences": ["Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", "Aerospace", "Chemicals", "Testing"],
    "Software & IT": ["Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", "Web Development", "Development Platforms", "Health Care Information Technology", "Application Performance Monitoring", "Browser Extensions", "Information Services", "Privacy", "VoIP", "Information Technology", "Social CRM", "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"],
    "Cloud & Data": ["Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"],
    "Internet & Social": ["Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", "Private Social Networking"],
    "Commerce & Retail": ["E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", "Groceries", "Auctions", "Storage", "Toys", "Distribution"],
    "Finance & Enterprise": ["Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", "Financial services", "Local businesses", "Stock exchanges", "Online rental", "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", "Business Development", "Social Business", "Investment Management", "Franchises", "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", "Wealth Management", "Billing", "Optimization", "Logistics"],
    "CleanTech, Energy & Infrastructure": ["Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", "Electrical Distribution"],
    "Travel & Lifestyle": ["Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", "Women", "Craft Beer", "Sex", "Twin-Tip Skis"],
    "Hardware & Devices": ["Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"],
    "Media, Games, & Entertainment": ["Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", "Contests", "Opinions"],
    "Art & Design": ["Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", "Graphics", "CAD", "Visualization", "Human Computer Interaction"],
    "Education & Employment": ["Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", "Employer Benefits Programs"],
}

def clean_text(text):
    """Normalize free text for keyword matching.

    Lower-cases ``text`` and collapses every run of characters outside
    ``a-z0-9`` into a single space, so punctuation and delimiters separate
    words instead of gluing neighbouring words into one token.

    Args:
        text: Any value; it is converted with ``str()`` first.

    Returns:
        str: Lower-case alphanumeric words joined by single spaces, with no
        leading or trailing space.
    """
    return re.sub(r"[^a-z0-9]+", " ", str(text).lower()).strip()


# Raw keyword -> normalized phrase wrapped in single spaces. Memoized so the
# regex runs once per keyword instead of once per keyword per classified row.
_padded_keyword_cache = {}


def _padded_keyword(keyword):
    """Return ``keyword`` normalized by clean_text and wrapped in single spaces."""
    padded = _padded_keyword_cache.get(keyword)
    if padded is None:
        padded = _padded_keyword_cache[keyword] = f" {clean_text(keyword)} "
    return padded


def classify_cluster(market, category_list):
    """Pick the keyword cluster that best matches a market and category list.

    The two inputs are combined and normalized with ``clean_text``. Each
    cluster in ``cluster_keywords_raw`` scores one point per distinct keyword
    (normalized the same way) that occurs in the text as a whole word or whole
    phrase. Matching phrases rather than single tokens is what lets multi-word
    and hyphenated keywords such as "Health care" or "E-commerce" count.

    Args:
        market: Market label of the row (formatted with ``str``).
        category_list: Category text of the row (formatted with ``str``).

    Returns:
        str: The highest-scoring cluster name; on ties the cluster defined
        first in ``cluster_keywords_raw`` wins. ``"Unclassified"`` when no
        keyword matches or there is nothing to match against.
    """
    words = clean_text(f"{market} {category_list}")
    if not words:
        return "Unclassified"

    # Padding both sides with a space turns substring search into whole-word /
    # whole-phrase search. An empty keyword pads to two spaces, which can never
    # occur in the single-spaced haystack, so it never scores.
    haystack = f" {words} "
    scores = {
        cluster: sum(
            phrase in haystack
            for phrase in {_padded_keyword(keyword) for keyword in keywords}
        )
        for cluster, keywords in cluster_keywords_raw.items()
    }
    if not scores:
        return "Unclassified"

    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Tag every company with its best-matching cluster
df["cluster"] = df.apply(lambda row: classify_cluster(row["market"], row["category_list"]), axis=1)

# Top 10 markets by funding per cluster.
# Missing markets were filled with "" when the data was loaded, and groupby
# treats "" as an ordinary key. Without this filter a cluster can contain a
# blank-named "market" that takes a top-10 slot and renders as an unlabeled bar.
df_grouped = (
    df[df["market"].str.strip() != ""]
    .groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
# method="first" breaks funding ties by row order, so ranks are unique per cluster
df_grouped["rank"] = df_grouped.groupby("cluster")["total_funding"].rank(method="first", ascending=False)
df_top10 = df_grouped[(df_grouped["rank"] <= 10) & (df_grouped["cluster"] != "Unclassified")]

# Dropdown bound to a point selection on the "cluster" field; the
# alphabetically first cluster is passed as the initial value.
input_dropdown = alt.binding_select(
    name="Select Cluster: ",
    options=sorted(df_top10["cluster"].drop_duplicates()),
)
selection = alt.selection_point(
    bind=input_dropdown,
    fields=["cluster"],
    value=sorted(df_top10["cluster"].drop_duplicates())[0],
)

# Horizontal bars of total funding per market, filtered to the selected cluster.
# sort="-x" orders the bars by descending funding.
interactive_chart = (
    alt.Chart(df_top10)
    .mark_bar(color="steelblue")
    .encode(
        x=alt.X("total_funding:Q", title="Total Funding (USD)"),
        y=alt.Y(
            "market:N",
            sort="-x",
            axis=alt.Axis(
                title="Market",      # explicit y-axis title
                titlePadding=10,     # extra room between the title and the labels
                labelLimit=0         # no truncation of long market names
            )
        ),
        tooltip=[
            alt.Tooltip("market:N", title="Market"),
            alt.Tooltip("total_funding:Q", format="~s", title="Total Funding"),
            alt.Tooltip("cluster:N", title="Cluster")
        ]
    )
    # Fixed width; the step height gives each market bar a 40px band
    .properties(width=750, height=alt.Step(40))
    .add_params(selection)
    .transform_filter(selection)
)

# One row per cluster: the data behind the selection-driven title
cluster_df = pd.DataFrame(
    sorted(df_top10["cluster"].drop_duplicates()), columns=["cluster"]
)

# Text-only chart acting as a dynamic title: the same selection filters it
# down to the chosen cluster's row, whose computed label is then drawn.
title_chart = (
    alt.Chart(cluster_df)
    .mark_text(dy=-10, fontSize=20)
    .encode(
        text=alt.Text("dynamic_title:N"),
        x=alt.value(375)  # half of the 750px bar-chart width
    )
    .add_params(selection)
    .transform_calculate(dynamic_title="('Top 10 Markets in ' + datum.cluster)")
    .transform_filter(selection)
)

# Stack the title above the bars and apply the shared y-axis configuration
final_chart = alt.vconcat(title_chart, interactive_chart, spacing=5).configure_axisY(
    title='Market',
    titlePadding=10,
    labelLimit=0
)

# Render in the notebook, then export a standalone HTML file
final_chart.display()
final_chart.save("top10_markets_industry.html")

In [None]:
import pandas as pd
import re
import altair as alt

# Read the CSV and normalise its headers: trimmed, lower-cased, spaces -> underscores
df = pd.read_csv("data.csv", encoding="ISO-8859-1").rename(
    columns=lambda header: header.strip().lower().replace(" ", "_")
)

# Missing text fields become empty strings so later string handling is uniform
df = df.assign(
    market=df["market"].fillna("").astype(str),
    category_list=df["category_list"].fillna("").astype(str),
)

# Per-round funding columns (seed, round_a .. round_h); values that are missing
# or cannot be parsed as numbers count as 0 in the per-company total
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(lambda column: pd.to_numeric(column, errors="coerce"))
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis="columns")

# Define cluster keywords
cluster_keywords_raw = {
    "Health & Life Sciences": ["Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", "Aerospace", "Chemicals", "Testing"],
    "Software & IT": ["Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", "Web Development", "Development Platforms", "Health Care Information Technology", "Application Performance Monitoring", "Browser Extensions", "Information Services", "Privacy", "VoIP", "Information Technology", "Social CRM", "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"],
    "Cloud & Data": ["Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"],
    "Internet & Social": ["Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", "Private Social Networking"],
    "Commerce & Retail": ["E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", "Groceries", "Auctions", "Storage", "Toys", "Distribution"],
    "Finance & Enterprise": ["Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", "Financial services", "Local businesses", "Stock exchanges", "Online rental", "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", "Business Development", "Social Business", "Investment Management", "Franchises", "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", "Wealth Management", "Billing", "Optimization", "Logistics"],
    "CleanTech, Energy & Infrastructure": ["Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", "Electrical Distribution"],
    "Travel & Lifestyle": ["Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", "Women", "Craft Beer", "Sex", "Twin-Tip Skis"],
    "Hardware & Devices": ["Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"],
    "Media, Games, & Entertainment": ["Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", "Contests", "Opinions"],
    "Art & Design": ["Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", "Graphics", "CAD", "Visualization", "Human Computer Interaction"],
    "Education & Employment": ["Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", "Employer Benefits Programs"],
}

# Memo of raw keyword -> space-padded normalized phrase. The same keywords are
# checked for every row, so each one is normalized only once.
_KEYWORD_PATTERNS = {}

def clean_text(text):
    """Normalize text to lowercase ASCII letters/digits separated by single spaces.

    Every run of other characters (punctuation, separators such as "|" or "-",
    repeated whitespace) becomes one space instead of being deleted, so the
    words on either side of a separator remain distinct.
    """
    return re.sub(r"[^a-zA-Z0-9]+", " ", str(text)).strip().lower()

def _keyword_pattern(keyword):
    """Return `keyword` normalized with clean_text and padded with one space per side."""
    pattern = _KEYWORD_PATTERNS.get(keyword)
    if pattern is None:
        pattern = _KEYWORD_PATTERNS[keyword] = f" {clean_text(keyword)} "
    return pattern

def classify_cluster(market, category_list, keywords=None):
    """Return the name of the cluster whose keywords best match a company.

    The market and category text is normalized with clean_text, and a cluster
    scores one point for each distinct keyword that occurs in it as a whole
    word or whole phrase. Keywords are normalized the same way as the text, so
    multi-word ("Health care") and punctuated ("E-commerce") keywords can
    match; compared raw against single-word tokens they never could.

    Args:
        market: Market label of the company.
        category_list: Category text of the company.
        keywords: Optional mapping of cluster name -> iterable of keyword
            strings. Defaults to the module-level ``cluster_keywords_raw``.

    Returns:
        The best-scoring cluster name (ties go to the cluster listed first in
        ``keywords``), or "Unclassified" when nothing matches or there are no
        clusters.
    """
    if keywords is None:
        keywords = cluster_keywords_raw
    text = clean_text(f"{market} {category_list}")
    # Pad with spaces so " phrase " only matches on whole-word boundaries.
    haystack = f" {text} "
    scores = {
        cluster: len({
            pattern
            for pattern in map(_keyword_pattern, phrases)
            # pattern.strip() skips keywords that normalize to nothing.
            if pattern.strip() and pattern in haystack
        })
        for cluster, phrases in keywords.items()
    }
    if not scores:
        return "Unclassified"
    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Tag every company with its best-matching cluster.
df["cluster"] = df.apply(
    lambda company: classify_cluster(company["market"], company["category_list"]),
    axis=1,
)

# Total funding per (cluster, market), ranked inside each cluster with the
# largest total first; only ranks 1-10 of real clusters are kept.
df_grouped = (
    df.groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
df_grouped["rank"] = (
    df_grouped.groupby("cluster")["total_funding"]
    .rank(method="first", ascending=False)
)
in_top10 = df_grouped["rank"] <= 10
is_classified = df_grouped["cluster"] != "Unclassified"
df_top10 = df_grouped[in_top10 & is_classified]

# Dropdown bound to an Altair point selection on the cluster field; the
# alphabetically first cluster is preselected.
cluster_names = sorted(df_top10["cluster"].unique())
input_dropdown = alt.binding_select(options=cluster_names, name="Select Cluster: ")
selection = alt.selection_point(
    fields=["cluster"],
    bind=input_dropdown,
    value=cluster_names[0],
)

# Bar chart of the selected cluster's markets.
bar_tooltips = [
    alt.Tooltip("market:N", title="Market"),
    alt.Tooltip("total_funding:Q", title="Total Funding", format="~s"),
    alt.Tooltip("cluster:N", title="Cluster"),
]
bars = alt.Chart(df_top10).mark_bar(color="steelblue").encode(
    y=alt.Y("market:N", sort="-x", title="Market"),
    x=alt.X("total_funding:Q", title="Total Funding (USD)"),
    tooltip=bar_tooltips,
)
interactive_chart = (
    bars
    .add_params(selection)
    .transform_filter(selection)
    # A per-bar step lets the chart height follow the number of markets shown.
    .properties(width=750, height=alt.Step(40))
)

# One row per cluster; after filtering by the selection only the chosen
# cluster's title text is drawn.
cluster_df = pd.DataFrame({"cluster": cluster_names})
title_text = (
    alt.Chart(cluster_df)
    .transform_calculate(dynamic_title="('Top 10 Markets in ' + datum.cluster)")
    .mark_text(fontSize=20, dy=-10)
    .encode(
        x=alt.value(375),  # horizontal midpoint of the 750px-wide bar chart
        text=alt.Text("dynamic_title:N"),
    )
)
title_chart = title_text.add_params(selection).transform_filter(selection)

# Title on top, bars underneath.
stacked_views = [title_chart, interactive_chart]
final_chart = alt.vconcat(*stacked_views, spacing=5)

# Render inline and export a standalone HTML copy.
final_chart.display()
final_chart.save("top10_markets_by_industry.html")

In [None]:
import pandas as pd
import re
import altair as alt

# Read the source CSV and normalize its headers to lowercase snake_case.
df = pd.read_csv("data.csv", encoding="ISO-8859-1")
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# The two text columns feed the classifier: missing values become "".
for text_column in ("market", "category_list"):
    df[text_column] = df[text_column].fillna("").astype(str)

# Per-round funding columns coerced to numbers (unparseable or missing -> 0),
# plus their row-wise sum.
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(pd.to_numeric, errors="coerce")
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis=1)

# Cluster name -> keywords (market / category names) that count as evidence for
# that cluster; classify_cluster scores each company against these lists.
# Some keywords appear under more than one cluster (e.g. "Web hosting" is in
# both "Software & IT" and "Cloud & Data").
# NOTE(review): "Unifed Communications" looks like a typo for "Unified" —
# confirm against the spelling used in data.csv before changing it.
cluster_keywords_raw = {
    "Health & Life Sciences": ["Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", "Aerospace", "Chemicals", "Testing"],
    "Software & IT": ["Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", "Web Development", "Development Platforms", "Health Care Information Technology", "Application Performance Monitoring", "Browser Extensions", "Information Services", "Privacy", "VoIP", "Information Technology", "Social CRM", "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"],
    "Cloud & Data": ["Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"],
    "Internet & Social": ["Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", "Private Social Networking"],
    "Commerce & Retail": ["E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", "Groceries", "Auctions", "Storage", "Toys", "Distribution"],
    "Finance & Enterprise": ["Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", "Financial services", "Local businesses", "Stock exchanges", "Online rental", "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", "Business Development", "Social Business", "Investment Management", "Franchises", "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", "Wealth Management", "Billing", "Optimization", "Logistics"],
    "CleanTech, Energy & Infrastructure": ["Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", "Electrical Distribution"],
    "Travel & Lifestyle": ["Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", "Women", "Craft Beer", "Sex", "Twin-Tip Skis"],
    "Hardware & Devices": ["Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"],
    "Media, Games, & Entertainment": ["Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", "Contests", "Opinions"],
    "Art & Design": ["Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", "Graphics", "CAD", "Visualization", "Human Computer Interaction"],
    "Education & Employment": ["Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", "Employer Benefits Programs"],
}

# Memo of raw keyword -> space-padded normalized phrase. The same keywords are
# checked for every row, so each one is normalized only once.
_KEYWORD_PATTERNS = {}

def clean_text(text):
    """Normalize text to lowercase ASCII letters/digits separated by single spaces.

    Every run of other characters (punctuation, separators such as "|" or "-",
    repeated whitespace) becomes one space instead of being deleted, so the
    words on either side of a separator remain distinct.
    """
    return re.sub(r"[^a-zA-Z0-9]+", " ", str(text)).strip().lower()

def _keyword_pattern(keyword):
    """Return `keyword` normalized with clean_text and padded with one space per side."""
    pattern = _KEYWORD_PATTERNS.get(keyword)
    if pattern is None:
        pattern = _KEYWORD_PATTERNS[keyword] = f" {clean_text(keyword)} "
    return pattern

def classify_cluster(market, category_list, keywords=None):
    """Return the name of the cluster whose keywords best match a company.

    The market and category text is normalized with clean_text, and a cluster
    scores one point for each distinct keyword that occurs in it as a whole
    word or whole phrase. Keywords are normalized the same way as the text, so
    multi-word ("Health care") and punctuated ("E-commerce") keywords can
    match; compared raw against single-word tokens they never could.

    Args:
        market: Market label of the company.
        category_list: Category text of the company.
        keywords: Optional mapping of cluster name -> iterable of keyword
            strings. Defaults to the module-level ``cluster_keywords_raw``.

    Returns:
        The best-scoring cluster name (ties go to the cluster listed first in
        ``keywords``), or "Unclassified" when nothing matches or there are no
        clusters.
    """
    if keywords is None:
        keywords = cluster_keywords_raw
    text = clean_text(f"{market} {category_list}")
    # Pad with spaces so " phrase " only matches on whole-word boundaries.
    haystack = f" {text} "
    scores = {
        cluster: len({
            pattern
            for pattern in map(_keyword_pattern, phrases)
            # pattern.strip() skips keywords that normalize to nothing.
            if pattern.strip() and pattern in haystack
        })
        for cluster, phrases in keywords.items()
    }
    if not scores:
        return "Unclassified"
    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Tag every company with its best-matching cluster.
df["cluster"] = df.apply(
    lambda company: classify_cluster(company["market"], company["category_list"]),
    axis=1,
)

# Total funding per (cluster, market), ranked inside each cluster with the
# largest total first; only ranks 1-10 of real clusters are kept.
df_grouped = (
    df.groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
df_grouped["rank"] = (
    df_grouped.groupby("cluster")["total_funding"]
    .rank(method="first", ascending=False)
)
in_top10 = df_grouped["rank"] <= 10
is_classified = df_grouped["cluster"] != "Unclassified"
df_top10 = df_grouped[in_top10 & is_classified]

# Dropdown bound to an Altair point selection on the cluster field; the
# alphabetically first cluster is preselected.
cluster_names = sorted(df_top10["cluster"].unique())
input_dropdown = alt.binding_select(options=cluster_names, name="Select Cluster: ")
selection = alt.selection_point(
    fields=["cluster"],
    bind=input_dropdown,
    value=cluster_names[0],
)

# Bar chart of the selected cluster's markets (fixed 750x400 canvas).
bar_tooltips = [
    alt.Tooltip("market:N", title="Market"),
    alt.Tooltip("total_funding:Q", title="Total Funding", format="~s"),
    alt.Tooltip("cluster:N", title="Cluster"),
]
bars = alt.Chart(df_top10).mark_bar(color="steelblue").encode(
    y=alt.Y("market:N", sort="-x", title="Market"),
    x=alt.X("total_funding:Q", title="Total Funding (USD)"),
    tooltip=bar_tooltips,
)
interactive_chart = (
    bars
    .add_params(selection)
    .transform_filter(selection)
    .properties(width=750, height=400)
)

# One row per cluster; after filtering by the selection only the chosen
# cluster's title text is drawn.
cluster_df = pd.DataFrame({"cluster": cluster_names})
title_text = (
    alt.Chart(cluster_df)
    .transform_calculate(dynamic_title="('Top 10 Markets in ' + datum.cluster)")
    .mark_text(fontSize=20, dy=-10)
    .encode(
        x=alt.value(375),  # horizontal midpoint of the 750px-wide bar chart
        text=alt.Text("dynamic_title:N"),
    )
)
title_chart = title_text.add_params(selection).transform_filter(selection)

# Title on top, bars underneath.
stacked_views = [title_chart, interactive_chart]
final_chart = alt.vconcat(*stacked_views, spacing=5)

# Render inline and export a standalone HTML copy.
final_chart.display()
final_chart.save("top10_markets_by_cluster.html")

In [None]:
import pandas as pd
import re
import altair as alt

# Read the source CSV and normalize its headers to lowercase snake_case.
df = pd.read_csv("data.csv", encoding="ISO-8859-1")
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# The two text columns feed the classifier: missing values become "".
for text_column in ("market", "category_list"):
    df[text_column] = df[text_column].fillna("").astype(str)

# Per-round funding columns coerced to numbers (unparseable or missing -> 0),
# plus their row-wise sum.
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(pd.to_numeric, errors="coerce")
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis=1)

# Cluster name -> keywords (market / category names) that count as evidence for
# that cluster; classify_cluster scores each company against these lists.
# Some keywords appear under more than one cluster (e.g. "Web hosting" is in
# both "Software & IT" and "Cloud & Data").
# NOTE(review): "Unifed Communications" looks like a typo for "Unified" —
# confirm against the spelling used in data.csv before changing it.
cluster_keywords_raw = {
    "Health & Life Sciences": ["Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", "Aerospace", "Chemicals", "Testing"],
    "Software & IT": ["Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", "Web Development", "Development Platforms", "Health Care Information Technology", "Application Performance Monitoring", "Browser Extensions", "Information Services", "Privacy", "VoIP", "Information Technology", "Social CRM", "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"],
    "Cloud & Data": ["Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"],
    "Internet & Social": ["Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", "Private Social Networking"],
    "Commerce & Retail": ["E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", "Groceries", "Auctions", "Storage", "Toys", "Distribution"],
    "Finance & Enterprise": ["Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", "Financial services", "Local businesses", "Stock exchanges", "Online rental", "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", "Business Development", "Social Business", "Investment Management", "Franchises", "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", "Wealth Management", "Billing", "Optimization", "Logistics"],
    "CleanTech, Energy & Infrastructure": ["Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", "Electrical Distribution"],
    "Travel & Lifestyle": ["Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", "Women", "Craft Beer", "Sex", "Twin-Tip Skis"],
    "Hardware & Devices": ["Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"],
    "Media, Games, & Entertainment": ["Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", "Contests", "Opinions"],
    "Art & Design": ["Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", "Graphics", "CAD", "Visualization", "Human Computer Interaction"],
    "Education & Employment": ["Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", "Employer Benefits Programs"],
}

# Memo of raw keyword -> space-padded normalized phrase. The same keywords are
# checked for every row, so each one is normalized only once.
_KEYWORD_PATTERNS = {}

def clean_text(text):
    """Normalize text to lowercase ASCII letters/digits separated by single spaces.

    Every run of other characters (punctuation, separators such as "|" or "-",
    repeated whitespace) becomes one space instead of being deleted, so the
    words on either side of a separator remain distinct.
    """
    return re.sub(r"[^a-zA-Z0-9]+", " ", str(text)).strip().lower()

def _keyword_pattern(keyword):
    """Return `keyword` normalized with clean_text and padded with one space per side."""
    pattern = _KEYWORD_PATTERNS.get(keyword)
    if pattern is None:
        pattern = _KEYWORD_PATTERNS[keyword] = f" {clean_text(keyword)} "
    return pattern

def classify_cluster(market, category_list, keywords=None):
    """Return the name of the cluster whose keywords best match a company.

    The market and category text is normalized with clean_text, and a cluster
    scores one point for each distinct keyword that occurs in it as a whole
    word or whole phrase. Keywords are normalized the same way as the text, so
    multi-word ("Health care") and punctuated ("E-commerce") keywords can
    match; compared raw against single-word tokens they never could.

    Args:
        market: Market label of the company.
        category_list: Category text of the company.
        keywords: Optional mapping of cluster name -> iterable of keyword
            strings. Defaults to the module-level ``cluster_keywords_raw``.

    Returns:
        The best-scoring cluster name (ties go to the cluster listed first in
        ``keywords``), or "Unclassified" when nothing matches or there are no
        clusters.
    """
    if keywords is None:
        keywords = cluster_keywords_raw
    text = clean_text(f"{market} {category_list}")
    # Pad with spaces so " phrase " only matches on whole-word boundaries.
    haystack = f" {text} "
    scores = {
        cluster: len({
            pattern
            for pattern in map(_keyword_pattern, phrases)
            # pattern.strip() skips keywords that normalize to nothing.
            if pattern.strip() and pattern in haystack
        })
        for cluster, phrases in keywords.items()
    }
    if not scores:
        return "Unclassified"
    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Tag every company with its best-matching cluster.
df["cluster"] = df.apply(
    lambda company: classify_cluster(company["market"], company["category_list"]),
    axis=1,
)

# Total funding per (cluster, market), ranked inside each cluster with the
# largest total first; only ranks 1-10 of real clusters are kept.
df_grouped = (
    df.groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
df_grouped["rank"] = (
    df_grouped.groupby("cluster")["total_funding"]
    .rank(method="first", ascending=False)
)
in_top10 = df_grouped["rank"] <= 10
is_classified = df_grouped["cluster"] != "Unclassified"
df_top10 = df_grouped[in_top10 & is_classified]

# Dropdown bound to an Altair point selection on the cluster field; the
# alphabetically first cluster is preselected.
cluster_names = sorted(df_top10["cluster"].unique())
input_dropdown = alt.binding_select(options=cluster_names, name="Select Cluster: ")
selection = alt.selection_point(
    fields=["cluster"],
    bind=input_dropdown,
    value=cluster_names[0],
)

# Bar chart of the selected cluster's markets, with a fixed chart title.
bar_tooltips = [
    alt.Tooltip("market:N", title="Market"),
    alt.Tooltip("total_funding:Q", title="Total Funding", format="~s"),
    alt.Tooltip("cluster:N", title="Cluster"),
]
bars = alt.Chart(df_top10).mark_bar(color="steelblue").encode(
    y=alt.Y("market:N", sort="-x", title="Market"),
    x=alt.X("total_funding:Q", title="Total Funding (USD)"),
    tooltip=bar_tooltips,
)
interactive_chart = (
    bars
    .add_params(selection)
    .transform_filter(selection)
    .properties(width=750, height=400, title="Top 10 Markets by Cluster")
)

# Render inline and export a standalone HTML copy.
interactive_chart.display()
interactive_chart.save("top10_markets_by_cluster_interactive.html")

In [None]:
import pandas as pd
import re
import altair as alt
import ipywidgets as widgets
from IPython.display import display, clear_output

# Read the source CSV and normalize its headers to lowercase snake_case.
df = pd.read_csv("data.csv", encoding="ISO-8859-1")
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# The two text columns feed the classifier: missing values become "".
for text_column in ("market", "category_list"):
    df[text_column] = df[text_column].fillna("").astype(str)

# Per-round funding columns coerced to numbers (unparseable or missing -> 0),
# plus their row-wise sum.
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(pd.to_numeric, errors="coerce")
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis=1)

# Cluster name -> keywords (market / category names) that count as evidence for
# that cluster; classify_cluster scores each company against these lists.
# Some keywords appear under more than one cluster (e.g. "Web hosting" is in
# both "Software & IT" and "Cloud & Data").
# NOTE(review): "Unifed Communications" looks like a typo for "Unified" —
# confirm against the spelling used in data.csv before changing it.
cluster_keywords_raw = {
    "Health & Life Sciences": ["Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", "Aerospace", "Chemicals", "Testing"],
    "Software & IT": ["Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", "Web Development", "Development Platforms", "Health Care Information Technology", "Application Performance Monitoring", "Browser Extensions", "Information Services", "Privacy", "VoIP", "Information Technology", "Social CRM", "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"],
    "Cloud & Data": ["Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"],
    "Internet & Social": ["Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", "Private Social Networking"],
    "Commerce & Retail": ["E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", "Groceries", "Auctions", "Storage", "Toys", "Distribution"],
    "Finance & Enterprise": ["Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", "Financial services", "Local businesses", "Stock exchanges", "Online rental", "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", "Business Development", "Social Business", "Investment Management", "Franchises", "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", "Wealth Management", "Billing", "Optimization", "Logistics"],
    "CleanTech, Energy & Infrastructure": ["Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", "Electrical Distribution"],
    "Travel & Lifestyle": ["Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", "Women", "Craft Beer", "Sex", "Twin-Tip Skis"],
    "Hardware & Devices": ["Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"],
    "Media, Games, & Entertainment": ["Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", "Contests", "Opinions"],
    "Art & Design": ["Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", "Graphics", "CAD", "Visualization", "Human Computer Interaction"],
    "Education & Employment": ["Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", "Employer Benefits Programs"],
}

# Memo of raw keyword -> space-padded normalized phrase. The same keywords are
# checked for every row, so each one is normalized only once.
_KEYWORD_PATTERNS = {}

def clean_text(text):
    """Normalize text to lowercase ASCII letters/digits separated by single spaces.

    Every run of other characters (punctuation, separators such as "|" or "-",
    repeated whitespace) becomes one space instead of being deleted, so the
    words on either side of a separator remain distinct.
    """
    return re.sub(r"[^a-zA-Z0-9]+", " ", str(text)).strip().lower()

def _keyword_pattern(keyword):
    """Return `keyword` normalized with clean_text and padded with one space per side."""
    pattern = _KEYWORD_PATTERNS.get(keyword)
    if pattern is None:
        pattern = _KEYWORD_PATTERNS[keyword] = f" {clean_text(keyword)} "
    return pattern

def classify_cluster(market, category_list, keywords=None):
    """Return the name of the cluster whose keywords best match a company.

    The market and category text is normalized with clean_text, and a cluster
    scores one point for each distinct keyword that occurs in it as a whole
    word or whole phrase. Keywords are normalized the same way as the text, so
    multi-word ("Health care") and punctuated ("E-commerce") keywords can
    match; compared raw against single-word tokens they never could.

    Args:
        market: Market label of the company.
        category_list: Category text of the company.
        keywords: Optional mapping of cluster name -> iterable of keyword
            strings. Defaults to the module-level ``cluster_keywords_raw``.

    Returns:
        The best-scoring cluster name (ties go to the cluster listed first in
        ``keywords``), or "Unclassified" when nothing matches or there are no
        clusters.
    """
    if keywords is None:
        keywords = cluster_keywords_raw
    text = clean_text(f"{market} {category_list}")
    # Pad with spaces so " phrase " only matches on whole-word boundaries.
    haystack = f" {text} "
    scores = {
        cluster: len({
            pattern
            for pattern in map(_keyword_pattern, phrases)
            # pattern.strip() skips keywords that normalize to nothing.
            if pattern.strip() and pattern in haystack
        })
        for cluster, phrases in keywords.items()
    }
    if not scores:
        return "Unclassified"
    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Tag every company with its best-matching cluster.
df["cluster"] = df.apply(
    lambda company: classify_cluster(company["market"], company["category_list"]),
    axis=1,
)

# Total funding per (cluster, market), ranked inside each cluster with the
# largest total first; only ranks 1-10 of real clusters are kept.
df_grouped = (
    df.groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
df_grouped["rank"] = (
    df_grouped.groupby("cluster")["total_funding"]
    .rank(method="first", ascending=False)
)
in_top10 = df_grouped["rank"] <= 10
is_classified = df_grouped["cluster"] != "Unclassified"
df_top10 = df_grouped[in_top10 & is_classified]

# ipywidgets dropdown listing the clusters alphabetically.
cluster_names = sorted(df_top10["cluster"].unique())
dropdown_layout = widgets.Layout(width="60%")
dropdown_style = {"description_width": "initial"}
dropdown = widgets.Dropdown(
    options=cluster_names,
    description="Select Cluster:",
    layout=dropdown_layout,
    style=dropdown_style,
)

# Output area the chart is rendered into.
output = widgets.Output()

def show_chart(change):
    """Draw the top-10 bar chart for the cluster given in ``change["new"]``.

    The chart is rendered inside the ``output`` widget, replacing whatever
    that widget showed before.
    """
    with output:
        clear_output(wait=True)
        cluster = change["new"]
        cluster_rows = df_top10[df_top10["cluster"] == cluster]
        hover_fields = [
            alt.Tooltip("market:N", title="Market"),
            alt.Tooltip("total_funding:Q", title="Total Funding", format="~s"),
            alt.Tooltip("cluster:N", title="Cluster"),
        ]
        bars = alt.Chart(cluster_rows).mark_bar(color="steelblue").encode(
            y=alt.Y("market:N", sort="-x", title="Market"),
            x=alt.X("total_funding:Q", title="Total Funding (USD)"),
            tooltip=hover_fields,
        )
        chart = bars.properties(
            width=750,
            height=400,
            title=f"Top 10 Markets in {cluster}",
        )
        display(chart)

# Redraw whenever the dropdown's "value" trait changes.
dropdown.observe(show_chart, names="value")

# Show the control first, then the area the chart is drawn into.
for widget in (dropdown, output):
    display(widget)

# Draw the chart for the initially selected cluster.
initial_change = {"new": dropdown.value}
show_chart(initial_change)

In [None]:
import pandas as pd
import re
import altair as alt
import ipywidgets as widgets
from IPython.display import display, clear_output

# Read the source CSV and normalize its headers to lowercase snake_case.
df = pd.read_csv("data.csv", encoding="ISO-8859-1")
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

# The two text columns feed the classifier: missing values become "".
for text_column in ("market", "category_list"):
    df[text_column] = df[text_column].fillna("").astype(str)

# Per-round funding columns coerced to numbers (unparseable or missing -> 0),
# plus their row-wise sum.
funding_rounds = ["seed"] + [f"round_{letter}" for letter in "abcdefgh"]
df[funding_rounds] = (
    df[funding_rounds]
    .apply(pd.to_numeric, errors="coerce")
    .fillna(0)
)
df["total_funding"] = df[funding_rounds].sum(axis=1)

# Cluster name -> keywords (market / category names) that count as evidence for
# that cluster; classify_cluster scores each company against these lists.
# Some keywords appear under more than one cluster (e.g. "Web hosting" is in
# both "Software & IT" and "Cloud & Data").
# NOTE(review): "Unifed Communications" looks like a typo for "Unified" —
# confirm against the spelling used in data.csv before changing it.
cluster_keywords_raw = {
    "Health & Life Sciences": ["Biotechnology", "Health and wellness", "Fitness", "Health care", "Sports", "Medical devices", "Nanotechnology", "Pharmaceuticals", "Diabetes", "Medical", "Fantasy Sports", "Therapeutics", "Life Sciences", "Electronic Health Records", "Bioinformatics", "Personal Health", "Exercise", "Diagnostics", "Healthcare Services", "Dental", "Medical Professionals", "Doctors", "mHealth", "Health Services Industry", "Aerospace", "Chemicals", "Testing"],
    "Software & IT": ["Software", "Enterprise software", "SaaS", "Sensors", "Web hosting", "Mobile security", "Productivity Software", "Web CMS", "CRM", "Open source", "IT and Cybersecurity", "Tech Field Support", "File Sharing", "Digital Rights Management", "Web Development", "Development Platforms", "Health Care Information Technology", "Application Performance Monitoring", "Browser Extensions", "Information Services", "Privacy", "VoIP", "Information Technology", "Social CRM", "Enterprise Resource Planning", "Web Browsers", "Audio", "Cyber", "IT Management", "Linux", "Gps", "Fraud Detection", "Corporate IT", "Security", "Tracking", "Navigation"],
    "Cloud & Data": ["Analytics", "Big data", "Web hosting", "Cloud computing", "Cloud infrastructure", "Cloud data services", "Big data analytics", "Databases", "PaaS", "Data integration", "Ediscovery", "Cloud Security", "Cloud Management", "IaaS", "Task Management", "Document Management", "Contact Management", "Business Information Systems", "Geospatial", "M2M"],
    "Internet & Social": ["Messaging", "Social media", "Internet of things", "Internet", "Search", "Web Tools", "Email", "SEO", "Video Conferencing", "Networking", "Social search", "Social Network Media", "Blogging Platforms", "Photo Sharing", "Chat", "Communities", "Twitter Applications", "Facebook Applications", "Proximity Internet", "Unifed Communications", "Network Security", "Consumer Internet", "Forums", "Local Search", "SMS", "Visual Search", "Enterprise Search", "Semantic Search", "Social Media Monitoring", "Semantic Web", "MicroBlogging", "Private Social Networking"],
    "Commerce & Retail": ["E-commerce", "Marketplaces", "Manufacturing", "Shopping", "Subscription businesses", "Small and Medium Businesses", "Mobile Commerce", "Retail", "Online Shopping", "Online Rental", "Online Reservations", "Virtual Goods", "Social Buying", "Mass Customization", "Green Consumer Goods", "Consumer Goods", "Mobile Shopping", "Groceries", "Auctions", "Storage", "Toys", "Distribution"],
    "Finance & Enterprise": ["Bitcoin", "Human resources", "Commercial real estate", "Finance", "Real estate", "Financial services", "Local businesses", "Stock exchanges", "Online rental", "Startups", "Accounting", "Nonprofits", "Crowdfunding", "Monetization", "Risk Management", "Personal Finance", "Venture Capital", "Social Commerce", "Business Services", "Lead Generation", "Banking", "Outsourcing", "Payments", "Mobile Payments", "Brokers", "Credit", "Freemium", "Intellectual Asset Management", "Innovation Management", "Loyalty Programs", "B2B", "Licensing", "Freelancers", "Virtual Currency", "Business Development", "Social Business", "Investment Management", "Franchises", "P2P Money Transfer", "Credit Cards", "Real Estate Investors", "Entrepreneur", "Wealth Management", "Billing", "Optimization", "Logistics"],
    "CleanTech, Energy & Infrastructure": ["Clean technology", "Clean energy", "Carbon", "Solar", "Water", "Oil & Gas", "Construction", "Data Centers", "Energy", "Residential Solar", "Environmental Innovation", "Renewable Energies", "Utilities", "Energy Management", "Batteries", "Energy Efficiency", "Infrastructure", "Internet Infrastructure", "Energy IT", "Data Center Infrastructure", "Electrical Distribution"],
    "Travel & Lifestyle": ["Tourism", "Online travel", "Adventure travel", "Hospitality", "Personal branding", "Social Travel", "Fashion", "Home & Garden", "Lifestyle", "Personalization", "Elder Care", "Content Creators", "Events", "Weddings", "Gambling", "Virtual Workforces", "Restaurants", "Beauty", "Pets", "Cooking", "Specialty Foods", "Organic Food", "Hotels", "Cosmetics", "Parenting", "Families", "Event Management", "Online Dating", "Home Decor", "Vacation Rentals", "Home Renovation", "Wine And Spirits", "Travel", "Timeshares", "Kids", "Women", "Craft Beer", "Sex", "Twin-Tip Skis"],
    "Hardware & Devices": ["Telecommunications", "Communications hardware", "Semiconductors", "Wireless", "Computers", "Consumer Electronics", "Displays", "Electronics", "Tablets", "Hardware"],
    "Media, Games, & Entertainment": ["Music", "Social games", "Video", "Television", "Games", "News", "Broadcasting", "Entertainment", "Video Games", "Music services", "Online Gaming", "In-Flight Entertainment", "Video Streaming", "Motion Capture", "Internet TV", "Media", "Social Television", "Entertainment Industry", "Music Venues", "MMO Games", "Mobile Games", "High Schools", "Digital Entertainment", "Film", "Collaborative Consumption", "Creative", "Journalism", "Contests", "Opinions"],
    "Art & Design": ["Design", "3D", "Printing", "Photography", "User Experience Design", "Artists Globally", "Art", "3D Printing", "Interior Design", "Interface Design", "Architecture", "Product Design", "Graphics", "CAD", "Visualization", "Human Computer Interaction"],
    "Education & Employment": ["Education", "Services", "Publishing", "College Recruiting", "Tutoring", "Textbooks", "Content Delivery", "Language Learning", "E-Books", "Educational Games", "Content", "Career Planning", "K-12 Education", "Career management", "Colleges", "Certification Test", "Professional Services", "Presentations", "Translation", "Teachers", "College Campuses", "All Students", "Social Recruiting", "Recruiting", "Employment", "Skill Assessment", "Employer Benefits Programs"],
}

# Classify rows into clusters
def clean_text(text):
    """Return ``text`` lower-cased with everything except ASCII letters, digits and spaces removed.

    The argument is passed through ``str()`` first, so non-string values are
    accepted. Removed characters are deleted, not replaced by a space, so
    text on either side of a removed character is joined together.
    """
    as_string = str(text)
    kept = re.sub(r"[^a-zA-Z0-9 ]", "", as_string)
    return kept.lower()

def classify_cluster(market, category_list):
    """Assign a row to the cluster whose keywords it matches most often.

    Two kinds of candidates are built from ``market`` and ``category_list``:

    * whole entries, obtained by splitting on ``|``, ``,`` and ``;`` and then
      lower-casing and collapsing whitespace, so that multi-word keywords
      such as "Online travel" can match;
    * single words produced by ``clean_text``, so that a one-word keyword
      still matches inside a longer entry.

    Each cluster in ``cluster_keywords_raw`` scores one point per distinct
    keyword found among the candidates.

    Parameters
    ----------
    market, category_list
        Values for one row; they are formatted into a string, so any type
        is accepted.

    Returns
    -------
    str
        The highest-scoring cluster name (the earliest one in
        ``cluster_keywords_raw`` when scores tie), or ``"Unclassified"``
        when no keyword matches or no clusters are defined.
    """
    raw = f"{market}|{category_list}"

    # Whole entries: needed because a keyword containing a space can never
    # equal a whitespace-split token.
    phrases = {" ".join(entry.lower().split()) for entry in re.split(r"[|,;]", raw)}
    phrases.discard("")

    # Single words: delimiters are turned into spaces *before* cleaning,
    # otherwise clean_text deletes them and fuses neighbouring entries into
    # one meaningless token.
    words = set(clean_text(re.sub(r"[|,;]", " ", raw)).split())

    candidates = phrases | words
    scores = {
        cluster: len(candidates & {" ".join(kw.lower().split()) for kw in keywords})
        for cluster, keywords in cluster_keywords_raw.items()
    }
    if not scores:
        # max() would raise on an empty mapping.
        return "Unclassified"

    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "Unclassified"

# Classify every row. Zipping the two input columns yields the same values in
# the same row order as a row-wise ``DataFrame.apply(axis=1)``, without
# building a Series object for each row.
df["cluster"] = [
    classify_cluster(market, categories)
    for market, categories in zip(df["market"], df["category_list"])
]

# Top 10 markets by funding per cluster
# Missing markets were filled with "" when the data was loaded. Exclude those
# rows before aggregating so that a blank "market" cannot occupy one of the
# ten slots and show up as an unlabeled bar.
df_grouped = (
    df[df["market"].str.strip() != ""]
    .groupby(["cluster", "market"], as_index=False)["total_funding"]
    .sum()
)
# method="first" breaks funding ties by order of appearance, so ranks within a
# cluster are unique and at most ten rows per cluster survive the filter.
df_grouped["rank"] = df_grouped.groupby("cluster")["total_funding"].rank(method="first", ascending=False)
df_top10 = df_grouped[(df_grouped["rank"] <= 10) & (df_grouped["cluster"] != "Unclassified")]

# Output area the chart is drawn into
output = widgets.Output()

# Cluster selector: one entry per cluster present in df_top10, in sorted order
dropdown = widgets.Dropdown(
    description="Select Cluster:",
    options=sorted(set(df_top10["cluster"])),
    style={"description_width": "initial"},
    layout=widgets.Layout(width="60%"),
)

# Display chart on selection
def show_chart(change):
    """Draw the bar chart of top markets for the selected cluster into ``output``.

    Parameters
    ----------
    change : mapping
        Only ``change["new"]`` is read; it is the cluster name used to filter
        ``df_top10``.

    Returns
    -------
    None
        The chart is displayed inside the ``output`` widget, replacing
        whatever was shown there before.
    """
    with output:
        # wait=True defers clearing until the new output is ready to display.
        clear_output(wait=True)

        selected = change["new"]
        rows = df_top10[df_top10["cluster"] == selected]

        tooltip_fields = [
            alt.Tooltip("market:N", title="Market"),
            alt.Tooltip("total_funding:Q", title="Total Funding", format="~s"),
            alt.Tooltip("cluster:N", title="Cluster"),
        ]
        bars = alt.Chart(rows).mark_bar(color="steelblue")
        # sort="-x" orders the market axis by descending total funding.
        encoded = bars.encode(
            y=alt.Y("market:N", sort="-x", title="Market"),
            x=alt.X("total_funding:Q", title="Total Funding (USD)"),
            tooltip=tooltip_fields,
        )
        chart = encoded.properties(width=750, height=400, title=f"Top 10 Markets in {selected}")

        display(chart)

# Redraw whenever the selected value changes, then show the controls.
dropdown.observe(show_chart, names="value")
display(dropdown)
display(output)

# Initial chart trigger: render once for the default selection.
# show_chart draws into `output` and returns nothing, so the call is made for
# its side effect only and the result is not bound to a name.
show_chart({"new": dropdown.value})