# In [6]:
import pandas as pd
import altair as alt
import re

# Build a heatmap of total funding per (first letter of startup name, market)
# for the 30 markets with the highest total funding, and save it as HTML.

# Load dataset
df = pd.read_csv('data.csv', encoding='latin1')
# Normalise headers: trim padding, lower-case, spaces -> underscores.
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Clean funding column: drop '$' and thousands separators, treat a lone '-'
# as zero, and coerce anything still unparseable to 0.0.
df['funding_total_usd'] = (
    df['funding_total_usd']
    .astype(str)
    .str.replace(r'[\$,]', '', regex=True)  # raw string: '\$' is not a valid str escape
    .str.strip()
    .replace('-', '0')  # Series.replace: matches whole cell values only
)
df['funding_total_usd'] = pd.to_numeric(df['funding_total_usd'], errors='coerce').fillna(0.0)

# Filter valid rows: a non-empty market and strictly positive funding.
df = df[df['market'].notna() & (df['market'] != '') & (df['funding_total_usd'] > 0)]

# Top 30 markets by total funding (Series ordered from highest to lowest total).
top_markets = (
    df.groupby('market', as_index=False)['funding_total_usd']
    .sum()
    .sort_values('funding_total_usd', ascending=False)
    .head(30)['market']
)
# Reused below so the y axis follows the same funding ranking.
market_order = top_markets.tolist()

# .copy() so the column assignments below act on an independent frame
# rather than on a slice of the original (avoids SettingWithCopyWarning).
df = df[df['market'].isin(top_markets)].copy()

# Extract first valid letter of name.
# Drop missing names first: astype(str) would turn them into the literal
# 'nan' and they would be counted under the initial 'N'.
df = df[df['name'].notna()].copy()
df['name'] = df['name'].astype(str)

# First character of the trimmed, upper-cased name. Empty names yield NaN here.
initials = df['name'].str.strip().str.upper().str[0]
# Keep only ASCII letters A-Z; digits, symbols and NaN become missing.
# na=False makes the match NaN-safe (re.match on a float NaN raises TypeError).
df['name_initial'] = initials.where(initials.str.match(r'[A-Z]', na=False))
df = df[df['name_initial'].notna()]

# Group by name initial and market
heatmap_data_alpha = df.groupby(['name_initial', 'market'], as_index=False)['funding_total_usd'].sum()

# Plot with blue color scale
heatmap = alt.Chart(heatmap_data_alpha).mark_rect().encode(
    x=alt.X(
        'name_initial:N',
        title='First Letter of Startup Name',
        sort='ascending',
        axis=alt.Axis(labelAngle=0)  # keep the letter labels upright
    ),
    # Explicit order: markets ranked by total funding, highest first.
    # (Sorting by the nominal x channel would order markets by letter.)
    y=alt.Y('market:N', title='Market', sort=market_order),
    color=alt.Color(
        'funding_total_usd:Q',
        title='Total Funding (USD)',
        scale=alt.Scale(
            scheme='blues',
            # Fixed 0 - 2 billion domain; clamp=True pins values outside
            # the domain to the ends of the colour range.
            domain=[0, 2000000000],
            clamp=True
        ),
        legend=alt.Legend(
            title='Funding Amount',
            format='$,.0f',
            orient='right'
        )
    ),
    tooltip=[
        alt.Tooltip('name_initial:N', title='Startup Initial'),
        alt.Tooltip('market:N'),
        alt.Tooltip('funding_total_usd:Q', title='Funding (USD)', format='$,.0f')
    ]
).properties(
    width=700,
    height=800,
    title='Funding by Startup Initial and Market (Top 30 Markets)'
)

# Save first, then leave the chart as the cell's last expression: a notebook
# only auto-displays the value of the final expression in a cell.
heatmap.save("heatmap_plot.html")
heatmap