In [None]:
import plotly.express as px
from dash import Dash, html, dcc, Input, Output, callback
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import pycountry
import geopandas as gpd
import json

In [None]:
# Load the raw per-company unicorn dataset. The lower-case "year" column is
# renamed to "Year" because later cells group this frame by "Year".
# The rename is chained (no inplace mutation) so re-running the cell always
# rebuilds the frame from the file.
filename = "World_Wide_Unicorn_Startups.csv"
basic_su_df = pd.read_csv(f"../data/{filename}").rename(columns={"year": "Year"})
basic_su_df.head()

In [None]:
import pandas as pd
import plotly.express as px


# Summed valuation per industry over the whole dataset, largest first.
industry_val = (
    basic_su_df
    .groupby("Industry")["Valuation"]
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

# Horizontal bars: bar length and colour both encode the summed valuation.
fig = px.bar(
    data_frame=industry_val,
    x="Valuation",
    y="Industry",
    orientation="h",
    title="Total Valuation by Industry",
    labels={"Valuation": "Total Valuation (Billion USD)"},
    color="Valuation",
    color_continuous_scale="viridis",
)

# Let Plotly order the category axis by each bar's total.
fig.update_layout(yaxis_categoryorder="total ascending")
fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Restrict the company-level data to a single country.
country = "United States"
country_df = basic_su_df.loc[basic_su_df["Country"] == country]

# Summed valuation per industry, largest first; head() keeps the first five rows.
industry_val = (
    country_df
    .groupby("Industry")["Valuation"]
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    .head()
)

# Horizontal bars: bar length and colour both encode the summed valuation.
fig = px.bar(
    data_frame=industry_val,
    x="Valuation",
    y="Industry",
    orientation="h",
    title=f"Total Valuation by Industry in {country}",
    labels={"Valuation": "Total Valuation (Billion USD)"},
    color="Valuation",
    color_continuous_scale="viridis",
)

# Let Plotly order the category axis by each bar's total.
fig.update_layout(yaxis_categoryorder="total ascending")
fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# Filter for a specific country
country = "United States"
country_df = basic_su_df[basic_su_df["Country"] == country]

# One row per (Year, Industry) pair with the summed valuation for that pair.
industry_year_val = country_df.groupby(["Year", "Industry"])["Valuation"].sum().reset_index()

# px.treemap has no animation_frame argument, so the time dimension is shown as
# the outer level of the hierarchy instead. With path=["Industry"] alone the
# yearly rows were collapsed per industry: the title promised "Over Time" while
# no year was visible, and the colour became a weighted mean of the yearly
# subtotals rather than the value drawn in each tile.
# Year is cast to str so it is treated as a category label, not a number.
fig = px.treemap(
    industry_year_val.astype({"Year": str}),
    path=["Year", "Industry"],
    values="Valuation",
    color="Valuation",
    color_continuous_scale="viridis",
    title=f"Industry Valuation Over Time in {country}",
)

fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))
fig.show()

In [None]:
# Unicorn data per country, read from the preprocessed data folder.
filename = "Unicorn_Startups_Per_Country.csv"
preprocessed_su_df = pd.read_csv("../data/preprocessed/" + filename)
preprocessed_su_df.head()


In [None]:
# Quality-of-life table, read from the preprocessed data folder.
filename = "QOL.csv"
preprocessed_qol_df = pd.read_csv("../data/preprocessed/" + filename)
preprocessed_qol_df.head()

In [None]:
# Inner-join the quality-of-life and unicorn tables on their shared keys.
merged_df = pd.merge(preprocessed_qol_df, preprocessed_su_df, on=["Country", "Year", "ISO3"])
# Round to whole unicorns and store as int; the vectorised .round() replaces a
# per-row Python lambda and uses the same round-half-to-even rule.
merged_df["N_Unicorns"] = merged_df["N_Unicorns"].round().astype(int)
merged_df.head()

In [None]:
def country_to_iso3(name):
    """Return the ISO 3166-1 alpha-3 code for ``name``, or ``None`` if unknown.

    Args:
        name: Country name or code, passed to ``pycountry.countries.lookup``.

    Returns:
        The three-letter code as a string, or ``None`` when ``name`` is not a
        string (e.g. a missing value) or pycountry finds no matching country.
    """
    # Missing values (None / NaN) can never match; skip the lookup entirely.
    if not isinstance(name, str):
        return None
    try:
        return pycountry.countries.lookup(name).alpha_3
    except LookupError:
        # Only "no such country" is treated as a miss; any other exception
        # (including KeyboardInterrupt) now propagates instead of being hidden
        # by a bare ``except``.
        return None

In [None]:
def normalize_country(name):
    """Normalise a country label: trim it, expand US abbreviations, title-case it.

    Args:
        name: Any value; it is converted with ``str()`` first.

    Returns:
        The stripped string with 'U.S.A.', 'U.S.' and 'USA' replaced by
        'United States', converted with ``str.title()``.
    """
    name = str(name).strip()
    # Expand the dotted long form first: otherwise the 'U.S.' rule below matches
    # its prefix and leaves a stray "A." behind ("United StatesA.").
    name = name.replace('U.S.A.', 'United States')
    name = name.replace('U.S.', 'United States').replace('USA', 'United States')
    return name.title()

In [None]:
# Country polygons from the ne_110m_admin_0_countries shapefile, reprojected to
# EPSG:4326 so they line up with the lon/lat maps drawn later.
world = gpd.read_file(
    "../data/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"
).to_crs(epsg=4326)
print(world.columns)

In [None]:
import pyproj

# Keep only the join key and the polygon of each country.
# NOTE(review): some Natural Earth releases store "-99" in ISO_A3 for a few
# countries (reportedly France and Norway) — confirm none are lost in the joins.
countries_gdf = world[['ISO_A3', 'geometry']].rename(columns={'ISO_A3': 'ISO3'})

# merged_df is keyed by (Country, Year, ISO3), so the number of unicorns has to
# be summed from N_Unicorns; counting rows ("size") would only give the number
# of yearly records per country.
unicorn_agg = (
    merged_df
      .groupby(["ISO3", "Country"], as_index=False)
      .agg(
          n_unicorns = ("N_Unicorns", "sum"),  # total unicorns over all years
          total_val  = ("Total_Val", "sum"),   # total valuation over all years
      )
)

# Left join: countries without a matching polygon are kept with no geometry.
unicorn_agg_geo = pd.merge(unicorn_agg, countries_gdf, on='ISO3', how='left')
unicorn_agg_gdf = gpd.GeoDataFrame(unicorn_agg_geo, geometry='geometry').to_crs(4326)
unicorn_agg_gdf.info()

In [None]:
# Quality-of-life + unicorn data joined on the shared keys, then enriched with
# each country's polygon (inner joins: rows without a match are dropped).
merged_df_new = (
    preprocessed_qol_df
    .merge(preprocessed_su_df, on=["Country", "Year", "ISO3"])
    .merge(countries_gdf, on=["ISO3"])
    .rename(columns={"geometry": "Country_Geom"})
)
merged_df_new.info()





In [None]:
import pandas as pd
import geopandas as gpd
from shapely import wkt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler

# Define metrics and year
metric_1 = "Purchasing_Power_Index"   # drives the map colour
metric_2 = "N_Unicorns_Cumulative"    # shown in the hover box only
year = 2021

# Keep the chosen year's rows that actually have a polygon to draw.
df = merged_df_new[merged_df_new["Year"] == year].copy()
df = df[df["Country_Geom"].notna()].reset_index(drop=True)
if df.empty:
    # Fail with a clear message instead of an IndexError from .iloc[0] below.
    raise ValueError(f"No rows with a geometry for year {year}")

# Country_Geom may hold geometry objects or WKT strings; build the
# GeoDataFrame accordingly.
if hasattr(df["Country_Geom"].iloc[0], 'geom_type'):
    # Already a geometry object - use directly
    gdf = gpd.GeoDataFrame(df, geometry="Country_Geom", crs="EPSG:4326")
else:
    # It's a WKT string - convert to geometry
    df["geometry"] = df["Country_Geom"].apply(wkt.loads)
    gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Simplify geometries for better performance. The result is stored in the
# "geometry" column, which is not necessarily the active geometry column.
gdf["geometry"] = gdf.geometry.simplify(tolerance=0.05, preserve_topology=True)

# Normalize the metric to [0, 1] for color mapping
scaler = MinMaxScaler((0, 1))
gdf["scaled_metric"] = scaler.fit_transform(gdf[[metric_1]]).ravel()

# Create choropleth map. The GeoJSON is taken from the simplified "geometry"
# column explicitly: when Country_Geom is the active geometry, gdf.geometry
# still refers to the unsimplified shapes and the simplification above would
# have no effect on the figure.
fig = px.choropleth(
    gdf,
    geojson=gdf["geometry"].__geo_interface__,
    locations=gdf.index,
    color="scaled_metric",
    hover_name="Country",
    hover_data={metric_2: True, metric_1: True, "scaled_metric": False},
    color_continuous_scale="YlOrRd",
    labels={metric_1: metric_1, metric_2: metric_2, "scaled_metric": metric_1},
)

# Update layout for better appearance
fig.update_layout(
    margin=dict(l=0, r=0, t=0, b=0),
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection_type='equirectangular'
    )
)

# Display the map
fig.show()

In [None]:
# Peek at a single row of the GeoDataFrame built in the previous cell.
gdf.head(n=1)

In [None]:
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
import geopandas as gpd
from shapely import wkt
from sklearn.preprocessing import MinMaxScaler

# Columns of merged_df_new that the dashboard dropdowns let the user pick.
metric_options = [
    "Purchasing_Power_Index",
    "GDP_Per_Capita",
    "Total_Val",
    "Quality_of_Life_Index",
    "Cost_of_Living_Index",
    "Property_Price_to_Income_Ratio",
    "N_Unicorns",
    "N_Unicorns_Cumulative",
    "Total_Val_Cumulative"
]
country_options = [{"label": country, "value": country} for country in merged_df_new["Country"].unique()]

available_metrics = [ "GDP_Per_Capita", "Cost_of_Living_Index", "Quality_of_Life_Index"]
available_metrics2 = ["N_Unicorns_Cumulative", "Total_Val"]

# Precompute KPIs
# Series.sum() already returns a scalar, so the former trailing .max() did
# nothing. N_Unicorns is not rounded in merged_df_new, so the total is rounded
# to a whole number before it is shown as "Total Companies".
total_unicorns = int(round(merged_df_new['N_Unicorns'].sum()))
# Per-country valuation totals, computed once and reused for both KPIs.
_valuation_by_country = merged_df_new.groupby('Country')['Total_Val'].sum()
highest_valuation_country = _valuation_by_country.idxmax()
highest_valuation_value = _valuation_by_country.max()
country_most_unicorns = merged_df_new.groupby('Country')['N_Unicorns'].sum().idxmax()
# Countries currently toggled on via map clicks; mutated by handle_map_click
# and read by update_bubble_plot.
# NOTE(review): this is module-level state shared by every browser session —
# consider a dcc.Store if the app is ever served to more than one user.
clicked_countries = ["United States"]

# App setup
app = Dash(__name__)

# Page structure: title, KPI strip, then a two-column flex row
# (left: map + year slider + bar chart, right: bubble-plot controls + plot).
app.layout = html.Div([
    html.H1("Global Unicorn Dashboard", style={
        "textAlign": "center", "marginBottom": "8px", "fontFamily": "Arial", "color": "#333"
    }),

    # KPI section - made more compact
    html.Div([
        html.H4(f"Total Companies: {total_unicorns}", style={"color": "#191061", "display": "inline-block", "marginRight": "15px"}),
        html.H4(f"Highest Valuation: {highest_valuation_country} (${highest_valuation_value:,.2f}B)", style={"color": "#026628", "display": "inline-block", "marginRight": "15px"}),
        html.H4(f"Most Unicorns: {country_most_unicorns}", style={"color": "#dc3545", "display": "inline-block"})
    ], style={"textAlign": "center", "marginBottom": "5px", "fontFamily": "Arial", "color": "#333", "whiteSpace": "nowrap"}),

    html.Div([
        # Left panel - Map and bar chart
        html.Div([
            html.Label("Map Color Metric:", style={"fontSize": "14px"}),
            dcc.Dropdown(
                id='metric-1-dropdown',
                options=[{"label": m.replace("_", " "), "value": m} for m in metric_options],
                value='Total_Val',
                style={"width": "100%", "marginBottom": "10px"}
            ),
            # NOTE: `clickmode` is a figure *layout* attribute, not a Graph
            # `config` option, so the former config={"clickmode": ...} entry was
            # ignored; it has been dropped. Click events still fire with the
            # default click mode.
            dcc.Graph(id="world-map", style={
                "height": "45vh", "marginBottom": "10px", "boxShadow": "0 2px 8px rgba(0,0,0,0.1)"
            }),
            dcc.Slider(
                id='year-slider',
                min=merged_df_new['Year'].min(),
                max=merged_df_new['Year'].max(),
                step=1,
                value=merged_df_new['Year'].min(),
                marks={str(year): str(year) for year in sorted(merged_df_new['Year'].unique())},
                tooltip={"placement": "bottom", "always_visible": True}
            ),
            dcc.Graph(id="fig_bar", style={
                "height": "22vh", "boxShadow": "0 2px 8px rgba(0,0,0,0.1)"
            }),
            # dcc.Graph(id="fig_line", style={
            #     "height": "23vh", "boxShadow": "0 2px 8px rgba(0,0,0,0.1)"
            # }),
        ], style={"width": "57%", "padding": "1px", "backgroundColor": "#fafafa"}),

        # Right panel
        html.Div([
            # The labels follow what update_bubble_plot actually does:
            # "metric-dropdown1" feeds the y axis and "metric-dropdown" feeds
            # the bubble size (the two captions used to be swapped).
            html.Div([
                html.Label("Metric 1 (y axis):", style={"fontSize": "14px"}),
                dcc.Dropdown(
                    id="metric-dropdown1",
                    options=[{"label": m.replace('_', ' '), "value": m} for m in metric_options],
                    value="N_Unicorns_Cumulative"
                )
            ], style={"width": "45%", "display": "inline-block"}),

            html.Div([
                html.Label("Metric 2 (bubble size):", style={"fontSize": "14px"}),
                dcc.Dropdown(
                    id="metric-dropdown",
                    options=[{"label": m.replace('_', ' '), "value": m} for m in metric_options],
                    value="GDP_Per_Capita"
                )
            ], style={"width": "45%", "display": "inline-block"}),

            # First graph (bubble chart)
            html.Div([
                dcc.Graph(id="bubble-plot", style={
                    "height": "25vh", "marginBottom": "5px", "boxShadow": "0 2px 8px rgba(0,0,0,0.1)"
                })
            ]),

            # Second graph (disabled)
            # html.Div([
            #     dcc.Graph(id="industry-bar-chart", style={
            #         "height": "23vh", "marginBottom": "5px", "boxShadow": "0 2px 8px rgba(0,0,0,0.1)"
            #     })
            # ]),

            # Third graph (disabled)
            # html.Div([
            #     dcc.Graph(id="valuation-trend", style={
            #         "height": "25vh", "boxShadow": "0 2px 8px rgba(0,0,0,0.1)"
            #     })
            # ])
        ], style={"width": "40%", "padding": "5px", "backgroundColor": "#ffffff"})
    ], style={
        "display": "flex",
        "gap": "1px",
        "justifyContent": "space-between",
        "margin": "0 auto",
        "maxWidth": "98%"
    }),
])

# Callback to update map and bar chart by year
@app.callback(
    [Output("world-map", "figure"),
     Output("fig_bar", "figure")],
    [Input("year-slider", "value"),
     Input("metric-1-dropdown", "value")]
)
def update_dashboard(selected_year, selected_metric):
    """Rebuild the choropleth map and the top-10 bar chart for one year.

    Args:
        selected_year: Current value of the year slider.
        selected_metric: Column of ``merged_df_new`` chosen in the map-colour
            dropdown; ``None`` when the dropdown has been cleared.

    Returns:
        ``(fig_map, fig_bar)`` in the order of the declared outputs. When the
        selected year has no rows with a geometry, two empty titled figures
        are returned instead.
    """
    # A cleared dropdown sends None; fall back to the dropdown's default value
    # instead of failing on the column lookup below.
    if selected_metric is None:
        selected_metric = "Total_Val"
    metric_label = selected_metric.replace("_", " ")
    # Only the valuation columns are shown in billions elsewhere in the
    # dashboard (the KPI strip); indices and counts must not get a "B" suffix.
    value_suffix = "B" if selected_metric.startswith("Total_Val") else ""

    # Filter data for selected year, keeping only rows that have a polygon
    df = merged_df_new[merged_df_new["Year"] == selected_year].copy()
    df = df[df["Country_Geom"].notna()].reset_index(drop=True)

    # The slider steps through every integer year, so a year without rows is
    # possible; .iloc[0] below would raise IndexError on an empty frame.
    if df.empty:
        empty_map = go.Figure()
        empty_map.update_layout(
            title=f"No data available for {selected_year}",
            margin=dict(l=0, r=10, t=30, b=0),
        )
        empty_bar = go.Figure()
        empty_bar.update_layout(
            title=f"No data available for {selected_year}",
            margin=dict(t=30, l=20, r=20, b=20),
        )
        return empty_map, empty_bar

    # Country_Geom may hold geometry objects or WKT strings
    if hasattr(df["Country_Geom"].iloc[0], 'geom_type'):
        # Already a geometry object - use directly
        gdf = gpd.GeoDataFrame(df, geometry="Country_Geom", crs="EPSG:4326")
    else:
        # It's a WKT string - convert to geometry
        df["geometry"] = df["Country_Geom"].apply(wkt.loads)
        gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

    # Simplify geometries for better performance; the simplified shapes live in
    # the "geometry" column, which is what the GeoJSON below is built from.
    gdf["geometry"] = gdf.geometry.simplify(tolerance=0.05, preserve_topology=True)

    # Add color metric
    gdf["color_metric"] = gdf[selected_metric]

    # Hand-built FeatureCollection: each feature's "id" is the row index, which
    # is also what `locations` passes to Plotly, so the two line up.
    geojson = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "id": i,
                "properties": {
                    "Country": row["Country"],
                    "color_metric": row["color_metric"],
                    selected_metric: row[selected_metric],
                    "Total_Val": row["Total_Val"]
                },
                "geometry": row["geometry"].__geo_interface__
            }
            for i, row in gdf.iterrows()
        ]
    }

    # Create choropleth map
    fig_map = px.choropleth_map(
        gdf,
        geojson=geojson,
        locations=gdf.index,  # Use index as location key
        featureidkey="id",    # Use id as the feature key in GeoJSON
        color="color_metric",
        hover_name="Country",
        hover_data={selected_metric: True, "Total_Val": True},
        color_continuous_scale="YlOrRd",
        map_style="carto-positron",
        zoom=0.6,
        center={"lat": 20, "lon": 0},
        opacity=0.8,
        labels={"color_metric": metric_label}
    )

    # customdata[0] is the selected metric (first hover_data entry)
    fig_map.update_traces(
        hovertemplate="%{hovertext}<br>" +
                    f"{metric_label}: " + "%{customdata[0]:,.2f}" + f"{value_suffix}<br>",
        marker=dict(opacity=0.85),
        unselected=dict(marker=dict(opacity=0.85)),
        selected=dict(marker=dict(opacity=1.0))
    )

    fig_map.update_layout(
        margin=dict(l=0, r=10, t=0, b=0),
        # choropleth_map draws on the `map` subplot; the same settings placed
        # under `mapbox` were silently ignored.
        map=dict(
            zoom=0.65,
            center=dict(lat=20, lon=0),
            style="carto-positron"
        ),
        dragmode='zoom',
        modebar_add=['pan', 'select', 'lasso2d', 'zoom', 'zoomIn', 'zoomOut', 'resetScale'],
        # Constant uirevision keeps the user's pan/zoom across figure updates
        uirevision='constant',
        coloraxis_colorbar=dict(
            thickness=15,
            len=0.75,
            x=0.99,
            xanchor='left',
            y=0.5,
            yanchor='middle',
            title=metric_label,
            title_side='top'
        )
    )

    # Bar chart: top 10 countries by the selected metric in the selected year
    top_counts = df.groupby('Country').agg({selected_metric: 'sum'}).reset_index()
    top_counts['Country'] = top_counts['Country'].replace({'United Arab Emirates': 'UAE'})
    top_counts = top_counts[top_counts[selected_metric] > 0]
    top_counts = top_counts.sort_values(by=selected_metric, ascending=False).head(10).reset_index(drop=True)

    # Generate red-to-yellow RGB gradient colors (one step per rank, 0..9)
    colors = [
        f'rgb({int(128 + (127 * i / 9))}, {int(255 * i / 9)}, 0)'
        for i in range(len(top_counts))
    ]

    # Use go.Bar for custom colors
    fig_bar = go.Figure(
        data=[
            go.Bar(
                x=top_counts['Country'],
                y=top_counts[selected_metric],
                marker_color=colors
            )
        ]
    )

    fig_bar.update_layout(
        # The title names the metric actually plotted instead of always saying
        # "Total Valuation".
        title=f'Top 10 Countries by {metric_label} in {selected_year}',
        margin=dict(t=30, l=20, r=20, b=20),
        plot_bgcolor='#f9f9f9',
        paper_bgcolor='#f9f9f9',
        font=dict(family='Arial', size=12),
        xaxis_title='Country',
        yaxis_title=metric_label
    )

    return fig_map, fig_bar


# Callback for bubble plot
@app.callback(
    Output("bubble-plot", "figure"),
    Input("metric-dropdown1", "value"),
    Input("metric-dropdown", "value"),
    Input("world-map", "clickData"),
)
def update_bubble_plot(metric_1, metric_2, clickData):
    """Draw the per-year bubble plot for the countries toggled on the map.

    Args:
        metric_1: Column plotted on the y axis (value of "metric-dropdown1").
        metric_2: Column that drives the bubble size after min-max scaling to
            [0, 1] (value of "metric-dropdown").
        clickData: Not read; it is an input only so that map clicks re-run
            this callback. The selection itself comes from the module-level
            ``clicked_countries`` list maintained by ``handle_map_click``.

    Returns:
        A scatter figure, or an empty figure carrying a short message when a
        metric is missing, no country is selected, or no rows match.
    """
    def _message_figure(message):
        # Always hand Dash a real figure; the old code returned None here.
        placeholder = go.Figure()
        placeholder.update_layout(template="plotly_white", width=700, height=500, title=message)
        return placeholder

    # A cleared dropdown sends None, which cannot be used as a column name.
    if metric_1 is None or metric_2 is None:
        return _message_figure("Select both metrics to draw the bubble plot")

    # Copy so a concurrent toggle cannot change the list while it is in use.
    selected_countries = list(clicked_countries)
    if not selected_countries:
        return _message_figure("Click a country on the map to add it to this plot")

    df = merged_df[merged_df["Country"].isin(selected_countries)].copy()
    if df.empty:
        # MinMaxScaler cannot be fitted on zero rows.
        return _message_figure("No data for the selected countries")

    # Scale the size metric to [0, 1]; missing values become size 0 because
    # px.scatter rejects NaN marker sizes.
    raw_sizes = df[[metric_2]].to_numpy()
    scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(raw_sizes).ravel()
    df["scaled_metric"] = np.nan_to_num(scaled, nan=0.0)

    metric_1_space = metric_1.replace("_", " ")
    metric_2_space = metric_2.replace("_", " ")

    fig = px.scatter(
        df,
        x="Year",
        y=metric_1,
        size="scaled_metric",
        color="Country",
        hover_name="Country",
        hover_data={
            "Year": False,
            metric_2: True,
            metric_1: False,
            "scaled_metric": False
        },
        title=f"{metric_1_space} & {metric_2_space}"
    )
    fig.update_layout(template="plotly_white", width=700, height=500)

    return fig
        
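# Callback for the industry bar chart — currently DISABLED: the code below is
# wrapped in a string literal, and its "industry-bar-chart" graph is commented
# out of the layout.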
"""@app.callback(
    Output("industry-bar-chart", "figure"),
    Input("world-map", "clickData")
)
def update_industry_bar_chart(clickData):
    # Default to United States
    selected_country = "United States"

    # Extract full country name from clickData hovertext
    if clickData and "points" in clickData:
        selected_country = clickData["points"][0].get("hovertext", selected_country)


    # Filter industry data for the selected country
    industry_df = basic_su_df[basic_su_df["Country"] == selected_country]

    # Group by industry and calculate total valuation
    industry_val = (
        industry_df.groupby("Industry")["Valuation"]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
        .head(10)
    )

    # Create horizontal bar chart
    fig_ind = px.bar(
        industry_val,
        x="Valuation",
        y="Industry",
        title=f"Top Industries in {selected_country}",
        labels={"Valuation": "Total Valuation (Billion USD)"},
        color="Valuation",
        color_continuous_scale="viridis"
    )

    fig_ind.update_layout(
        yaxis=dict(categoryorder="total ascending"),
        height=250,
        margin=dict(t=40, l=30, r=30, b=30)
    )

    return fig_ind"""

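# Callback for the valuation trend chart (this was missing in the original) —
# currently DISABLED: the code below is wrapped in a string literal, and its
# "valuation-trend" graph is commented out of the layout.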
"""@app.callback(
    Output("valuation-trend", "figure"),
    [Input("world-map", "clickData")]
)
def update_valuation_trend(clickData):
    # Default to United States
    selected_country = "United States"
    
    # Extract selected country from clickData
    if clickData and "points" in clickData:
        selected_country = clickData["points"][0].get("hovertext", selected_country)
    
    # Filter data for the selected country
    country_data = merged_df_new[merged_df_new["Country"] == selected_country]
    
    # Create line chart for valuation trend over years
    fig_trend = px.line(
        country_data,
        x="Year",
        y="Total_Val",
        title=f"Valuation Trend: {selected_country}",
        markers=True,
        labels={"Total_Val": "Total Valuation (B USD)", "Year": "Year"}
    )
    
    fig_trend.update_traces(
        line=dict(width=3),
        marker=dict(size=8)
    )
    
    fig_trend.update_layout(
        height=200,
        margin=dict(t=40, l=30, r=30, b=30),
        xaxis=dict(tickmode='linear', dtick=1)
    )
    
    return fig_trend"""

@app.callback(
    Output("world-map", "clickData", allow_duplicate=True),
    Input("world-map", "clickData"),
    prevent_initial_call=True
)
def handle_map_click(clickData):
    """Toggle the clicked country in ``clicked_countries``.

    Args:
        clickData: Click payload of the "world-map" graph; the country name is
            read from the first point's ``hovertext``.

    Returns:
        Always ``None``, which is written back to the map's ``clickData``.
        NOTE(review): presumably this reset is what lets a second click on the
        same country register as a new event — confirm before changing it.
    """
    points = clickData.get("points") if clickData else None
    if not points:
        # No click payload: drop the whole selection (same as before).
        clicked_countries.clear()
        return None

    # .get() so a point without hover text is ignored instead of raising KeyError.
    country = points[0].get("hovertext")
    if country is None:
        return None

    if country in clicked_countries:
        clicked_countries.remove(country)
    else:
        clicked_countries.append(country)
    return None
    
    
# Run app
if __name__ == "__main__":
    # Dash.run() names the notebook display option `jupyter_mode`. A plain
    # `mode=` keyword is not one of its parameters: it is treated as an extra
    # Flask run option, which is not used when running inside Jupyter.
    app.run(port=8053, jupyter_mode="inline")