#Setup, Imports and Theme setup

In [146]:
!pip install -q pycountry
!pip install -q dash
!pip install -q dash_bootstrap_components
!pip install -q dash_core_components
!pip install -q dash_html_components
!pip install -q dash_table
!pip install -q plotly
!pip install -q dash
!pip install -q jupyter_dash
!pip install -q pycountry-convert
!pip install -q countryinfo

In [None]:
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from dash import Dash, dcc, html
import dash_bootstrap_components as dbc
from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output, State

import pandas as pd
import numpy as np

from countryinfo import CountryInfo
import pycountry
import pycountry_convert as pc

In [299]:
#ref: https://plotly.com/python/templates/#saving-and-distributing-custom-themes
#ref: https://plotly.com/python/discrete-color/#color-sequences-in-plotly-express
#ref: https://plotly.com/python/builtin-colorscales/
pio.templates["custome_colors"] = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='white',
        plot_bgcolor='rgba(0,0,0,0)',
        colorway= px.colors.qualitative.Set3 + px.colors.qualitative.Pastel,
        colorscale={
            'diverging': px.colors.diverging.Tropic,
            'sequential': px.colors.sequential.Brwnyl,
            'sequentialminus': px.colors.sequential.Burg,
            },
        geo={
            'bgcolor': 'white',
            'lakecolor': 'white',
            'landcolor':'white',# '#E5ECF6',
            'showlakes': True,
            'showland': True,
            'subunitcolor': 'white'
            },
   
    )
)
pio.templates.default = 'ggplot2+custome_colors'

#Code Start

In [223]:
df = pd.read_csv('Unicorn_Companies.csv')
df['comp_n'] = 1
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1037 entries, 0 to 1036
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Company            1037 non-null   object
 1   Valuation ($B)     1037 non-null   object
 2   Date Joined        1037 non-null   object
 3   Country            1037 non-null   object
 4   City               1037 non-null   object
 5   Industry           1037 non-null   object
 6   Select Inverstors  1037 non-null   object
 7   Founded Year       1037 non-null   object
 8   Total Raised       1037 non-null   object
 9   Financial Stage    1037 non-null   object
 10  Investors Count    1037 non-null   object
 11  Deal Terms         1037 non-null   object
 12  Portfolio Exits    1037 non-null   object
 13  comp_n             1037 non-null   int64 
dtypes: int64(1), object(13)
memory usage: 113.5+ KB


In [224]:
df.head()

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n
0,Bytedance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",2012,$7.44B,IPO,28,8,5.0,1
1,SpaceX,$100.3,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2002,$6.874B,,29,12,,1
2,Stripe,$95,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",2010,$2.901B,Asset,39,12,1.0,1
3,Klarna,$45.6,12/12/2011,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita...",2005,$3.472B,Acquired,56,13,1.0,1
4,Epic Games,$42,10/26/2018,United States,Cary,Other,"Tencent Holdings, KKR, Smash Ventures",1991,$4.377B,Acquired,25,5,2.0,1


###Data cleaning

In [225]:
def parse_total_raised(money):
    new_money = money.strip('$')
    if new_money == 'None':
        return 0
    elif 'M' in new_money:
        return float(new_money.strip('M'))
    elif 'B' in new_money:
        return float(new_money.strip('B'))*1000
    elif 'K' in new_money:
        return float(new_money.strip('K'))*0.001
    
df['Valuation ($B)'] = df['Valuation ($B)'].str.strip('$').astype('float64')
df['Total Raised'] = df['Total Raised'].apply(parse_total_raised)

In [226]:
df['Date Joined'] = df['Date Joined'].astype('datetime64')
df['Founded Year'] = df['Founded Year'].apply(lambda x: int(x) if x != 'None' else None)
df['Investors Count'] = df['Investors Count'].apply(lambda x: int(x) if x != 'None' else None)

In [227]:
df['Deal Terms'] = pd.to_numeric(df['Deal Terms'], 'coerce')

####Cleaning industry

In [228]:
df['Industry'] = df['Industry'].str.replace('Finttech', 'Fintech')
sorted(df['Industry'].unique())

['500 Global, Rakuten Ventures, Golden Gate Ventures',
 'Andreessen Horowitz, DST Global, IDG Capital',
 'Artificial Intelligence',
 'Artificial intelligence',
 'Auto & transportation',
 "B Capital Group, Monk's Hill Ventures, Dynamic Parcel Distribution",
 'Consumer & retail',
 'Cybersecurity',
 'Data management & analytics',
 'Dragonfly Captial, Qiming Venture Partners, DST Global',
 'E-commerce & direct-to-consumer',
 'Edtech',
 'Fintech',
 'Hardware',
 'Health',
 'Hopu Investment Management, Boyu Capital, DC Thomson Ventures',
 'Internet software & services',
 'Jungle Ventures, Accel, Venture Highway',
 'Kuang-Chi',
 'Mobile & telecommunications',
 'Mundi Ventures, Doqling Capital Partners, Activant Capital',
 'Other',
 'Sequoia Capital China, ING, Alibaba Entrepreneurs Fund',
 'Sequoia Capital China, Shunwei Capital Partners, Qualgro',
 'Sequoia Capital, Thoma Bravo, Softbank',
 'SingTel Innov8, Alpha JWC Ventures, Golden Gate Ventures',
 'Supply chain, logistics, & delivery',
 'T

In [229]:
industry = df.groupby('Industry')['Company'].count().reset_index().sort_values('Company')
odd_industries = industry[industry['Company'] <= 1].Industry
data_needs_shifting = df[df.Industry.isin(odd_industries)]
data_needs_shifting

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n
10,FTX,32.0,2021-07-20,Bahamas,Fintech,"Sequoia Capital, Thoma Bravo, Softbank",,2018.0,1829.0,Acq,40.0,3.0,1.0,1
217,HyalRoute,3.5,2020-05-26,Singapore,Mobile & telecommunications,Kuang-Chi,,2015.0,263.12,,1.0,1.0,,1
292,Amber Group,3.0,2021-06-21,Hong Kong,Fintech,"Tiger Global Management, Tiger Brokers, DCM Ve...",,2015.0,328.0,,18.0,3.0,,1
318,Moglix,2.6,2021-05-17,Singapore,E-commerce & direct-to-consumer,"Jungle Ventures, Accel, Venture Highway",,2015.0,470.71,,16.0,6.0,,1
438,Advance Intelligence Group,2.0,2021-09-23,Singapore,Artificial intelligence,"Vision Plus Capital, GSR Ventures, ZhenFund",,2016.0,536.0,,13.0,1.0,,1
639,Trax,1.3,2019-07-22,Singapore,Artificial intelligence,"Hopu Investment Management, Boyu Capital, DC T...",,2010.0,1013.0,,10.0,5.0,,1
757,Carousell,1.1,2021-09-15,Singapore,E-commerce & direct-to-consumer,"500 Global, Rakuten Ventures, Golden Gate Vent...",,2012.0,443.0,,16.0,7.0,,1
814,WeLab,1.0,2017-11-08,Hong Kong,Fintech,"Sequoia Capital China, ING, Alibaba Entreprene...",,2013.0,896.0,,13.0,3.0,,1
882,PatSnap,1.0,2021-03-16,Singapore,Internet software & services,"Sequoia Capital China, Shunwei Capital Partner...",,2007.0,352.32,,10.0,3.0,,1
911,Matrixport,1.05,2021-06-01,Singapore,Fintech,"Dragonfly Captial, Qiming Venture Partners, DS...",,,126.55,,22.0,1.0,,1


In [230]:
def get_city(country):
    if country == 'Bahamas':
        return 'Nassau'
    # elif country == 'United States':
    #     return 'San Francisco'
    else:
        return CountryInfo(country).capital()
data_needs_shifting[['Industry', 'Select Inverstors']] = data_needs_shifting[['City', 'Industry']]
data_needs_shifting['City'] = data_needs_shifting['Country'].apply(get_city)
data_needs_shifting



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n
10,FTX,32.0,2021-07-20,Bahamas,Nassau,Fintech,"Sequoia Capital, Thoma Bravo, Softbank",2018.0,1829.0,Acq,40.0,3.0,1.0,1
217,HyalRoute,3.5,2020-05-26,Singapore,Singapore,Mobile & telecommunications,Kuang-Chi,2015.0,263.12,,1.0,1.0,,1
292,Amber Group,3.0,2021-06-21,Hong Kong,City of Victoria,Fintech,"Tiger Global Management, Tiger Brokers, DCM Ve...",2015.0,328.0,,18.0,3.0,,1
318,Moglix,2.6,2021-05-17,Singapore,Singapore,E-commerce & direct-to-consumer,"Jungle Ventures, Accel, Venture Highway",2015.0,470.71,,16.0,6.0,,1
438,Advance Intelligence Group,2.0,2021-09-23,Singapore,Singapore,Artificial intelligence,"Vision Plus Capital, GSR Ventures, ZhenFund",2016.0,536.0,,13.0,1.0,,1
639,Trax,1.3,2019-07-22,Singapore,Singapore,Artificial intelligence,"Hopu Investment Management, Boyu Capital, DC T...",2010.0,1013.0,,10.0,5.0,,1
757,Carousell,1.1,2021-09-15,Singapore,Singapore,E-commerce & direct-to-consumer,"500 Global, Rakuten Ventures, Golden Gate Vent...",2012.0,443.0,,16.0,7.0,,1
814,WeLab,1.0,2017-11-08,Hong Kong,City of Victoria,Fintech,"Sequoia Capital China, ING, Alibaba Entreprene...",2013.0,896.0,,13.0,3.0,,1
882,PatSnap,1.0,2021-03-16,Singapore,Singapore,Internet software & services,"Sequoia Capital China, Shunwei Capital Partner...",2007.0,352.32,,10.0,3.0,,1
911,Matrixport,1.05,2021-06-01,Singapore,Singapore,Fintech,"Dragonfly Captial, Qiming Venture Partners, DS...",,126.55,,22.0,1.0,,1


In [231]:
mask = df.Company.isin(data_needs_shifting.Company.values)
df[mask] = data_needs_shifting

In [232]:
df['Industry'].unique()

array(['Artificial intelligence', 'Other', 'Fintech',
       'Internet software & services',
       'Supply chain, logistics, & delivery',
       'Data management & analytics', 'E-commerce & direct-to-consumer',
       'Edtech', 'Hardware', 'Consumer & retail', 'Health',
       'Auto & transportation', 'Cybersecurity',
       'Mobile & telecommunications', 'Travel', 'Artificial Intelligence'],
      dtype=object)

In [233]:
mapper = {
 'Artificial Intelligence': 'AI',
 'Other': 'Other',
 'Fintech': 'FinTech',
 'Internet Software & Services': 'Internet Software',
 'Supply Chain, Logistics, & Delivery': 'Supply Chain',
 'Data Management & Analytics': 'Data Management',
 'E-Commerce & Direct-To-Consumer': 'E-Commerce',
 'Edtech': 'EduTech', 
 'Hardware': 'Hardware', 
 'Consumer & Retail': 'Retail', 
 'Health': 'Health',
 'Auto & Transportation': 'Automotives',
 'Cybersecurity': 'Cybersecurity',
 'Mobile & Telecommunications':'Telecomm', 
 'Travel':'Travel',
}
df['Industry'] = df['Industry'].str.title().map(mapper)

In [234]:
df.isna().sum()

Company               0
Valuation ($B)        0
Date Joined           0
Country               0
City                  0
Industry              0
Select Inverstors     0
Founded Year         43
Total Raised          0
Financial Stage       0
Investors Count       1
Deal Terms           29
Portfolio Exits       0
comp_n                0
dtype: int64

In [235]:
df.head()

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n
0,Bytedance,140.0,2017-04-07,China,Beijing,AI,"Sequoia Capital China, SIG Asia Investments, S...",2012.0,7440.0,IPO,28.0,8.0,5.0,1
1,SpaceX,100.3,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2002.0,6874.0,,29.0,12.0,,1
2,Stripe,95.0,2014-01-23,United States,San Francisco,FinTech,"Khosla Ventures, LowercaseCapital, capitalG",2010.0,2901.0,Asset,39.0,12.0,1.0,1
3,Klarna,45.6,2011-12-12,Sweden,Stockholm,FinTech,"Institutional Venture Partners, Sequoia Capita...",2005.0,3472.0,Acquired,56.0,13.0,1.0,1
4,Epic Games,42.0,2018-10-26,United States,Cary,Other,"Tencent Holdings, KKR, Smash Ventures",1991.0,4377.0,Acquired,25.0,5.0,2.0,1


In [236]:
df = df.drop(df[ df.Country == 'Israel'].index, axis=0)

####Creating investor df

In [237]:
values = df['Industry'].value_counts()
# df[df['Select Inverstors'].isin(values[values <= 1].index.values)]
dic = {}
def build_dic(investors):
    for investor in investors:
        try:
            dic[investor] += 1
        except:
            dic[investor] = 1

df['Select Inverstors'].str.split(', ').apply(build_dic)
investors = pd.DataFrame(dic, index=[0])
investors = investors.transpose().reset_index().sort_values(0, ascending=False)
investors.columns = ['investors', 'count']

In [238]:
investors.head()

Unnamed: 0,investors,count
42,Accel,58
18,Tiger Global Management,51
0,Sequoia Capital China,46
23,Andreessen Horowitz,45
11,Sequoia Capital,42


In [239]:
investors['investors'] = investors['investors'].str.strip()
investors.sort_values('investors').head(100).values

array([['.406 Ventures', 1],
       ['01 Advisors', 1],
       ['01 Advisors', 1],
       ['10T Fund', 1],
       ['14W', 2],
       ['1955 Capital', 1],
       ['360 Capital Partners', 1],
       ['3G Capital Management', 1],
       ['3L', 2],
       ['3i Group', 1],
       ['3one4 Capital Partners', 1],
       ['468 Capital', 1],
       ['500 Global', 2],
       ['500 Startups', 3],
       ['58.com', 2],
       ['5Y Capital', 3],
       ['83North', 4],
       ['8VC', 8],
       ['A&E Television Networks', 1],
       ['A&NN', 1],
       ['A91 Partners', 1],
       ['ACE & Company', 1],
       ['AME Cloud Ventures', 3],
       ['ARCH Venture Partners', 2],
       ['AU21', 1],
       ['AXA Venture Partners', 1],
       ['Accel', 58],
       ['Accel India', 1],
       ['Accel Partners', 5],
       ['Accelm Scania Growth Capital', 1],
       ['Access Industries', 4],
       ['Accomplice', 4],
       ['Acero Capital', 1],
       ['Activant Capital', 3],
       ['Activant Capital Group', 1]

In [240]:
dic = {x: 0 for x in df['Industry'].unique()}
keys = df['Industry'].unique()
temp = dic.copy()
investors_ind = investors.copy()
def split_and_strip(text):
    texts = text.split(',')
    text_stripped = [x.strip() for x in texts]
    return text_stripped
     
for ind, row in investors.iterrows():

    filter = df['Select Inverstors'].apply(lambda x: row['investors'] in split_and_strip(x))
    rr = df[filter]['Industry'].value_counts()
    res = rr.to_dict()
    temp = dic.copy()
    temp.update(res)
    investors_ind.loc[ind, keys] = list(temp.values())

####Country cleanup

In [241]:
country_city_company = df.groupby(['Country', 'City'])[['Company']].count().reset_index().sort_values('Company')
country_city_company[country_city_company['Company'] > 1].tail(20)

Unnamed: 0,Country,City,Company
76,India,Gurgaon,8
108,South Korea,Seoul,10
216,United States,Redwood City,10
224,United States,San Mateo,10
17,Brazil,Sao Paulo,11
103,Singapore,Singapore,12
35,China,Hangzhou,14
206,United States,Palo Alto,16
154,United States,Chicago,16
141,United States,Boston,16


In [242]:
def find_country (country_name):
    try:
        return pycountry.countries.get(name=country_name).alpha_3
    except:
        if country_name == 'South Korea':
            return 'KOR'
        elif country_name == 'Vietnam':
            return 'VNM'
        elif country_name == 'Czech Republic':
            return 'CZE'
        print(country_name)
        return ("not founded")
df['iso_alpha'] = df['Country'].apply(find_country)
df

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n,iso_alpha
0,Bytedance,140.0,2017-04-07,China,Beijing,AI,"Sequoia Capital China, SIG Asia Investments, S...",2012.0,7440.00,IPO,28.0,8.0,5,1,CHN
1,SpaceX,100.3,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2002.0,6874.00,,29.0,12.0,,1,USA
2,Stripe,95.0,2014-01-23,United States,San Francisco,FinTech,"Khosla Ventures, LowercaseCapital, capitalG",2010.0,2901.00,Asset,39.0,12.0,1,1,USA
3,Klarna,45.6,2011-12-12,Sweden,Stockholm,FinTech,"Institutional Venture Partners, Sequoia Capita...",2005.0,3472.00,Acquired,56.0,13.0,1,1,SWE
4,Epic Games,42.0,2018-10-26,United States,Cary,Other,"Tencent Holdings, KKR, Smash Ventures",1991.0,4377.00,Acquired,25.0,5.0,2,1,USA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1032,Timescale,1.0,2022-02-22,United States,New York,Internet Software,"New Enterprise Associates, Benchmark, Two Sigm...",2015.0,181.06,,7.0,2.0,,1,USA
1033,Scalapay,1.0,2022-02-23,Italy,Milan,FinTech,"Fasanara Capital, Tiger Global Management, Bal...",2019.0,700.00,,10.0,2.0,,1,ITA
1034,Omada Health,1.0,2022-02-23,United States,San Francisco,Health,"U.S. Venture Partners, dRx Capital, Andreessen...",2011.0,449.72,,30.0,6.0,,1,USA
1035,BlueVoyant,1.0,2022-02-23,United States,New York,Cybersecurity,"8VC, Liberty Strategic Capital, Eden Global Pa...",2017.0,525.50,,6.0,2.0,,1,USA


In [243]:
count_df = df.groupby('iso_alpha', as_index=False)[['Company']].count()
# count_df['Country'] = df['Country'].unique()
map = df[['Country', 'iso_alpha']].drop_duplicates().set_index('iso_alpha')
count_df['Country'] = map.loc[count_df['iso_alpha'], 'Country'].values
count_df

Unnamed: 0,iso_alpha,Company,Country
0,ARE,3,United Arab Emirates
1,ARG,1,Argentina
2,AUS,6,Australia
3,AUT,2,Austria
4,BEL,3,Belgium
5,BHS,1,Bahamas
6,BMU,1,Bermuda
7,BRA,16,Brazil
8,CAN,19,Canada
9,CHE,5,Switzerland


In [244]:
def country_to_continent(country_name):    
    country_alpha2 = pc.country_alpha3_to_country_alpha2(country_name)

    country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)

    country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)

    return country_continent_name

df['Continent'] = df['iso_alpha'].apply(country_to_continent)

In [245]:
count_df = df.groupby('iso_alpha', as_index=False)[['Company']].count()
count_df = count_df.sort_values('Company')
# count_df['Country'] = df['Country'].unique()
map = df[['Country', 'iso_alpha']].drop_duplicates().set_index('iso_alpha')
count_df['Country'] = map.loc[count_df['iso_alpha'], 'Country'].values
fig = px.choropleth(
                    count_df, 
                    locations="iso_alpha",
                    color="Company",
                    hover_name="Country",
                )
fig.show()

####Date cleanup

In [246]:
df['Year Joined'] = df['Date Joined'].dt.year

In [247]:
# df['MS'] = df['Date Joined'].dt.strftime("%Y-%m-01")
df['MS'] = df['Date Joined'].apply(lambda x: x.replace(day=1))

#Graph code and Dash

###Year founded

In [248]:
founded_year = df[['Founded Year', 'Company']].groupby('Founded Year', as_index=False).count().drop(0, 0)
founded_year = founded_year[founded_year['Founded Year'] >= 2000]
founded_year_line = px.line(founded_year, 
             x='Founded Year', 
             y='Company', 
             # hover_data={'Company': '%d Companies were founded'}
             )
founded_year_line.show()
founded_year.head()


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only



Unnamed: 0,Founded Year,Company
14,2000.0,12
15,2001.0,7
16,2002.0,3
17,2003.0,7
18,2004.0,8


In [249]:
df[df['Founded Year'] != 0][df['Founded Year'] <= 1990]

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n,iso_alpha,Continent,Year Joined,MS
215,Otto Bock HealthCare,3.52,2017-06-24,Germany,Duderstadt,Health,EQT Partners,1919.0,815.49,,2.0,1.0,,1,DEU,Europe,2017,2017-06-01
390,Avant,2.0,2012-12-17,United States,Chicago,AI,"RRE Ventures, Tiger Global, August Capital",1973.0,1730.0,,14.0,6.0,,1,USA,North America,2012,2012-12-01
520,Promasidor Holdings,1.59,2016-11-08,South Africa,Bryanston,Retail,"IFC, Ajinomoto",1979.0,556.0,,2.0,1.0,,1,ZAF,Africa,2016,2016-11-01
609,Five Star Business Finance,1.4,2021-03-26,India,Chennai,Other,"Sequoia Capital India, Tiger Global Management...",1984.0,459.78,,7.0,4.0,,1,IND,Asia,2021,2021-03-01
774,Radius Payment Solutions,1.07,2017-11-27,United Kingdom,Crewe,FinTech,Inflexion Private Equity,1990.0,200.11,,1.0,1.0,,1,GBR,Europe,2017,2017-11-01


###Year joined

In [250]:
year_joined = df[['Year Joined', 'Company']].groupby('Year Joined', as_index=False).count()
fig = px.bar(year_joined, x='Year Joined', y='Company', log_y=True)
fig.show()

In [251]:
year_2021 = df[df['Year Joined'] == 2021]
year_2021 = year_2021[['Date Joined', 'Company']].resample('MS', on='Date Joined').count()
fig = px.bar(year_2021, x=year_2021.index, y='Company')
# fig.update_xaxes(ticklabelposition="inside top", title=None)
fig.layout.xaxis.tickvals = year_2021.index
fig.layout.xaxis.tickformat = '%b'
fig.show()
year_2021

Unnamed: 0_level_0,Date Joined,Company
Date Joined,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-01,33,33
2021-02-01,20,20
2021-03-01,48,48
2021-04-01,44,44
2021-05-01,44,44
2021-06-01,51,51
2021-07-01,50,50
2021-08-01,31,31
2021-09-01,52,52
2021-10-01,43,43


In [252]:
px.bar(
    df.groupby('Industry', as_index=False).sum().sort_values('comp_n'),
    x='Industry',
    y='comp_n',
)

###Industry per continent

In [253]:
industry_country_count = df.groupby(['Continent', 'Country', 'Industry'], as_index=False).count()[['Continent', 'Country', 'Industry', 'Company']]

In [254]:
industry_per_unicorn = px.bar(
    # df,
    df.groupby(['Industry', 'Continent'], as_index=False).count().sort_values('Company', ascending=False), 
    y='Industry', 
    x='Company', 
    color='Continent',
    # color='Country',
    orientation='h', 
    labels={'Company':'Unicorns count'},
    title='Unicorn\'s Continents Main Industries',
)
industry_per_unicorn = industry_per_unicorn.update_layout(
    barmode='stack', 
    yaxis = dict(
        tickmode='array',
        # tickangle=45,
        categoryorder='total descending'
    )
)
industry_per_unicorn

In [255]:
company_per_country_hbar = px.bar(
    count_df.tail(10), 
    x="Company",
    y="Country",
    hover_name="Country",
    orientation='h',
    title='Top 10 unicorn producing countries',
)

In [256]:
year_joined_count = px.bar(year_joined, x='Year Joined', y='Company')

In [257]:
country_city_company = df.groupby(['Continent', 'Country'], as_index=False).count()
px.sunburst(
    # country_city_company[country_city_company['City'].isin(temp[temp['Country'] == 1]['City'])][country_city_company['Company']>3],
    country_city_company,
    path=['Continent', 'Country'],
    values='Company',
)

###Cholopeth

In [258]:
company_per_country_choropleth = px.choropleth(
    count_df, 
    locations="iso_alpha",
    color="Company",
    hover_name="Country",
    title='Unicorn density map',
)
company_per_country_choropleth


###Unicorns per year


In [259]:
# date_series = df.groupby(['MS', 'Continent', 'Industry'])['Company'].count().reset_index()
date_series = df.groupby(['MS', 'Continent'])['Company'].count().reset_index()
month_year_continent_count = px.scatter(
        date_series[date_series['MS'].dt.year>2010],
        x='MS',
        y='Company',
        color='Continent', 
        labels={'Company':'Unicorns count', 'MS': 'Year'},
        title='Number of Unicorns per year'
    )
month_year_continent_count = month_year_continent_count.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickangle=45,
        tickvals = date_series[date_series['MS'].dt.year>2010]['MS'].dt.year,
        ticktext = date_series[date_series['MS'].dt.year>2010]['MS'].dt.year,
    )
)
month_year_continent_count

###top 10 unicorns per industry

In [260]:
top_unicorns = df[['Company', 'Valuation ($B)', 'Industry']].sort_values('Valuation ($B)')
top_unicorns_bar = px.bar(
    top_unicorns.tail(15),
    y='Valuation ($B)',
    x='Company',
    # orientation='h',
    color='Industry',
    title='Top 15 Unicorns'
)
top_unicorns_bar = top_unicorns_bar.update_layout(
    barmode='stack', 
    xaxis={'categoryorder':'total ascending'}, 
    # showlegend=False,
    legend=dict(
        orientation="h",
        yanchor="top",
        title=None,
        y=0.999,
        # xanchor="top",
        # x=0.99,
    ),
    title={
        'x':0.5,
        'xanchor': 'center'
    },
    margin=dict(l=30, r=20, t=30, b=20),
)
top_unicorns_bar

### investor per industry

In [261]:
display(investors_ind.head())
melted_investors = investors_ind.melt(id_vars=['investors', 'count'])
display(melted_investors.head())
barmode = ['group', 'overlay', 'relative']
investor_per_industry_bar = px.bar(
    melted_investors.sort_values('count').tail(15*15), 
    x='investors', 
    y='value', 
    color='variable', 
    # barmode='overlay', 
    # opacity=0.7
)
investor_per_industry_bar = investor_per_industry_bar.update_layout(
    legend=dict(
        orientation="h",
        yanchor="top",
        title=None,
        y=0.999,
        # xanchor="top",
        # x=0.99,
    ),
    margin=dict(l=30, r=20, t=30, b=20),
)
investor_per_industry_bar

Unnamed: 0,investors,count,AI,Other,FinTech,Internet Software,Supply Chain,Data Management,E-Commerce,EduTech,Hardware,Retail,Health,Automotives,Cybersecurity,Telecomm,Travel
42,Accel,58,2.0,2.0,13.0,18.0,2.0,2.0,9.0,2.0,0.0,0.0,3.0,0.0,4.0,1.0,0.0
18,Tiger Global Management,51,3.0,1.0,16.0,13.0,1.0,0.0,5.0,2.0,2.0,3.0,1.0,1.0,1.0,2.0,0.0
0,Sequoia Capital China,46,9.0,2.0,3.0,5.0,3.0,0.0,9.0,5.0,3.0,2.0,1.0,1.0,0.0,2.0,1.0
23,Andreessen Horowitz,45,4.0,2.0,7.0,9.0,2.0,6.0,5.0,0.0,1.0,0.0,3.0,0.0,2.0,2.0,1.0
11,Sequoia Capital,42,1.0,1.0,8.0,10.0,2.0,4.0,5.0,0.0,3.0,0.0,4.0,1.0,1.0,2.0,0.0


Unnamed: 0,investors,count,variable,value
0,Accel,58,AI,2.0
1,Tiger Global Management,51,AI,3.0
2,Sequoia Capital China,46,AI,9.0
3,Andreessen Horowitz,45,AI,4.0
4,Sequoia Capital,42,AI,1.0


###industries pie

In [262]:
industries_pie = px.pie(
    df,
    names='Industry',
    values='comp_n',
    title='Industry portion of unicorns'
)
industries_pie

###companies's joining rate

In [263]:
df.head()

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n,iso_alpha,Continent,Year Joined,MS
0,Bytedance,140.0,2017-04-07,China,Beijing,AI,"Sequoia Capital China, SIG Asia Investments, S...",2012.0,7440.0,IPO,28.0,8.0,5.0,1,CHN,Asia,2017,2017-04-01
1,SpaceX,100.3,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2002.0,6874.0,,29.0,12.0,,1,USA,North America,2012,2012-12-01
2,Stripe,95.0,2014-01-23,United States,San Francisco,FinTech,"Khosla Ventures, LowercaseCapital, capitalG",2010.0,2901.0,Asset,39.0,12.0,1.0,1,USA,North America,2014,2014-01-01
3,Klarna,45.6,2011-12-12,Sweden,Stockholm,FinTech,"Institutional Venture Partners, Sequoia Capita...",2005.0,3472.0,Acquired,56.0,13.0,1.0,1,SWE,Europe,2011,2011-12-01
4,Epic Games,42.0,2018-10-26,United States,Cary,Other,"Tencent Holdings, KKR, Smash Ventures",1991.0,4377.0,Acquired,25.0,5.0,2.0,1,USA,North America,2018,2018-10-01


In [264]:
d=df[['Year Joined','Company']].groupby(['Year Joined'],as_index=False).count()
d


Unnamed: 0,Year Joined,Company
0,2007,1
1,2011,2
2,2012,4
3,2013,3
4,2014,13
5,2015,35
6,2016,21
7,2017,44
8,2018,102
9,2019,106


In [265]:
filterd = d[:-1]

In [266]:
founding_joining_rate = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
founding_joining_rate.add_trace(
    go.Scatter(
        name="Joined", 
        mode='lines',
        x=filterd['Year Joined'],
        y=filterd['Company'], 
        ),
    secondary_y=False,
)

founding_joining_rate.add_trace(
    go.Line(
        name="Founded", 
        x=founded_year['Founded Year'], 
        y=founded_year['Company'], 
        line=dict(color=palette[-3], width=4, dash='dot')
        ),
    secondary_y=False,
)

founding_joining_rate.update_layout(
    title_text="Company Founding Rate to Joining Rate"
)

founding_joining_rate.update_xaxes(title_text="Year")
founding_joining_rate.update_yaxes(title_text="Number of Companies")
founding_joining_rate.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [267]:
companies_joining_rate=px.line(filterd,x=['Year Joined'],y='Company',
                labels={'Company':'no of companies joined','Year Joined':'year joined in'})#, title="Companies's joining rate")
companies_joining_rate.update_layout(
     title={
            'text' : "Companies's joining rate",
            'x':0.5,
            'xanchor': 'center'},
)
companies_joining_rate.show()

###Deal terms

In [268]:
d=df[['Valuation ($B)','Deal Terms']].groupby(['Deal Terms'],as_index=False).mean()

In [269]:
deal_term_disribution=px.bar(d, x="Deal Terms",y='Valuation ($B)')
deal_term_disribution.update_layout(
     title={
            'text' : " Deal term's disribution for Unicorns",
            'x':0.5,
            'xanchor': 'center'},
)

###industries based on the Valuation

In [270]:
df.head()

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Inverstors,Founded Year,Total Raised,Financial Stage,Investors Count,Deal Terms,Portfolio Exits,comp_n,iso_alpha,Continent,Year Joined,MS
0,Bytedance,140.0,2017-04-07,China,Beijing,AI,"Sequoia Capital China, SIG Asia Investments, S...",2012.0,7440.0,IPO,28.0,8.0,5.0,1,CHN,Asia,2017,2017-04-01
1,SpaceX,100.3,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2002.0,6874.0,,29.0,12.0,,1,USA,North America,2012,2012-12-01
2,Stripe,95.0,2014-01-23,United States,San Francisco,FinTech,"Khosla Ventures, LowercaseCapital, capitalG",2010.0,2901.0,Asset,39.0,12.0,1.0,1,USA,North America,2014,2014-01-01
3,Klarna,45.6,2011-12-12,Sweden,Stockholm,FinTech,"Institutional Venture Partners, Sequoia Capita...",2005.0,3472.0,Acquired,56.0,13.0,1.0,1,SWE,Europe,2011,2011-12-01
4,Epic Games,42.0,2018-10-26,United States,Cary,Other,"Tencent Holdings, KKR, Smash Ventures",1991.0,4377.0,Acquired,25.0,5.0,2.0,1,USA,North America,2018,2018-10-01


In [271]:
d=df[['Industry','Valuation ($B)','Continent']].groupby(['Industry','Continent'],as_index=False)['Valuation ($B)'].sum()

In [272]:
d=d.sort_values('Valuation ($B)',ascending=False)

In [273]:
f=d['Valuation ($B)']<=1
d=d[~f]

In [274]:
industriesBasedOnValuation = px.bar(d, x="Industry", y="Valuation ($B)",color="Continent")
      
industriesBasedOnValuation.update_layout(barmode='stack')
#AA.show()
industriesBasedOnValuation.update_layout(
    barmode='stack', 
    xaxis={'categoryorder':'total ascending'}, 
     title={
            'text' : " Top industries based on the Valuation",
            'x':0.5,
            'xanchor': 'center'},
    yaxis = dict(
        tickmode='array',
        # tickangle=45,
        categoryorder='total descending'

    ),
     legend=dict(
        orientation="h",
        yanchor="top",
        title=None,
        y=0.999,
        # xanchor="top",
        # x=0.99,
    ),
    margin=dict(l=30, r=20, t=30, b=20),
)



##Dash

In [303]:
bg = '#D6CCC2'
cbg = '#EDEDE9'
text = '#564839'

In [276]:
def card(content, className=None):
    return dbc.Card(content, color="secondary", outline=True, className=className, style={'background':cbg})

In [325]:
def ban(value, title, class_name=None, small_text=None):
    span = html.Span(
        small_text, 
        style={
            'color':text,
            'fontSize': 26,
            # 'fontStyle': 'italic',
            'fontWeight': 'normal'
        }
    )
    return dbc.Col(
        card(
            html.Div([
                html.H1([value, span], className=f"text-{class_name}", style={'color':text}),
                html.H3(title, className=f"text-{class_name}", style={'color':text}),
            ],
                className='p-2'
            )
        )
    )

In [333]:
import os
path = '/content/assets/typography.css'
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'w') as writefile:
    writefile.write("body {background: #FEFFDD;}") #d6ccc2 #E2E0E1

In [334]:
app = JupyterDash(external_stylesheets=[dbc.themes.JOURNAL])
map_button = html.Div(
                        dbc.Switch(
                            id="map_view_switch",
                            label="toggle map view",
                            # class_name='text-align-center mr-1 pr-0 text-center',
                            style={'textAlign':'center', 'float':'left'},
                            value=False,
                        ),
                        style={'float':'right', 'width':'60%', 'margin': 'auto'},
                        # class_name='text-align-center mr-1 pr-0 text-center',
                    )
title = "Unicorn Companies Overview"
paragraph = 'Unicorn companies are those that reach a valuation of \
$1 billion without being listed on \
the stock market and are the dream of any tech startup.'
app.title = title
app.layout = html.Div([
                            html.H1(title, style={'color':'#312921'}),
                            html.P(paragraph, style={'color':'#312921'}),
                            dbc.Row([
                                        ban(
                                            title='Unicorns Count',
                                            value=df['Company'].value_counts().sum(),
                                            # class_name='dark'
                                        ),
                                        ban(
                                            title='Mean Valuation',
                                            value=round(df['Valuation ($B)'].mean(), 2),
                                            small_text="$B"
                                            # class_name='danger'
                                        ),
                                        ban(
                                            title='Mean Total Raised',
                                            value=round(df['Total Raised'].mean(), 2),
                                            small_text="$M"
                                            # class_name=''
                                        ),
                                        ban(
                                            title='Total Investors Count',
                                            value=investors.shape[0],
                                            # class_name=''
                                        )
                            ]),
                            html.Br(),
                            dbc.Row([
                                     dbc.Col([
                                        card(
                                            dcc.Graph(
                                                id='companies_joining_rate',
                                                # figure=companies_joining_rate,
                                                figure=founding_joining_rate,
                                                ),
                                             className='h-100 justify-content-center'
                                            )
                                        # dcc.Graph(
                                        #         id='industry_per_unicorn',
                                        #         figure=industry_per_unicorn,
                                        #         #responsive=True,
                                        #     ),
                                     ]),
                                     dbc.Col([
                                        card(
                                            [dcc.Dropdown(
                                                id='select_industry', 
                                                options=df['Industry'].unique(), 
                                                value=[],
                                                multi=True,
                                                searchable=True,
                                                className='mt-3 mb-2 mx-4'
                                            ),        
                                            dcc.Graph(
                                                # id='industry_per_unicorn', 
                                                # figure=industry_per_unicorn,
                                                id='top_unicorns_bar',
                                                figure=top_unicorns_bar,
                                                responsive=True,
                                            ),
                                        ])          
                                     ]),
                            ]),
                            html.Br(),
                            dbc.Row([
                                     dbc.Col(
                                         card([
                                            dcc.Dropdown(
                                                id='select_industry_investor', 
                                                options=df['Industry'].unique(), 
                                                value=[],
                                                multi=True,
                                                searchable=True,
                                                className='mt-3 mb-2 mx-4'
                                            ),      
                                            dcc.Graph(
                                                    id='investor_per_industry_bar', 
                                                    figure=investor_per_industry_bar,
                                                    # responsive=True,
                                                ),
                                        ])
                                    ),
                            ]),
                            html.Br(),
                            dbc.Row([
                                        dbc.Col([
                                                    card(dcc.Graph(
                                                        # id='industries_pie',
                                                        # figure=industries_pie,
                                                        # id='month_year_continent_count', 
                                                        # figure=month_year_continent_count,
                                                        id='deal_term_disribution',
                                                        figure=deal_term_disribution
                                                    ),)
                                                ],
                                                md=6,
                                                sm=12,
                                                width=6,
                                                ),
                                        dbc.Col([
                                                    card(dcc.Graph(
                                                        id='industriesBasedOnValuation', 
                                                        figure=industriesBasedOnValuation,
                                                    ),)
                                                    # map_button
                                                    
                                                ], 
                                                md=6,
                                                width=6,
                                                sm=12,
                                                ),
                            ],
                                justify="center",
                                align="start",
                            ),
    ], className="mx-5 my-4")


# @app.callback(
#     Output("industriesBasedOnValuation", "figure"),
#     Input("map_view_switch", "value"),
# )
# def map_view_switch(value):
#     return company_per_country_choropleth if value else industriesBasedOnValuation

@app.callback(
    Output("top_unicorns_bar", "figure"),
    Input("select_industry", "value"),
)
def industry_filter(value):
    title='Top 15 Unicorns in All Industries' 
    if len(value) != 0:
        title =f'Top 15 Unicorns in {" and ".join(value)}'
    top_unicorns_bar = px.bar(
        top_unicorns.tail(15) if len(value) == 0 else top_unicorns[top_unicorns['Industry'].isin(value)].tail(15),
        y='Valuation ($B)',
        x='Company',
        # orientation='h',
        color='Industry',
        title=title
    )
    top_unicorns_bar = top_unicorns_bar.update_layout(
        barmode='stack', 
        xaxis={'categoryorder':'total ascending'}, 
        # showlegend=False,
        legend=dict(
            orientation="h",
            yanchor="top",
            title=None,
            y=0.999,
            # xanchor="top",
            # x=0.99,
        ),
        margin=dict(l=30, r=20, t=30, b=20),
    )
    top_unicorns_bar = top_unicorns_bar.update_xaxes(tickangle=45)
    return top_unicorns_bar

@app.callback(
    Output("investor_per_industry_bar", "figure"),
    Input("select_industry_investor", "value"),
)
def investor_industry_filter(value):
    temp_df = melted_investors.copy()
    if len(value) == 0:
        plot =  px.bar(
                        temp_df[['investors', 'count']].drop_duplicates().sort_values('count').tail(15), 
                        x='investors', 
                        y='count', 
                        # color='variable',
                        title='Top 10 Investors'
                    )
        return plot.update_layout(
        barmode='stack', 
        xaxis={'categoryorder':'total ascending'},
        legend=dict(
            orientation="h",
            yanchor="top",
            title=None,
            y=0.999,
            # xanchor="top",
            # x=0.99,
        ),
        margin=dict(l=30, r=20, t=30, b=20),
        )
    
    
    if len(value) != 0:
        title = f'Top 15 Investors in {" and ".join(value)}'
    temp_df = temp_df[temp_df['variable'].isin(value)]
    plot = px.bar(
                    temp_df.sort_values('count').tail(15*len(value)), 
                    x='investors', 
                    y='value', 
                    color='variable',
                    title=title
                )
    
    plot = plot.update_layout(
        barmode='stack', 
        xaxis={'categoryorder':'total ascending'},
        legend=dict(
            orientation="h",
            yanchor="top",
            title=None,
            y=0.999,
            # xanchor="top",
            # x=0.99,
        ),
        margin=dict(l=30, r=20, t=30, b=20),
        )
    return plot

app.run_server(debug=True)

Dash app running on:


<IPython.core.display.Javascript object>