In [6]:
import requests
import os
import re

import pandas as pd
import numpy as np


import plotly.express as px
import plotly.graph_objects as go


pd.set_option('display.max_columns', 70)

import plotly.offline as pyo
pyo.init_notebook_mode(connected=True)
import plotly.io as pio
pio.renderers.default = "vscode"

years = [str(num) for num in range(2019, 2025)]



# Load in df

In [60]:
current_directory = os.getcwd()

# Define the file path for saving the CSV
file_path = os.path.join(current_directory, 'data.csv')


df = pd.read_csv(file_path,
                delimiter=',',            # Comma as delimiter
                encoding='utf-8',
                )


df['Absolute Growth Rate'] = pd.to_numeric(df['Absolute Growth Rate'],errors = 'coerce')


df.head()



Unnamed: 0,Rank,Name,Country,Sector,Absolute Growth Rate,Compound Annual Growth Rate,Revenue 2017,Employees 2017,Founded,Revenue 2018,Revenue 2015,Employees 2018,Revenue 2019,Revenue 2016,Employees 2019,Employees 2016,Revenue 2020,Employees 2020,Revenue 2021,Employees 2021,Revenue 2022,Employees 2022,Year
0,1,Blue Motor Finance,United Kingdom,Financial Services,51364.0,701.4,61.4,155,1992.0,,,,,,,,,,,,,,2019
1,2,Deliveroo,United Kingdom,Food & Beverage,15749.0,441.2,316.1,1664,2013.0,,,,,,,,,,,,,,2019
2,3,Taxify,Estonia,Technology,12231.0,397.7,17.8,350,2013.0,,,,,,,,,,,,,,2019
3,4,Solectric,Germany,Technology,7772.0,328.6,40.9,30,2012.0,,,,,,,,,,,,,,2019
4,5,Psioxus Therapeutics,United Kingdom,Pharmaceuticals,7126.0,316.5,64.4,69,2006.0,,,,,,,,,,,,,,2019


# Analysis

# top 5 sectors in each year

In [77]:
n = 5

def get_top_n_sectors_by_num(df, n, col):
    return df[col].value_counts()[:n].index.tolist()

def get_top_n_sectors_by_val(df, n, col, val):
    grouped=df.groupby([col])[val].mean().reset_index().sort_values(by=[val], ascending=False)
    return grouped[col][:n].tolist()

print('-'*80)
print('-'*80)

##### TOP SECTORS BY NUMBER OF STARTUPS #####
for year in years:
    print('-'*80)

    temp_df = df.loc[df['Year'] == int(year)]
    print(f'{year} ==  {get_top_n_sectors_by_num(temp_df, n, 'Sector')}')

print('-'*80)
print('-'*80)

##### TOP COUNTRIES BY NUMBER OF STARTUPS #####
for year in years:
    print('-'*80)

    temp_df = df.loc[df['Year'] == int(year)]
    print(f'{year} ==  {get_top_n_sectors_by_num(temp_df, n, 'Country')}')


print('-'*80)
print('-'*80)


##### TOP SECTORS BY NUMBER OF GROWTH RATE #####
for year in years:
    print('-'*80)

    temp_df = df.loc[df['Year'] == int(year)]
    print(f'{year} ==  {get_top_n_sectors_by_val(temp_df, n, 'Sector', 'Compound Annual Growth Rate')}')


print('-'*80)
print('-'*80)



##### TOP COUNTRIES BY NUMBER OF GROWTH RATE #####
for year in years:
    print('-'*80)

    temp_df = df.loc[df['Year'] == int(year)]
    print(f'{year} ==  {get_top_n_sectors_by_val(temp_df, n, 'Country', 'Compound Annual Growth Rate')}')



print('-'*80)
print('-'*80)


--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
2019 ==  ['Technology', 'Support Services', 'Construction', 'Sales & Marketing', 'Ecommerce']
--------------------------------------------------------------------------------
2020 ==  ['Technology', 'Support Services', 'Construction', 'Retail', 'Financial Services']
--------------------------------------------------------------------------------
2021 ==  ['Technology', 'Construction', 'Support Services', 'Retail', 'Financial Services']
--------------------------------------------------------------------------------
2022 ==  ['Technology', 'Construction', 'Retail', 'Ecommerce', 'Support Services']
--------------------------------------------------------------------------------
2023 ==  ['It & Software', 'Ecommerce', 'Construction', 'Financial Servi

In [74]:
col = 'Sector'
val = 'Compound Annual Growth Rate'


df.groupby([col])[val].median().reset_index().sort_values(by=['Compound Annual Growth Rate'], ascending=False)[:10]


Unnamed: 0,Sector,Compound Annual Growth Rate
3,Batteries,81.9
26,Leisure & Entertainment,75.35
34,Pharmaceuticals,73.3
10,Education,72.4
33,Personal & Household Goods,71.7
15,Financial Services,71.1
13,Energy,71.0
11,Electrical Manufacturing,70.95
20,Hospitality,68.65
23,Interiors,67.4


In [61]:
temp_df.dtypes

Rank                            object
Name                            object
Country                         object
Sector                          object
Absolute Growth Rate            object
Compound Annual Growth Rate    float64
Revenue 2017                    object
Employees 2017                  object
Founded                        float64
Revenue 2018                    object
Revenue 2015                   float64
Employees 2018                  object
Revenue 2019                    object
Revenue 2016                    object
Employees 2019                 float64
Employees 2016                 float64
Revenue 2020                    object
Employees 2020                 float64
Revenue 2021                    object
Employees 2021                  object
Revenue 2022                    object
Employees 2022                 float64
Year                             int64
dtype: object

In [54]:
# get top 10 most frequent names
n = 20
frequenct_sector = df['Sector'].value_counts()[:n].index.tolist()

frequenct_sector

['Technology',
 'Construction',
 'Ecommerce',
 'It & Software',
 'Financial Services',
 'Sales & Marketing',
 'Support Services',
 'Retail',
 'Energy',
 'Management Consulting',
 'Transport',
 'Industrial Goods',
 'Food & Beverage',
 'Media',
 'Automotive',
 'Health',
 'Real Estate',
 'Wholesale',
 'Travel & Leisure',
 'Education']

In [55]:
df.head()

Unnamed: 0,Rank,Name,Country,Sector,Absolute Growth Rate,Compound Annual Growth Rate,Revenue 2017,Employees 2017,Founded,Revenue 2018,Revenue 2015,Employees 2018,Revenue 2019,Revenue 2016,Employees 2019,Employees 2016,Revenue 2020,Employees 2020,Revenue 2021,Employees 2021,Revenue 2022,Employees 2022,Year
0,1,Blue Motor Finance,United Kingdom,Financial Services,51364,701.4,61.4,155,1992.0,,,,,,,,,,,,,,2019
1,2,Deliveroo,United Kingdom,Food & Beverage,15749,441.2,316.1,1664,2013.0,,,,,,,,,,,,,,2019
2,3,Taxify,Estonia,Technology,12231,397.7,17.8,350,2013.0,,,,,,,,,,,,,,2019
3,4,Solectric,Germany,Technology,7772,328.6,40.9,30,2012.0,,,,,,,,,,,,,,2019
4,5,Psioxus Therapeutics,United Kingdom,Pharmaceuticals,7126,316.5,64.4,69,2006.0,,,,,,,,,,,,,,2019
