# **Importing libraries**

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, files_in_folder in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, file_name) for file_name in files_in_folder):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/indian-startups-funding-data-januarymay-2022/Indian Startups - Funding  Investors Data January 2022.csv
/kaggle/input/indian-startups-funding-data-januarymay-2022/Indian Startups - Funding  Investors Data March 2022.csv
/kaggle/input/indian-startups-funding-data-januarymay-2022/Indian Startups - Funding  Investors Data April 2022.csv
/kaggle/input/indian-startups-funding-data-januarymay-2022/Indian Startups - Funding  Investors Data February 2022.csv
/kaggle/input/indian-startups-funding-data-januarymay-2022/Indian Startups - Funding  Investors Data May 2022.csv


**Importing visualization libraries**

In [2]:
import plotly.graph_objects as go
#import seaborn as sns
import matplotlib.pyplot as plt
#import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import iplot,init_notebook_mode,plot

# Initialise plotly's offline notebook mode (imported from plotly.offline above).
init_notebook_mode(connected=True)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

 **Importing Data**

In [3]:
# Load the January 2022 funding sheet; the file is decoded with the 'unicode_escape' codec.
startup_India = pd.read_csv(
    "/kaggle/input/indian-startups-funding-data-januarymay-2022/Indian Startups - Funding  Investors Data January 2022.csv",
    encoding='unicode_escape',
)
# Preview the first five rows.
startup_India.head()

Unnamed: 0,Company Name,Founded,Location,Sector,What it does,Founders,Investor,Amount,Stage
0,Mamaearth,2016,Gurgaon,"Health, Wellness & Fitness","Making products which are mum-baby friendly, t...",Varun Alagh,Sequoia,"$52,000,000",
1,EsportsXO,2020,Bangalore,Mobile Games,An esports company which offers tournament man...,"Utsav Umang, Rohit Raj, Vikas Goel","We Founder Circle, Wami Capital, SucSEED Indov...","$1,000,000",Seed
2,Geniemode,2021,Gurgaon,Import & Export,Geniemode is building an extensive online plat...,"Tanuj Gangwani, Amit Sharma","Deepinder Goyal, Kunal Shah, Prashant Malik, P...","$7,000,000",Series A
3,Exotel,2011,Bangalore,Telecommunications,"Customer engagement platform, streamlining cus...",Shivakumar Ganesan,Steadview Capital,"$40,000,000",Series D
4,Innoviti Payment Solutions,2002,Bangalore,Financial Services,Innoviti is Indias most exciting payments com...,Rajeev Agrawal,"Trifecta Debt Fund, Patni Advisors, Bessemer V...","$10,000,000",


# **Data Cleaning**

**In our dataset we need to clean the FOUNDED and AMOUNT columns**

* **Changing the Founded column from int64 to datetime and extracting the year, as only the year is available**
* **Removing the dollar sign and then changing the type from string to float** 

In [4]:
# 'Founded' only carries a year: parse it with the '%Y' format, then keep just the year number.
founded_as_datetime = pd.to_datetime(startup_India['Founded'], format='%Y')
startup_India['Founded'] = founded_as_datetime.map(lambda stamp: stamp.year)

In [5]:
# Strip the dollar sign and the thousands separators so that only the digits remain.
# The dollar pattern is a raw string: '\$' in a plain string literal is an invalid
# escape sequence, which Python reports with a warning.
startup_India['Amount'] = startup_India['Amount'].replace({r'\$': '', ',': ''}, regex=True)
# Convert to float; any value that is still not numeric becomes NaN (errors='coerce').
startup_India['Amount'] = pd.to_numeric(startup_India['Amount'], errors='coerce').astype(float)

# **Checking for Null values in the Data**

In [6]:
# Missing values table
def missing_values_table(df):
    """Summarise the missing values of a DataFrame, column by column.

    Prints (in bold, using ANSI escape codes) the total number of columns in
    ``df`` and how many of them contain at least one missing value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column that has missing values, indexed by column name,
        with the columns 'Missing Values' (count) and
        '% of Total Missing Values' (share of the rows), rounded to one
        decimal and sorted by the percentage in descending order.
    """
    # Count the missing values once and reuse the result for the percentage.
    mis_val = df.isnull().sum()

    # An empty frame would give 0/0 = NaN percentages; report 0.0 instead.
    n_rows = len(df)
    if n_rows:
        mis_val_percent = 100 * mis_val / n_rows
    else:
        mis_val_percent = mis_val.astype(float)

    # Put counts and percentages side by side under readable column names.
    mis_val_tab = pd.concat([mis_val, mis_val_percent], axis=1)
    mis_val_tab = mis_val_tab.rename(
        columns={0: 'Missing Values', 1: '% of Total Missing Values'})

    # Keep only the columns that really have missing values. Filtering on the
    # count (not on "percentage != 0") keeps NaN percentages from slipping through.
    has_missing = mis_val_tab['Missing Values'] > 0
    ren_mis_val_table_columns = (
        mis_val_tab[has_missing]
        .sort_values('% of Total Missing Values', ascending=False)
        .round(1)
    )

    # Print a short summary.
    print("\033[1mCurrent dataframe has total " + str(df.shape[1]) + " columns.\033[0m \n")
    print("\033[1mThere are "
          + str(ren_mis_val_table_columns.shape[0]) + " columns that have missing values.\033[0m \n")

    # Return the table with the missing-value information.
    return ren_mis_val_table_columns

# Build the missing-values summary for the funding data.
missing_val= missing_values_table(startup_India)
# Display the summary with an 'Oranges' background gradient applied to its cells.
missing_val.style.background_gradient(cmap='Oranges')

[1mCurrent dataframe has total 9 columns.[0m 

[1mThere are 3 columns that have missing values.[0m 



Unnamed: 0,Missing Values,% of Total Missing Values
Stage,43,37.4
Amount,7,6.1
Investor,5,4.3


# **Sectors Info**

> **Unique sectors**

In [7]:
# Count the distinct values in the 'Sector' column and report the total in bold.
sector_column = startup_India['Sector']
x = sector_column.nunique()
print(f'\033[1mTotal number of unique sectors : {x}\033[0m')

[1mTotal number of unique sectors : 42[0m


# **Most common sectors among funded startups**

In [8]:
# Ten most common sectors. value_counts() counts rows per sector, so the chart
# shows how many funded startups each sector has, not the amount of money raised.
sectors = startup_India['Sector'].value_counts().head(10)
# Explicit colours for the first slices of the pie.
colrs = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

fig = go.Figure(
    data=[go.Pie(labels=sectors.index, values=sectors.values)],
    # The title states what is plotted: a count of startups, not funding totals.
    layout=go.Layout(title='TOP 10 SECTORS BY NUMBER OF FUNDED STARTUPS'),
)
fig.update_traces(hoverinfo='label+percent+value', textinfo='label+percent',
                  textfont_size=10, marker=dict(colors=colrs))
# Black outline around every slice.
fig.data[0].marker.line.width = 2
fig.data[0].marker.line.color = "black"
fig.update_layout(height=600, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

# **Top Stages of funding**

> Note: the results here may not be accurate, because a high percentage of the STAGE values is missing

In [9]:
# Count how often each funding stage occurs; rows without a stage are grouped as 'UNKNOWN'.
new_df_updated_funding = startup_India.copy()
# Assign the filled column back rather than calling fillna(inplace=True) on the
# selected column: that form is chained assignment, which pandas warns about and
# which leaves the frame unchanged when Copy-on-Write is enabled.
new_df_updated_funding['Stage'] = new_df_updated_funding['Stage'].fillna('UNKNOWN')
funding = new_df_updated_funding['Stage'].value_counts()

# One colour value per bar (0, 1, 2, ...).
colr = np.arange(0, funding.values.size)
fig = go.Figure(
    data=[go.Bar(x=funding.index, y=funding.values, marker={'color': colr})],
    layout=go.Layout(
        title='Most common stages of funding received',
        xaxis=dict(title='Fundings Stages'),
        yaxis=dict(title='Value Counts'),
    ),
)
# Black outline around every bar.
fig.data[0].marker.line.width = 2
fig.data[0].marker.line.color = "black"
fig.update_layout(height=500, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

# **Fundraising info**

In [10]:
# Work on a copy in which missing amounts are replaced by the median amount.
# The filled column is assigned back: fillna(inplace=True) on a column selection is
# chained assignment, which pandas warns about and which is a no-op under Copy-on-Write.
new_df_updated_amount = startup_India.copy()
new_df_updated_amount['Amount'] = new_df_updated_amount['Amount'].fillna(startup_India['Amount'].median())

summary_columns = ['Company Name', 'Founders', 'Investor', 'Amount']

# Highest amount raised: keep every row that ties for the maximum, report the first one.
is_max_amount = new_df_updated_amount['Amount'] == new_df_updated_amount['Amount'].max()
max_fund_startup = new_df_updated_amount.loc[is_max_amount, summary_columns].reset_index(drop=True)

print(f"\033[1mHighest amount of FUND raised is $ {max_fund_startup.iloc[0]['Amount']} by {max_fund_startup.iloc[0]['Company Name']}.\033[0m")
print(f"\033[1m{max_fund_startup.iloc[0]['Company Name']} is founded by {max_fund_startup.iloc[0]['Founders']}.\033[0m")
print(f"\033[1m{max_fund_startup.iloc[0]['Company Name']} is invested by {max_fund_startup.iloc[0]['Investor']}.\033[0m \n")

# Lowest amount raised: same approach with the minimum.
is_min_amount = new_df_updated_amount['Amount'] == new_df_updated_amount['Amount'].min()
min_fund_startup = new_df_updated_amount.loc[is_min_amount, summary_columns].reset_index(drop=True)

print(f"\033[1mMinimum amount of FUND raised is $ {min_fund_startup.iloc[0]['Amount']} by {min_fund_startup.iloc[0]['Company Name']}\033[0m")
print(f"\033[1m{min_fund_startup.iloc[0]['Company Name']} is founded by {min_fund_startup.iloc[0]['Founders']}.\033[0m\n")

# Average funding. Note that the median-imputed rows are part of this mean.
# No sort is needed before taking the mean.
print(f"\033[1mOn Average Indian startups got funding of $ {new_df_updated_amount['Amount'].mean():.2f}\033[0m")

[1mHighest amount of FUND raised is $ 700000000.0 by Swiggy.[0m
[1mSwiggy is founded by Sriharsha Majety, Rahul Bothra, Dale Vaz.[0m
[1mSwiggy is invested by Invesco.[0m 

[1mMinimum amount of FUND raised is $ 140000.0 by CellStrat[0m
[1mCellStrat is founded by Vivek, Vishal Singhal.[0m

[1mOn Average Indian startups got funding of $ 35975260.87[0m


In [11]:
# company_total_amount = startup_India.groupby(['Company Name'])['Amount'].sum()
# unicorn_company = company_total_amount[company_total_amount == 1000000000]
# print(company_total_amount)

# **Startups vs. number of fundings**

In [12]:
# Number of distinct company names in the data.
ln = len(startup_India["Company Name"].unique())
print(f"\033[1mTotal numbers of startups got funded : {ln}\033[0m \n")
print("\033[1mNumbers of times each startup got funded :-\033[0m \n")

# Rows per company name, ordered from the most to the fewest funding entries.
funding_counts = startup_India["Company Name"].value_counts().sort_values(ascending=False)
names = funding_counts.head(10)
print(f"\033[1m {names} \033[0m")
startup_names = funding_counts.head(20)

# Bar chart of the 20 most frequent names, one colour value per bar, black outlines.
colr = np.arange(0, startup_names.values.size)
bar_trace = go.Bar(
    x=startup_names.index,
    y=startup_names.values,
    marker={'color': colr, 'line': {'width': 2, 'color': "black"}},
)
fig = go.Figure(
    data=[bar_trace],
    layout=go.Layout(
        title='Number of fundings Startups Have',
        xaxis=dict(title='Startup Names'),
        yaxis=dict(title='Number of fundings made'),
        height=600,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1mTotal numbers of startups got funded : 114[0m 

[1mNumbers of times each startup got funded :-[0m 

[1m NowPurchase                   2
Glamplus                      1
Exotel                        1
Innoviti Payment Solutions    1
Pine Labs                     1
Moneyboxx                     1
CASHe                         1
iD Fresh Foods                1
Freecultr                     1
Udaan                         1
Name: Company Name, dtype: int64 [0m


# **Startups over the years**

In [13]:
# Number of startups per founding year, ordered from the rarest year to the most common one.
founded = startup_India['Founded'].value_counts(ascending=True)

# Bar chart with one colour value per bar and black outlines.
colr = np.arange(0, founded.values.size)
bar_trace = go.Bar(
    x=founded.index,
    y=founded.values,
    marker={'color': colr, 'line': {'width': 2, 'color': "black"}},
)
fig = go.Figure(
    data=[bar_trace],
    layout=go.Layout(
        title='Number of Startups Founded over years',
        xaxis=dict(title='Years'),
        yaxis=dict(title='Number of Startups Founded'),
        height=600,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

# **Company vs Funding**

**We will split the mean funding into three size bands**
* Above 100M+     
* Between 100M to 50M     
* Below 50M 

In [14]:
# Mean funding amount per company name (rows sharing a name are averaged).
company_amount = (
    startup_India
    .groupby(['Company Name'])['Amount']
    .mean()
)

In [15]:
# Companies whose mean funding is $100M or more. The bound is inclusive ("100M+"):
# with a strict ">" a mean of exactly $100M would belong to no funding band at all.
company_above_100M = company_amount[company_amount >= 100000000]
print("\033[1mCompanies above $100M :-\033[0m \n")
print(f"\033[1m {company_above_100M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per company.
colr = np.arange(0, company_above_100M.values.size)
fig = go.Figure(
    data=[go.Scatter(
        x=company_above_100M.index,
        y=company_above_100M.values,
        mode='markers+lines',
        marker=dict(size=company_above_100M.values / 2500000, color=colr),
    )],
    layout=go.Layout(title='Funding (100M+) Vs Companies'),
)
fig.update_traces(hoverinfo='x+y', textfont_size=10)
fig.update_layout(height=600, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1mCompanies above $100M :-[0m 

[1m Company Name
Addverb Technologies    132000000.0
Dunzo                   240000000.0
HEAPS                   550000000.0
Moglix'                 250000000.0
Ola Electric            200000000.0
Swiggy                  700000000.0
Udaan                   200000000.0
o9 Solutions            295000000.0
Name: Amount, dtype: float64 [0m


In [16]:
# Companies whose mean funding lies in [$50M, $100M). The lower bound is inclusive so
# that a mean of exactly $50M is not left out of every funding band.
company_below_100M = company_amount[(company_amount >= 50000000) & (company_amount < 100000000)]

# The label names the band that is actually selected (50M up to 100M).
print("\033[1mCompanies between $50M and $100M :-\033[0m \n")
print(f"\033[1m {company_below_100M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per company.
colr = np.arange(0, company_below_100M.values.size)
fig = go.Figure(
    data=[go.Scatter(
        x=company_below_100M.index,
        y=company_below_100M.values,
        mode='markers+lines',
        marker=dict(size=company_below_100M.values / 600000, color=colr),
    )],
    layout=go.Layout(title='Funding (100M - 50M) Vs Companies'),
)
fig.update_traces(hoverinfo='x+y', textfont_size=10)
fig.update_layout(height=600, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1mCompanies below $100M :-[0m 

[1m Company Name
Arya.Ag               60000000.0
Darwinbox             72000000.0
Epigeneres Biotech    60000000.0
INDmoney              75000000.0
Lummo                 80000000.0
Mamaearth             52000000.0
Refyne                82000000.0
iD Fresh Foods        60000000.0
Name: Amount, dtype: float64 [0m


In [17]:
# Companies whose mean funding is strictly below $50M.
company_below_50M = company_amount[company_amount.lt(50000000)]

print("\033[1mCompanies below $50M :-\033[0m \n")
print(f"\033[1m {company_below_50M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per company.
colr = np.arange(0, company_below_50M.values.size)
bubble_trace = go.Scatter(
    x=company_below_50M.index,
    y=company_below_50M.values,
    mode='markers+lines',
    hoverinfo='x+y',
    textfont=dict(size=10),
    marker=dict(size=company_below_50M.values / 700000, color=colr),
)
fig = go.Figure(
    data=[bubble_trace],
    layout=go.Layout(
        title='Funding (Below 50M) Vs Companies',
        height=600,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1mCompanies below $50M :-[0m 

[1m Company Name
AarogyaAI                 700000.0
Aerostrovilos Energy      400000.0
AppX                     1300000.0
Aqgromalin               5000000.0
Avataar                 45000000.0
                           ...    
Zuper                   13000000.0
iMocha                  14000000.0
nCORE Games             10000000.0
toothsi                  9000000.0
zeda.io                  1200000.0
Name: Amount, Length: 89, dtype: float64 [0m


# **Sectors vs Funding**

**We will split the mean funding into three size bands**
* Above 100M+     
* Between 100M to 50M     
* Below 50M 

In [18]:
# Mean funding amount per sector.
sector_amount = (
    startup_India
    .groupby(['Sector'])['Amount']
    .mean()
)

In [19]:
# Sectors whose mean funding is $100M or more. The bound is inclusive ("100M+"):
# with a strict ">" a mean of exactly $100M would belong to no funding band at all.
sector_above_100M = sector_amount[sector_amount >= 100000000]
print("\033[1mSectors above $100M :-\033[0m \n")
print(f"\033[1m {sector_above_100M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per sector.
colr = np.arange(0, sector_above_100M.values.size)
fig = go.Figure(
    data=[go.Scatter(
        x=sector_above_100M.index,
        y=sector_above_100M.values,
        mode='markers',
        marker=dict(size=sector_above_100M.values / 2500000, color=colr),
    )],
    layout=go.Layout(title='Funding (100M+) Vs Sectors'),
)
fig.update_traces(hoverinfo='x+y', textfont_size=10)
fig.update_layout(height=600, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1mSectors above $100M :-[0m 

[1m Sector
B2B E-commerce                200000000.0
FoodTech                      700000000.0
Health, Wellness & Fitness    123840000.0
Industrial Automation         132000000.0
Software                      125500000.0
Name: Amount, dtype: float64 [0m


In [20]:
# Sectors whose mean funding lies in [$50M, $100M). The lower bound is inclusive so
# that a mean of exactly $50M is not left out of every funding band.
sector_below_100M = sector_amount[(sector_amount >= 50000000) & (sector_amount < 100000000)]

# The label names the band that is actually selected (50M up to 100M).
print("\033[1mSectors between $50M and $100M :-\033[0m \n")
print(f"\033[1m {sector_below_100M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per sector.
colr = np.arange(0, sector_below_100M.values.size)
fig = go.Figure(
    data=[go.Scatter(
        x=sector_below_100M.index,
        y=sector_below_100M.values,
        mode='markers',
        marker=dict(size=sector_below_100M.values / 600000, color=colr),
    )],
    layout=go.Layout(title='Funding (100M - 50M) Vs Sectors'),
)
fig.update_traces(hoverinfo='x+y', textfont_size=10)
fig.update_layout(height=600, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1mSectors below $100M :-[0m 

[1m Sector
AgriTech           6.000000e+07
Automotive         5.742500e+07
Biotechnology      6.000000e+07
E-commerce         8.066667e+07
Entreprenurship    8.000000e+07
Name: Amount, dtype: float64 [0m


In [21]:
# Sectors whose mean funding is strictly below $50M.
sectors_below_50M = sector_amount[sector_amount.lt(50000000)]

print("\033[1mSectors below $50M :-\033[0m \n")
print(f"\033[1m {sectors_below_50M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per sector.
colr = np.arange(0, sectors_below_50M.values.size)
bubble_trace = go.Scatter(
    x=sectors_below_50M.index,
    y=sectors_below_50M.values,
    mode='markers',
    hoverinfo='x+y',
    textfont=dict(size=10),
    marker=dict(size=sectors_below_50M.values / 400000, color=colr),
)
fig = go.Figure(
    data=[bubble_trace],
    layout=go.Layout(
        title='Funding (Below 50M) Vs Sectors',
        height=600,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1mSectors below $50M :-[0m 

[1m Sector
3D AI company                        4.500000e+07
Apparel & Fashion                    2.600000e+06
Aviation & Aerospace                 5.000000e+06
B2B marketplace                      2.400000e+06
Computer Games                       5.100000e+06
Computer Software                    3.988545e+07
D2C                                  4.150000e+05
E-learning                           2.850000e+06
EdTech                               3.446667e+07
Education Management                 4.000000e+05
Entertainment                        1.500000e+07
Farming                              5.000000e+06
Financial Services                   1.889783e+07
Food & Beverages                     4.133333e+06
Food Production                      3.300000e+07
Hospital & Health Care               8.066667e+06
Hospitality                          1.000000e+07
Import & Export                      7.000000e+06
Information Technology & Services    1.204167e+07
Logist

# **Cities vs Funding**

**We will divide funding Mean in different size**
* Above 100M+     
* Between 100M to 50M     
* Below 50M 

In [22]:
# Mean funding amount per city, ordered from the highest mean to the lowest.
city_amount = (
    startup_India
    .groupby(['Location'])['Amount']
    .mean()
    .sort_values(ascending=False)
)

In [23]:
# First five entries of the descending city ranking, i.e. the highest mean funding.
city_highest_fund = city_amount.head(5)

print("\033[1m Top five highest funded cities :-\033[0m \n")
print(f"\033[1m {city_highest_fund} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per city.
colr = np.arange(0, city_highest_fund.values.size)
bubble_trace = go.Scatter(
    x=city_highest_fund.index,
    y=city_highest_fund.values,
    mode='markers',
    hoverinfo='x+y',
    textfont=dict(size=10),
    marker=dict(size=city_highest_fund.values / 600000, color=colr),
)
fig = go.Figure(
    data=[bubble_trace],
    layout=go.Layout(
        title='Top five HIGHEST funded CITIES',
        height=500,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1m Top five highest funded cities :-[0m 

[1m Location
Noida         8.420000e+07
Bangalore     7.349100e+07
Chandigarh    2.800000e+07
Gurgaon       1.450500e+07
Mumbai        1.411765e+07
Name: Amount, dtype: float64 [0m


In [24]:
# Last five entries of the descending city ranking.
city_lowest_fund = city_amount.tail(5)

print("\033[1m Top five lowest funded cities :-\033[0m \n")
print(f"\033[1m {city_lowest_fund} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per city.
colr = np.arange(0, city_lowest_fund.values.size)
bubble_trace = go.Scatter(
    x=city_lowest_fund.index,
    y=city_lowest_fund.values,
    mode='markers',
    hoverinfo='x+y',
    textfont=dict(size=10),
    marker=dict(size=city_lowest_fund.values / 60000, color=colr),
)
fig = go.Figure(
    data=[bubble_trace],
    layout=go.Layout(
        title='Top five LOWEST funded CITIES',
        height=500,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1m Top five lowest funded cities :-[0m 

[1m Location
New Delhi    8.501250e+06
Hyderabad    8.416667e+06
Chennai      5.536364e+06
Ahmedabad    4.450000e+06
Kolkata      2.400000e+06
Name: Amount, dtype: float64 [0m


**Top cities for startup**

In [25]:
# Ten cities that occur most often in the 'Location' column.
startup_city = startup_India['Location'].value_counts().head(10)

print("\033[1mTop 10 number of Cities Startup have\033[0m \n")
print(f"\033[1m {startup_city} \033[0m")

# Bubble chart: marker size is three times the count, one colour value per city.
colr = np.arange(0, startup_city.values.size)
bubble_trace = go.Scatter(
    x=startup_city.index,
    y=startup_city.values,
    mode='markers',
    hoverinfo='x+y',
    textfont=dict(size=10),
    marker=dict(size=startup_city.values * 3, color=colr),
)
fig = go.Figure(
    data=[bubble_trace],
    layout=go.Layout(
        title='TOP 10 CITIES STARTUP HAVE',
        height=500,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1mTop 10 number of Cities Startup have[0m 

[1m Bangalore     41
Mumbai        20
New Delhi     13
Gurgaon       11
Chennai       11
Hyderabad      7
Noida          6
Ahmedabad      2
Kolkata        2
Chandigarh     1
Name: Location, dtype: int64 [0m


# **Investors vs Funding**

> Note: the results here may be slightly inaccurate, because a small percentage of the Investor values is missing

**We will split the mean funding into three size bands**
* Above 100M+
* Between 100M to 50M
* Below 50M

In [26]:
# Mean funding amount per distinct value of the 'Investor' column.
investor_amount = (
    startup_India
    .groupby(['Investor'])['Amount']
    .mean()
)

In [27]:
# Investor entries whose mean funding is $100M or more. The bound is inclusive ("100M+"):
# with a strict ">" a mean of exactly $100M would belong to no funding band at all.
investor_above_100M = investor_amount[investor_amount >= 100000000]

print("\033[1m Investors above $100M :-\033[0m \n")
print(f"\033[1m {investor_above_100M} \033[0m")

# Bar chart with one colour value per bar and black outlines.
colr = np.arange(0, investor_above_100M.values.size)
fig = go.Figure(
    data=[go.Bar(x=investor_above_100M.index, y=investor_above_100M.values, marker={'color': colr})],
    layout=go.Layout(
        title='Funding (100M+) Vs Investors',
        xaxis=dict(title='Investors'),
        yaxis=dict(title='Investment made'),
    ),
)
fig.data[0].marker.line.width = 2
fig.data[0].marker.line.color = "black"
fig.update_layout(height=500, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1m Investors above $100M :-[0m 

[1m Investor
General Atlantic, BeyondNetZero                               295000000.0
Invesco                                                       700000000.0
NVS Wealth Managers                                           550000000.0
Reliance                                                      132000000.0
Reliance Retail Ventures Limited                              240000000.0
Tekne Private Ventures, Alpine Opportunity Fund, Edelweiss    200000000.0
Tiger Global, Alpha Wave Global                               250000000.0
Name: Amount, dtype: float64 [0m


In [28]:
# Investor entries whose mean funding lies in [$50M, $100M). The lower bound is inclusive
# so that a mean of exactly $50M is not left out of every funding band.
investor_below_100M = investor_amount[(investor_amount >= 50000000) & (investor_amount < 100000000)]

# The label names the band that is actually selected (50M up to 100M).
print("\033[1mInvestors between $50M and $100M :-\033[0m \n")
print(f"\033[1m {investor_below_100M} \033[0m")

# Bar chart with one colour value per bar and black outlines.
colr = np.arange(0, investor_below_100M.values.size)
fig = go.Figure(
    data=[go.Bar(x=investor_below_100M.index, y=investor_below_100M.values, marker={'color': colr})],
    layout=go.Layout(
        title='Funding (100M - 50M) Vs Investors',
        xaxis=dict(title='Investors'),
        yaxis=dict(title='Investment made'),
    ),
)
fig.data[0].marker.line.width = 2
fig.data[0].marker.line.color = "black"
fig.update_layout(height=500, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1mInvestors below $100M :-[0m 

[1m Investor
Asia Impact SA, Lightrock India, Quona Capital                 60000000.0
LetsVenture Angel Fund, Moez Daya                              60000000.0
NewQuest Capital Partner                                       60000000.0
Sequoia                                                        52000000.0
Steadview Capital, Tiger Global, Dragoneer Investment Group    75000000.0
Technology Crossover Ventures                                  72000000.0
Tiger Global                                                   82000000.0
Tiger Global, Sequoia Capital India                            62500000.0
WestBridge Capital                                             65000000.0
Name: Amount, dtype: float64 [0m


In [29]:
# Investor entries whose mean funding is strictly below $50M.
investor_below_50M = investor_amount[investor_amount.lt(50000000)]

print("\033[1mInvestors below $50M :-\033[0m \n")
print(f"\033[1m {investor_below_50M} \033[0m")

# Bar chart with one colour value per bar and black outlines.
colr = np.arange(0, investor_below_50M.values.size)
bar_trace = go.Bar(
    x=investor_below_50M.index,
    y=investor_below_50M.values,
    marker={'color': colr, 'line': {'width': 2, 'color': "black"}},
)
fig = go.Figure(
    data=[bar_trace],
    layout=go.Layout(
        title='Funding (Below 50M) Vs Investors',
        xaxis=dict(title='Investors'),
        yaxis=dict(title='Investment made'),
        height=800,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1mInvestors below $50M :-[0m 

[1m Investor
2 AM VC, Magic Fund                                           600000.0
35North Ventures, Mergerdomo                                 1000000.0
3Lines Venture Capital, C4D Asia Fund                        6000000.0
9Unicorns, Wipro                                             6000000.0
Accel India, Navus Ventures                                  2400000.0
                                                               ...    
Venture Partners, Tiger Global                              25000000.0
WaterBridge Ventures                                         3000000.0
We Founder Circle, Wami Capital, SucSEED Indovation Fund     1000000.0
Y Combinator                                                 1300000.0
Yogesh Chaudhary, Pallavi Nadhani                             500000.0
Name: Amount, Length: 82, dtype: float64 [0m


In [30]:
# Most frequent values of the 'Investor' column; rows without an investor count as 'UNKNOWN'.
new_df_updated_investor = startup_India.copy()
# Assign the filled column back rather than calling fillna(inplace=True) on the
# selected column: that form is chained assignment, which pandas warns about and
# which leaves the frame unchanged when Copy-on-Write is enabled.
new_df_updated_investor['Investor'] = new_df_updated_investor['Investor'].fillna('UNKNOWN')

investor = new_df_updated_investor['Investor'].value_counts().sort_values(ascending=False).head(20)

# Bar chart with one colour value per bar and black outlines.
colr = np.arange(0, investor.values.size)
fig = go.Figure(
    data=[go.Bar(x=investor.index, y=investor.values, marker={'color': colr})],
    layout=go.Layout(
        title='Most common Investors',
        xaxis=dict(title='Investors'),
        yaxis=dict(title='Number of Investments'),
    ),
)
fig.data[0].marker.line.width = 2
fig.data[0].marker.line.color = "black"
fig.update_layout(height=500, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

# **Founded(Years) vs Funding**

**We will split the mean funding into three size bands**
* Above 100M+
* Between 100M to 50M
* Below 50M

In [31]:
# Mean funding amount per founding year.
founded_amount = (
    startup_India
    .groupby(['Founded'])['Amount']
    .mean()
)

In [32]:
# Founding years whose mean funding is $100M or more. The bound is inclusive ("100M+"):
# with a strict ">" a mean of exactly $100M would belong to no funding band at all.
founded_above_100M = founded_amount[founded_amount >= 100000000]

print("\033[1mStartups founded above $100M :-\033[0m \n")
print(f"\033[1m {founded_above_100M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per year.
colr = np.arange(0, founded_above_100M.values.size)
fig = go.Figure(
    data=[go.Scatter(
        x=founded_above_100M.index,
        y=founded_above_100M.values,
        mode='markers+lines',
        marker=dict(size=founded_above_100M.values / 2000000, color=colr),
    )],
    layout=go.Layout(title='Funding (100M+) Vs Founded(Years)'),
)
fig.update_traces(hoverinfo='x+y', textfont_size=10)
fig.update_layout(height=500, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1mStartups founded above $100M :-[0m 

[1m Founded
2009    2.950000e+08
2014    1.114000e+08
2015    1.021167e+08
Name: Amount, dtype: float64 [0m


In [33]:
# Founding years whose mean funding lies in [$50M, $100M). The lower bound is inclusive
# so that a mean of exactly $50M is not left out of every funding band.
founded_below_100M = founded_amount[(founded_amount >= 50000000) & (founded_amount < 100000000)]

# The label names the band that is actually selected (50M up to 100M).
print("\033[1mStartups founded between $50M and $100M :-\033[0m \n")
print(f"\033[1m {founded_below_100M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per year.
colr = np.arange(0, founded_below_100M.values.size)
fig = go.Figure(
    data=[go.Scatter(
        x=founded_below_100M.index,
        y=founded_below_100M.values,
        mode='markers+lines',
        marker=dict(size=founded_below_100M.values / 300000, color=colr),
    )],
    layout=go.Layout(title='Funding (100M - 50M) Vs Founded(Years)'),
)
fig.update_traces(hoverinfo='x+y', textfont_size=10)
fig.update_layout(height=500, autosize=True, plot_bgcolor='rgb(275, 275, 275)')

[1mStartups founded below $100M :-[0m 

[1m Founded
2006    60000000.0
2021    59745000.0
Name: Amount, dtype: float64 [0m


In [34]:
# Founding years whose mean funding is strictly below $50M.
founded_below_50M = founded_amount[founded_amount.lt(50000000)]

print("\033[1mStartups founded below $50M :-\033[0m \n")
print(f"\033[1m {founded_below_50M} \033[0m")

# Bubble chart: marker size scales with the amount, one colour value per year.
colr = np.arange(0, founded_below_50M.values.size)
bubble_trace = go.Scatter(
    x=founded_below_50M.index,
    y=founded_below_50M.values,
    mode='markers+lines',
    hoverinfo='x+y',
    textfont=dict(size=10),
    marker=dict(size=founded_below_50M.values / 400000, color=colr),
)
fig = go.Figure(
    data=[bubble_trace],
    layout=go.Layout(
        title='Funding (Below 50M) Vs Founded(Years)',
        height=500,
        autosize=True,
        plot_bgcolor='rgb(275, 275, 275)',
    ),
)
fig

[1mStartups founded below $50M :-[0m 

[1m Founded
1998    2.000000e+07
2002    1.000000e+07
2008    3.650000e+07
2011    2.150000e+07
2012    3.385000e+07
2013    2.106667e+07
2016    4.015000e+07
2017    2.551818e+07
2018    5.667500e+06
2019    1.478333e+07
2020    8.522391e+06
Name: Amount, dtype: float64 [0m
