###  Exploring and processing the company raise datasheet

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Load CSV from data folder

In [5]:
url = "data/funding_data.csv"

df = pd.read_csv(url)
df.head()

Unnamed: 0,DATE RAISE ANNOUNCED,COMPANY,AMOUNT,HQ Location,TOP INVESTORS (in this round),LINK,Website,Round,Category,NOTES,Expansion Plans,Founder First Name,Founder Last Name,Founder LinkedIn,Founder Twitter,Founder AngelList,Unnamed: 16
0,8/12/2021,pumpspotting,"$1,150,000","Kittery, Me.","MooDoos Investments, participation from Maine ...",https://www.finsmes.com/2021/08/pumpspotting-r...,https://www.pumpspotting.com/,Seed,Femtech,community-driven breastfeeding support platform,,,,,,,
1,8/12/2021,BLDUP,"$2,000,000","Boston, Mass.",U,https://www.finsmes.com/2021/08/bldup-raises-2...,http://bldup.com/,Pre-Seed,Enterprise,system to highlight the relationship between b...,,,,,,,
2,8/12/2021,Sicona Battery,"$2,700,000","Sydney, Australia","Artesian, Riverstone Ventures, participation f...",https://www.finsmes.com/2021/08/sicona-battery...,http://www.siconabattery.com/,A,Energy,battery materials technology,,,,,,,
3,8/12/2021,Part Analytics,"$3,000,000","Milwaukee, Wisc.",MK Capital,https://www.finsmes.com/2021/08/part-analytics...,https://partanalytics.com/,U,Industrial,AI platform for sclaing global manufacturing c...,,,,,,,
4,8/12/2021,Anthill,"$3,000,000","Chicago, Ill.","Rethink Education, participation from Origin V...",https://www.finsmes.com/2021/08/anthill-raises...,https://www.anthillai.com/,Seed,Enterprise,talent management platform designed for remote...,,,,,,,


#### Initial check on data - duplicated and unnamed column

It looks like there are some entries with missing raise amounts and company - this is due to the poor format of the data with total rows below the individual entries. I also looks like the unnamed column includes public email addresses when available.

In [6]:
len(df)
df = df.drop_duplicates()
len(df)

28855

In [7]:
df.isnull().sum().sort_values(ascending=False)/len(df)

Unnamed: 16                      0.992341
Expansion Plans                  0.943857
Founder AngelList                0.900225
Founder Twitter                  0.884665
Founder LinkedIn                 0.665916
Founder Last Name                0.629596
Founder First Name               0.629111
Website                          0.261722
NOTES                            0.105493
HQ Location                      0.105389
Round                            0.104037
TOP INVESTORS (in this round)    0.104003
Category                         0.103864
LINK                             0.103864
DATE RAISE ANNOUNCED             0.069277
AMOUNT                           0.000243
COMPANY                          0.000069
dtype: float64

In [8]:
df['Unnamed: 16'].unique() # unnamed column is founder_email

array([nan, 'cameron@tradegecko.com', 'alberto.dalmasso@satispay.com',
       'duncan@xage.com', 'derekocarroll@brightpearl.com',
       'terry.drayton@livible.com', 'lars.albright@sessionm.com',
       'mcooper@skillshare.com', 'scott.mercer@voltacharging.com',
       'jfeast@cogitocorp.com', 'dpreston@metromile.com',
       'alex@parabola.io', 'natalies@viewpath.com', 'thar@ambersi.com',
       'jake@realtimecases.com', 'omer.molad@vervoe.com',
       'roy@bdsanalytics.com', 'nmondrow@lockstate.com',
       'john.kohl@tunego.com', 'raoulpal@realvision.com',
       'marc.castel@fiixsoftware.com', 'assaf@nanit.com',
       'melbourne@bestow.life', 'diego.farias@amuse.io',
       'mihkel@realeyesit.com', 'assaf@superpedestrian.com',
       'atomarchio@cuebiq.com', 'dgloba@tradingview.com',
       'manny.medina@outreach.io', 'alex.atallah@opensea.io',
       'georgene.huang@fairygodboss.com', 'jjoraanstad@myriadmobile.com',
       'ikkjin@molocoads.com', 'nate.phillips@nomnomnow.com',
  

As the column names are not clear, we rename these to identify which of these relate to the raise, company and founder specifically.

In [9]:
display(list(df.columns.values))

['DATE RAISE ANNOUNCED',
 'COMPANY',
 'AMOUNT',
 'HQ Location',
 'TOP INVESTORS (in this round)',
 'LINK',
 'Website',
 'Round ',
 'Category',
 'NOTES',
 'Expansion Plans',
 'Founder First Name',
 'Founder Last Name',
 'Founder LinkedIn',
 'Founder Twitter',
 'Founder AngelList',
 'Unnamed: 16']

In [10]:
df.rename(columns = {'DATE RAISE ANNOUNCED':'raise_date',
 'COMPANY':'company_name',
 'AMOUNT':'raise_amount_mill_dollars',
 'HQ Location':'company_hq_location',
 'TOP INVESTORS (in this round)':'raise_top_investors',
 'LINK':'raise_link',
 'Website':'company_website',
 'Round ':'company_funding_round',
 'Category': 'company_category',
 'NOTES':'company_description',
 'Expansion Plans':'company_expansion_plans',
 'Founder First Name':'founder_first_name',
 'Founder Last Name':'founder_last_name',
 'Founder LinkedIn':'founder_linkedin',
 'Founder Twitter':'founder_twitter',
 'Founder AngelList':'founder_angelist',
 'Unnamed: 16':'founder_email'},inplace=True)

In [11]:
display(list(df.columns.values))

['raise_date',
 'company_name',
 'raise_amount_mill_dollars',
 'company_hq_location',
 'raise_top_investors',
 'raise_link',
 'company_website',
 'company_funding_round',
 'company_category',
 'company_description',
 'company_expansion_plans',
 'founder_first_name',
 'founder_last_name',
 'founder_linkedin',
 'founder_twitter',
 'founder_angelist',
 'founder_email']

In [12]:
df.isnull().sum().sort_values(ascending=False)

founder_email                28634
company_expansion_plans      27235
founder_angelist             25976
founder_twitter              25527
founder_linkedin             19215
founder_last_name            18167
founder_first_name           18153
company_website               7552
company_description           3044
company_hq_location           3041
company_funding_round         3002
raise_top_investors           3001
company_category              2997
raise_link                    2997
raise_date                    1999
raise_amount_mill_dollars        7
company_name                     2
dtype: int64

In [13]:
"""
df.loc[df['company_funding_round'].isnull()] # filters rows where funding_round is null
"""

df = df[df['company_funding_round'].notna()] # select rows where funding_round is not empty
df = df.reset_index(drop=True) # reset index and remove old one
df.head()
df.shape

(25853, 17)

Inspecting the data, it looks like the data in the raise amount is a string, so we need to remove the $ and , characters so that the column can be converted to numeric type. 

The raise date also has some inconsistent values in, which will not convert to datetime objects - for the moment these are left in the data - as the data is ordered by date, it would be reasonable to use the previous entry date if the raise data is 'NaT' not a valid time.

In [14]:
df['company_hq_location'] = df['company_hq_location'].str.replace(',',';') # needed for csv upload to postgres
df['raise_top_investors'] = df['raise_top_investors'].str.replace(',',';') # needed for csv upload to postgres
df['company_description'] = df['company_description'].str.replace(',',';') # needed for csv upload to postgres
df['company_expansion_plans'] = df['company_expansion_plans'].str.replace(',',';') # needed for csv upload to postgres
df['founder_first_name'] = df['founder_first_name'].str.replace(',',';') # needed for csv upload to postgres
df['raise_link'] = df['raise_link'].str.replace(',',' ') # needed for csv upload to postgres
df['company_name'] = df['company_name'].str.replace(',',' ') # needed for csv upload to postgres

In [15]:
df['raise_amount_mill_dollars'] = df['raise_amount_mill_dollars'].map(lambda x: x.replace('$',''))
df['raise_amount_mill_dollars'] = df['raise_amount_mill_dollars'].map(lambda x: x.replace(',',''))
df.head()

Unnamed: 0,raise_date,company_name,raise_amount_mill_dollars,company_hq_location,raise_top_investors,raise_link,company_website,company_funding_round,company_category,company_description,company_expansion_plans,founder_first_name,founder_last_name,founder_linkedin,founder_twitter,founder_angelist,founder_email
0,8/12/2021,pumpspotting,1150000,Kittery; Me.,MooDoos Investments; participation from Maine ...,https://www.finsmes.com/2021/08/pumpspotting-r...,https://www.pumpspotting.com/,Seed,Femtech,community-driven breastfeeding support platform,,,,,,,
1,8/12/2021,BLDUP,2000000,Boston; Mass.,U,https://www.finsmes.com/2021/08/bldup-raises-2...,http://bldup.com/,Pre-Seed,Enterprise,system to highlight the relationship between b...,,,,,,,
2,8/12/2021,Sicona Battery,2700000,Sydney; Australia,Artesian; Riverstone Ventures; participation f...,https://www.finsmes.com/2021/08/sicona-battery...,http://www.siconabattery.com/,A,Energy,battery materials technology,,,,,,,
3,8/12/2021,Part Analytics,3000000,Milwaukee; Wisc.,MK Capital,https://www.finsmes.com/2021/08/part-analytics...,https://partanalytics.com/,U,Industrial,AI platform for sclaing global manufacturing c...,,,,,,,
4,8/12/2021,Anthill,3000000,Chicago; Ill.,Rethink Education; participation from Origin V...,https://www.finsmes.com/2021/08/anthill-raises...,https://www.anthillai.com/,Seed,Enterprise,talent management platform designed for remote...,,,,,,,


In [16]:
df.isnull().sum().sort_values(ascending=False)

founder_email                25632
company_expansion_plans      24233
founder_angelist             22976
founder_twitter              22525
founder_linkedin             16216
founder_last_name            15169
founder_first_name           15155
company_website               4554
company_description             47
company_hq_location             44
raise_top_investors              8
company_category                 1
company_name                     0
company_funding_round            0
raise_link                       0
raise_amount_mill_dollars        0
raise_date                       0
dtype: int64

In [17]:
df['raise_amount_mill_dollars'] = pd.to_numeric(df['raise_amount_mill_dollars']).astype(np.int64)
df['raise_amount_mill_dollars'] = df['raise_amount_mill_dollars'].map(lambda x: x/1_000_000)

df['raise_date'] = pd.to_datetime(df['raise_date'], errors='coerce', format="%m/%d/%Y")
df['raise_date'] = df['raise_date'].bfill(axis=0) # back fill incorrect dates
df.dtypes

raise_date                   datetime64[ns]
company_name                         object
raise_amount_mill_dollars           float64
company_hq_location                  object
raise_top_investors                  object
raise_link                           object
company_website                      object
company_funding_round                object
company_category                     object
company_description                  object
company_expansion_plans              object
founder_first_name                   object
founder_last_name                    object
founder_linkedin                     object
founder_twitter                      object
founder_angelist                     object
founder_email                        object
dtype: object

In [18]:
df['raise_date'].dt.strftime('%Y-%M-%d')

0        2021-00-12
1        2021-00-12
2        2021-00-12
3        2021-00-12
4        2021-00-12
            ...    
25848    2016-00-07
25849    2016-00-09
25850    2016-00-08
25851    2016-00-09
25852    2016-00-10
Name: raise_date, Length: 25853, dtype: object

In [19]:
df.head()

Unnamed: 0,raise_date,company_name,raise_amount_mill_dollars,company_hq_location,raise_top_investors,raise_link,company_website,company_funding_round,company_category,company_description,company_expansion_plans,founder_first_name,founder_last_name,founder_linkedin,founder_twitter,founder_angelist,founder_email
0,2021-08-12,pumpspotting,1.15,Kittery; Me.,MooDoos Investments; participation from Maine ...,https://www.finsmes.com/2021/08/pumpspotting-r...,https://www.pumpspotting.com/,Seed,Femtech,community-driven breastfeeding support platform,,,,,,,
1,2021-08-12,BLDUP,2.0,Boston; Mass.,U,https://www.finsmes.com/2021/08/bldup-raises-2...,http://bldup.com/,Pre-Seed,Enterprise,system to highlight the relationship between b...,,,,,,,
2,2021-08-12,Sicona Battery,2.7,Sydney; Australia,Artesian; Riverstone Ventures; participation f...,https://www.finsmes.com/2021/08/sicona-battery...,http://www.siconabattery.com/,A,Energy,battery materials technology,,,,,,,
3,2021-08-12,Part Analytics,3.0,Milwaukee; Wisc.,MK Capital,https://www.finsmes.com/2021/08/part-analytics...,https://partanalytics.com/,U,Industrial,AI platform for sclaing global manufacturing c...,,,,,,,
4,2021-08-12,Anthill,3.0,Chicago; Ill.,Rethink Education; participation from Origin V...,https://www.finsmes.com/2021/08/anthill-raises...,https://www.anthillai.com/,Seed,Enterprise,talent management platform designed for remote...,,,,,,,


In [20]:
df[df['raise_date'].isnull()]

Unnamed: 0,raise_date,company_name,raise_amount_mill_dollars,company_hq_location,raise_top_investors,raise_link,company_website,company_funding_round,company_category,company_description,company_expansion_plans,founder_first_name,founder_last_name,founder_linkedin,founder_twitter,founder_angelist,founder_email


#### Checking data within columns

There are a number of incorrect manual entries in the category column - in the short term, these are replaced for more consistent groupings. However, in practice validation should be added to inputs for consistency.

In [21]:
df['company_category'].replace({
        'Agtech':'Agritech', 'Fermtech':'Femtech', 'COnsumer':'Consumer',
        'Foodtecvh':'Foodtech', 'INsurance':'Insurance','biotech/Health':'Biotech/Health',
        'insurtech':'Insurtech','ADtech/Martech':'Adtech/Martech','FIntech':'Fintech', 
        'enterprise':'Enterprise','Analyitcs':'Analytics', 'Consumert':'Consumer', 
        'Enteprrise':'Enterprise', 'Cosnumer':'Consumer', 'Energyteech':'Energytech',
        'industrial':'Industrial', 'INdustrial':'Industrial','Aerospacee':'Aerospace',
        'consumer':'Consumer', 'Ai/ML':'AI/ML', 'DevOPs':'DevOps', 
        'cannabis':'Cannabis', 'Data/Analtics':'Data/Analytics','Aadtech/Martech':'Adtech/Martech', 
        'ENterprise':'Enterprise', 'CLimatetech':'Climatetech', 'BIotech/Health':'Biotech/Health', 
        'Transportationn':'Trasportation', 'INsurtech':'Insuretech', 'Aeroospace':'Aerospace', 
        'Coonsumer':'Consumer', 'Logistcs':'Logistics','Fitnech':'Fintech', 
        'DEvOps':'Devops','Transportaiton':'Transportation', 'Transportatioon':'Transportation',
        'AI//ML':'AI/ML', 'Adtech/Marthech':'Adtech/Martech','Cybersercurity':'Cybersecurity', 
        'Insurtech ':'Insurtech','transportation':'Transportation', 'Consumer ':'Consumer',
        'Cyber-Security':'Cybersecurity', 'Ed-tech':'Edtech','Ed-Tech':'Edtech', 'Agech':'Agritech',
        'Cybsersecurity':'Cybersecurity','Roboticis':'Robotics','Finteech':'Fintech',
        'Eneterprise':'Enterprise', 'Cybesecurity':'Cybersecurity','Ai/mL':'AI/ML', 
        'Adtech/Marthec':'Adtech/Martech','BIOtech/Health':'Biotech/Health', 'data/Analytics':'Data/Analytics',
        'fintech':'Fintech', 'BLockchain':'Blockchain','Ccannabis':'Cannabis', 'robotics':'Robotics', 
        'Spacetechc':'Spacetech', 'Data/Analttics':'Data/Analytics','Consumer Internnet':'Consumer Internet', 
        'adtech/Martech':'Adtech/Martech', 'Biotech/Healthy':'Biotech/Health', 'COnsumer Internet':'Consumer Internet', 
        'Space':'Spacetech', 'DevOpds':'DevOps', 'Enterpris':'Enterprise','Consuner Internet':'Consumer Internet', 
        'Lifestyler':'Lifestyle','SpaceTech':'Spacetech','Govtech':'GovTech','Insuretech':'Insurtech',
        'Regtech':'RegTech', 'Enterprise Solution':'Enterprise Solutions','Martech':'Adtech/Martech', 
        'Transporatation':'Transportation','Biotech/Helath':'Biotech/Health', 'Devops':'DevOps',
        'Cybersecurity ':'Cybersecurity','Consumer INternet':'Consumer Internet', 'CYbersecurity':'Cybersecurity',
        'Ai/Machine Learning':'AI/ML', 'consumer Internet':'Consumer Internet', 'Enterprising':'Enterprise Solutions',
        'DAta/Analytics':'Data/Analytics', 'Enterprise solutions':'Enterprise Solutions','AI/Machine Learnign':'AI/ML',
        'bIOtech/Health':'Biotech/Health','bIotech/Health':'Biotech/Health', 'DEv':'Dev',
        'Quantum Computing ':'Quantum Computing','AI/Machine Leaning':'AI/ML', 'gaming':'Gaming',
        'Data/StorageAnalytics':'Data/Analytics', 'Consumer Interet':'Consumer Internet', 'Devops':'DevOps',
        'Cybersecuity':'Cybersecurity', 'AI/Machine Learing':'AI/ML', 'Data/Anlalytics':'Data/Analytics',
        'CLeantech':'Cleantech', 'Adtech':'Adtech/Martech', 'Footech':'Foodtech', 'COnsumer INternet':'Consumer Internet',
        'Bioetech/Health':'Biotech/Health', 'Data':'Data/Analytics', 'edtech':'Edtech', 'AR/Vr':'AR/VR', 
        'Enteprise':'Enterprise', 'Industry.40':'Industry4.0', 'Biotech/Heath':'Biotech/Health', 
        'Cyberecurity':'Cybersecurity', 'Food Tech':'Foodtech', 'Blochchain':'Blockchain', 'Gamng':'Gaming',
        'Customer Internet':'Consumer Internet', 'Entperprise':'Enterprise', 'PRoptech':'Proptech',
        'Real Esate':'Real estate', 'Trasportation':'Transportation', 'Edtec':'Edtech',
        'Ar/VR':'AR/VR', 'LIfestyle':'Lifestyle', 'lifestyle':'Lifestyle', 'Isurtech':'Insurtech',
        'Cleawntech':'Cleantech', 'Consumer Internt':'Consumer Internet', 'industral':'Industrial',
        'Industrials':'Industrial', 'Data/Analytis':'Data/Analytics', 'proptech':'Proptech',
        'Cybersecurit':'Cybersecurity', 'SPacetech':'Spacetech', 'REal Estate':'Real estate',
        'Insurteh':'Insurtech', 'INdustry4.0':'Industry4.0', 'RObotics':'Robotics',
        'Customer Products':'Consumer Products', 'Cybersecurirty':'Cybersecurity','cOnsumer Internet':'Consumer Internet',
        'Consumer Intenet':'Consumer Internet', 'Enterpise':'Enterprise', 'Biotech/Heatlh':'Biotech/Health',
        'Industral':'Industrial', 'dev':'Dev', 'Clean Tech':'CleanTech', 'Consumer Goods': 'Consumer Products',
        'Socialtecch':'Socialtech', 'blockchain':'Blockchain', 'AIML':'AI/ML', 'COnsumer Products':'Consumer Products',
        'FOodtech':'Foodtech', 'Gamers':'Gaming', 'Consumner Internet':'Consumer Internet',
        'Insrutech':'Insurtech', 'Insuretech':'Insurtech', 'Real estate':'Real Estate',
        'Real Estate Tech':'Proptech', 'Proptech':'PropTech','FinTech':'Fintech', 'Transporation':'Transportation',
        'TravelTech':'Traveltech','Telecom':'Telecoms', 'CleanTech':'Cleantech',
        'ClimateTech':'Climatetech','GreenTech':'Greentech', 'LegalTech':'Legaltech', 
        'SportTech':'SportsTech', 'Sportstech':'SportsTech','Sporttech':'SportsTech', 'DeepTech':'Deeptech',
        'Socialtech':'SocialTech','SalesTech':'Salestech', 'Biotech/Health ':'Biotech/Health',
        'Analytics':'Data/Analytics', 'Robots':'Robotics', 'Consumer products':'Consumer Products', 
        'Insuretech ':'Insurtech', 'Devops':'DevOps', 'Proptech':'PropTech'
}, inplace=True)

In [22]:
pd.options.display.max_rows = 1000
df['company_category'].value_counts()
# df['company_category'].unique()

Biotech/Health            4941
Enterprise                4687
Fintech                   2628
Consumer Internet         1846
Cybersecurity             1281
Edtech                     643
Industrial                 640
Data/Analytics             634
Consumer                   604
Adtech/Martech             598
Transportation             527
Blockchain                 451
Lifestyle                  426
PropTech                   420
DevOps                     398
Foodtech                   375
AI/Machine Learning        369
Insurtech                  362
Gaming                     330
Agritech                   327
AI/ML                      320
Cleantech                  294
Logistics                  276
Dev                        252
Robotics                   245
AR/VR                      217
Industry4.0                209
Cannabis                   138
Energy                     135
SocialTech                 124
Deeptech                   109
Data/Storage               108
Spacetec

In [23]:
# cat_table = df[['company_category','raise_amount_dollars']].groupby('company_category').count()/1_000_000


cat_table = df[['company_category','raise_amount_mill_dollars']].groupby('company_category').agg({'raise_amount_mill_dollars': ['mean','count']})
cat_table.head()

Unnamed: 0_level_0,raise_amount_mill_dollars,raise_amount_mill_dollars
Unnamed: 0_level_1,mean,count
company_category,Unnamed: 1_level_2,Unnamed: 2_level_2
AI/ML,38.361781,320
AI/Machine Learning,32.649986,369
AR/VR,22.928659,217
Adtech/Martech,24.155587,598
Aerospace,49.018302,53


In [24]:
save_url = "data/funding_data_clean.csv"
df.to_csv(save_url)

In [25]:
df= df.head(9500) # Keep max 10K entries for free heroku tier
df

Unnamed: 0,raise_date,company_name,raise_amount_mill_dollars,company_hq_location,raise_top_investors,raise_link,company_website,company_funding_round,company_category,company_description,company_expansion_plans,founder_first_name,founder_last_name,founder_linkedin,founder_twitter,founder_angelist,founder_email
0,2021-08-12,pumpspotting,1.15,Kittery; Me.,MooDoos Investments; participation from Maine ...,https://www.finsmes.com/2021/08/pumpspotting-r...,https://www.pumpspotting.com/,Seed,Femtech,community-driven breastfeeding support platform,,,,,,,
1,2021-08-12,BLDUP,2.00,Boston; Mass.,U,https://www.finsmes.com/2021/08/bldup-raises-2...,http://bldup.com/,Pre-Seed,Enterprise,system to highlight the relationship between b...,,,,,,,
2,2021-08-12,Sicona Battery,2.70,Sydney; Australia,Artesian; Riverstone Ventures; participation f...,https://www.finsmes.com/2021/08/sicona-battery...,http://www.siconabattery.com/,A,Energy,battery materials technology,,,,,,,
3,2021-08-12,Part Analytics,3.00,Milwaukee; Wisc.,MK Capital,https://www.finsmes.com/2021/08/part-analytics...,https://partanalytics.com/,U,Industrial,AI platform for sclaing global manufacturing c...,,,,,,,
4,2021-08-12,Anthill,3.00,Chicago; Ill.,Rethink Education; participation from Origin V...,https://www.finsmes.com/2021/08/anthill-raises...,https://www.anthillai.com/,Seed,Enterprise,talent management platform designed for remote...,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9495,2019-12-18,Pathios Therapeutics,8.80,Oxford; UK,Canaan; Brandon Capital,http://www.finsmes.com/2019/12/pathios-therape...,http://www.pathios.com/,A,Biotech/Health,therapies for autoimmune diseases and cancer,,,,,,,
9496,2019-12-18,BillingPlatform,10.00,Denver; CO,Columbia Capital,http://www.finsmes.com/2019/12/billingplatform...,https://billingplatform.com/,B,Fintech,cloud monetization and billing software,,,,,,,
9497,2019-12-18,Teraki,11.00,Berlin; Germany,Horizons Ventures; participation from Auto Lab...,http://www.finsmes.com/2019/12/teraki-secures-...,https://teraki.com/,A,Industrial,industrial/auto IoT platform,,,,,,,
9498,2019-12-18,BC Platform,15.00,Zurich; Switzerland,IQVIA; participation from Debiopharm Innovatio...,http://www.finsmes.com/2019/12/bc-platforms-ra...,http://www.bcplatforms.com/,U,Biotech/Health,genomic data management and analytics,,,,,,,


In [26]:
save_url = "data/funding_data_cleancut.csv"
df.to_csv(save_url)

In [28]:
df.dtypes

raise_date                   datetime64[ns]
company_name                         object
raise_amount_mill_dollars           float64
company_hq_location                  object
raise_top_investors                  object
raise_link                           object
company_website                      object
company_funding_round                object
company_category                     object
company_description                  object
company_expansion_plans              object
founder_first_name                   object
founder_last_name                    object
founder_linkedin                     object
founder_twitter                      object
founder_angelist                     object
founder_email                        object
dtype: object