In [2]:
# Dependencies
import numpy as np
import pandas as pd
import seaborn as sns
import random as rd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from matplotlib.figure import Figure
from datetime import datetime

# Seed NumPy's global random generator with a fixed value (the sum of the character
# codes of "aesthetics") so that any random draws are repeatable between runs
np.random.seed(sum(ord(letter) for letter in "aesthetics"))

In [3]:
# ORGANIZING THE DATA 1: The first step to obtain the correct data is to load the file
# companies.csv into a pandas dataframe.

In [4]:
# Load the companies export, parsing the three date columns as datetimes, and
# order the rows alphabetically by company name with a freshly numbered index
csv_file = "companies.csv"
companies_df = (
    pd.read_csv(
        csv_file,
        encoding="iso-8859-1",
        parse_dates=["Founded Date", "Closed Date", "Last Funding Date"],
    )
    .sort_values("Company Name")
    .reset_index(drop=True)
)

In [5]:
# Sanity check: print how many company rows were loaded, then preview the first records
print(companies_df.shape[0])
companies_df.head()

8664


Unnamed: 0,Company Name,Company Name URL,Category Groups,Headquarters Location,Description,Crunchbase Rank,Founded Date,Closed Date,Number of Funding Rounds,Last Funding Date,Last Funding Amount,Last Funding Type,Total Funding Amount,Status
0,#waywire,https://www.crunchbase.com/organization/waywire,"Media and Entertainment, Government and Milita...","New York, New York, United States",#waywire is an online community focused on soc...,21987,2012-06-01,NaT,1,2012-06-30,"$1,750,000",Seed,"$1,750,000",Was Acquired
1,*gram Labs,https://www.crunchbase.com/organization/gram-labs,"Science and Engineering, Hardware, Data and An...","New York, New York, United States","GramLabs is a start up, leveraging deep learni...",11897,2015-08-01,NaT,1,2016-07-27,"$1,000,000",Seed,"$1,000,000",Operating
2,.io,https://www.crunchbase.com/organization/io-rea...,Data and Analytics,"Alexandria, Virginia, United States",.io is a real-time intelligence platform.,16844,2015-03-01,NaT,1,2015-10-01,"$1,500,000",Seed,"$1,500,000",Operating
3,/dev/color,https://www.crunchbase.com/organization/dev-color,,"Menlo Park, California, United States",/dev/color is a non-profit organization that a...,35003,2015-05-01,NaT,1,2016-08-23,"$120,000",Seed,"$120,000",Operating
4,10 By 10,https://www.crunchbase.com/organization/10-by-10,"Software, Professional Services","San Francisco, California, United States",10by10 builds a marketplace to more quickly ma...,59822,2015-01-01,NaT,1,2017-07-01,"$120,000",Seed,"$120,000",Operating


In [6]:
# List the column labels of companies_df to check which fields are available
companies_df.columns

Index(['Company Name', 'Company Name URL', 'Category Groups',
       'Headquarters Location', 'Description', 'Crunchbase Rank',
       'Founded Date', 'Closed Date', 'Number of Funding Rounds',
       'Last Funding Date', 'Last Funding Amount', 'Last Funding Type',
       'Total Funding Amount', 'Status'],
      dtype='object')

In [7]:
# Keep only the relevant columns of the companies dataframe and preview the result
company_columns = [
    'Company Name',
    'Crunchbase Rank',
    'Founded Date',
    'Closed Date',
    'Total Funding Amount',
    'Status',
]
companies_df = companies_df[company_columns]
companies_df.head()

Unnamed: 0,Company Name,Crunchbase Rank,Founded Date,Closed Date,Total Funding Amount,Status
0,#waywire,21987,2012-06-01,NaT,"$1,750,000",Was Acquired
1,*gram Labs,11897,2015-08-01,NaT,"$1,000,000",Operating
2,.io,16844,2015-03-01,NaT,"$1,500,000",Operating
3,/dev/color,35003,2015-05-01,NaT,"$120,000",Operating
4,10 By 10,59822,2015-01-01,NaT,"$120,000",Operating


In [8]:
# ORGANIZING THE DATA 2: The companies dataframe is missing some important information,
# such as each company's Crunchbase UUID and homepage.
# One option is to gather this info from the Crunchbase JSON API (that attempt is kept below, commented out).

# The approach used in this notebook is to combine two dataframes - companies and organizations.

In [9]:
# import json
# import os
# import requests as req

# # Crunchbase API endpoint. The key is read from the environment instead of being hardcoded.
# # NOTE(review): an earlier revision of this cell embedded a literal API key here;
# # treat that key as leaked and revoke it.
# url = "https://api.crunchbase.com/v/3/funding-rounds/?user_key=" ### RACHEL ###
# api_key = os.environ["CRUNCHBASE_API_KEY"]

# # Build query URL
# query_url = url + api_key

# # Get Crunchbase data
# org_json = req.get(query_url).json()

# # Print the raw JSON response, its top-level keys and the number of keys
# print("The Crunchbase API responded with: " + json.dumps(org_json, indent=2) + ".")
# print("org_json keys: ", org_json.keys())
# print(len(org_json))

In [10]:
# # Creates a new, empty dataframe with the two columns that will be filled in below
# fund_df = pd.DataFrame() 
# fund_df["uuid"] = "" ### RACHEL ###
# fund_df["Homepage"] = "" ### RACHEL ###

# index = 0

# # Crunchbase API endpoint. The key is read from the environment (needs `import os`)
# # instead of being hardcoded.
# # NOTE(review): an earlier revision of this cell embedded a literal API key here;
# # treat that key as leaked and revoke it.
# url = "https://api.crunchbase.com/v/3/funding-rounds/?user_key=" ### RACHEL ###
# api_key = os.environ["CRUNCHBASE_API_KEY"]

# # get 10 pages of items
# for x in range(1,11):
#     # Build query URL
#     query_url = url + api_key + "&page=" + str(x)
#     # Get Crunchbase data
#     funding_json = req.get(query_url).json()
#     # NOTE(review): this prints the full URL, including the API key, into the cell output
#     print(query_url)

#     # Loop through a page of items  ### RACHEL ###
#     for i in range(0,len(funding_json["data"]["items"])):
#         # Save the item's uuid and homepage into variables
#         uuid  = funding_json["data"]["items"][i]["uuid"]
#         homepage  = funding_json["data"]["items"][i]["properties"]["homepage"]
        
               
#         ### RACHEL ###
#         # NOTE(review): DataFrame.set_value was removed in pandas 1.0; if this cell is
#         # ever re-enabled, use fund_df.at[index, "uuid"] = uuid (and likewise for Homepage)
#         fund_df.set_value(index, "uuid", uuid)
#         fund_df.set_value(index, "Homepage", homepage)
#         index += 1

# fund_df.sort_values("uuid")    
# fund_df.head(10)

In [11]:
# funds = fund_df.rename(columns={
#     "uuid": "UUID"   
# })

# funds.head()

In [12]:
# data_set = pd.merge(merged_df, funds,on="UUID")
# data_set.head(10)

In [13]:
# END OF JSON

In [14]:
# Load the organizations export (it carries UUID and Homepage per company name)
# into a dataframe and preview the first rows
org = "organizations.csv"
org_df = pd.read_csv(filepath_or_buffer=org, encoding="iso-8859-1")
org_df.head()

Unnamed: 0.1,Unnamed: 0,UUID,Company Name,Homepage
0,352124,ea359946-4db6-5eee-b466-bd0774491e2d,1,http://y.com
1,24310,019602e2-8727-7fc1-af9f-4134dc629ca5,39,http://39inc.com
2,62273,01eb53db-bcee-c57f-3ab1-54fb53f6de65,55,http://www.fifty-five.com
3,231876,c1723121-d8a2-457c-1b81-25fc42e78d95,99,http://www.99taxis.com
4,402099,934bd25a-4b91-9d78-12c3-524c679d3e69,101,http://www.101edu.co/


In [15]:
# To better display and manipulate the data we eliminate the "-" from the column UUID.
# This uses the vectorized string accessor instead of a row-by-row loop built on
# DataFrame.set_value, which was deprecated in pandas 0.21 and removed in pandas 1.0
# (the loop raises AttributeError on current pandas). Unlike the loop, it leaves
# missing UUIDs (NaN) untouched instead of failing on them, and it does not assume
# that org_df carries a default 0..n-1 index.
# regex=False treats "-" as a literal character (keyword available since pandas 0.23).
org_df["UUID"] = org_df["UUID"].str.replace("-", "", regex=False)

In [16]:
# Preview the first rows of org_df to verify that the dashes were removed from UUID
org_df.head()

Unnamed: 0.1,Unnamed: 0,UUID,Company Name,Homepage
0,352124,ea3599464db65eeeb466bd0774491e2d,1,http://y.com
1,24310,019602e287277fc1af9f4134dc629ca5,39,http://39inc.com
2,62273,01eb53dbbceec57f3ab154fb53f6de65,55,http://www.fifty-five.com
3,231876,c1723121d8a2457c1b8125fc42e78d95,99,http://www.99taxis.com
4,402099,934bd25a4b919d7812c3524c679d3e69,101,http://www.101edu.co/


In [17]:
# Inner-join the company records with the organization records on the shared
# "Company Name" column, then preview the combined dataframe.
# NOTE(review): nothing here guarantees that company names are unique in either
# dataframe; duplicated names would multiply rows in the join - confirm.
merged_df = companies_df.merge(org_df, how="inner", on="Company Name")
merged_df.head()

Unnamed: 0.1,Company Name,Crunchbase Rank,Founded Date,Closed Date,Total Funding Amount,Status,Unnamed: 0,UUID,Homepage
0,#waywire,21987,2012-06-01,NaT,"$1,750,000",Was Acquired,81917,3ca068193ca9ce42ce20f0e150d563a0,http://www.waywire.com
1,*gram Labs,11897,2015-08-01,NaT,"$1,000,000",Operating,403731,18e8dd5efe1d5f8ef2edb254171d7f5d,http://www.gramlabs.ai
2,.io,16844,2015-03-01,NaT,"$1,500,000",Operating,324450,cf169e68c903c0898789bdd557d6a4c1,https://onthe.io
3,/dev/color,35003,2015-05-01,NaT,"$120,000",Operating,328998,165f412c361d7931fd3bcf5e2a1ccef3,http://www.devcolor.org/
4,10 By 10,59822,2015-01-01,NaT,"$120,000",Operating,496623,e1670a304ff5893d0ae3ceb761e1c251,https://www.10by10.io/


In [18]:
# List the column labels of merged_df to check which fields the merge produced
merged_df.columns

Index(['Company Name', 'Crunchbase Rank', 'Founded Date', 'Closed Date',
       'Total Funding Amount', 'Status', 'Unnamed: 0', 'UUID', 'Homepage'],
      dtype='object')

In [19]:
# Keep only the relevant columns, placing UUID and Homepage right after the company
# name (this drops the leftover 'Unnamed: 0' column), then preview the result
merged_columns = [
    'Company Name',
    'UUID',
    'Homepage',
    'Crunchbase Rank',
    'Founded Date',
    'Closed Date',
    'Total Funding Amount',
    'Status',
]
merged_df = merged_df[merged_columns]
merged_df.head()

Unnamed: 0,Company Name,UUID,Homepage,Crunchbase Rank,Founded Date,Closed Date,Total Funding Amount,Status
0,#waywire,3ca068193ca9ce42ce20f0e150d563a0,http://www.waywire.com,21987,2012-06-01,NaT,"$1,750,000",Was Acquired
1,*gram Labs,18e8dd5efe1d5f8ef2edb254171d7f5d,http://www.gramlabs.ai,11897,2015-08-01,NaT,"$1,000,000",Operating
2,.io,cf169e68c903c0898789bdd557d6a4c1,https://onthe.io,16844,2015-03-01,NaT,"$1,500,000",Operating
3,/dev/color,165f412c361d7931fd3bcf5e2a1ccef3,http://www.devcolor.org/,35003,2015-05-01,NaT,"$120,000",Operating
4,10 By 10,e1670a304ff5893d0ae3ceb761e1c251,https://www.10by10.io/,59822,2015-01-01,NaT,"$120,000",Operating


In [20]:
# ORGANIZING THE DATA 3: We are now going to combine another dataframe: funding_rounds.
# After that we should have all of our data ready to be used.

In [21]:
# Load the funding rounds export, parsing the announcement date as a datetime, and
# order the rows alphabetically by company name with a freshly numbered index
csv_file = "funding_rounds.csv"
funds_df = (
    pd.read_csv(
        csv_file,
        encoding="iso-8859-1",
        parse_dates=["Announced On Date"],
    )
    .sort_values("Company Name")
    .reset_index(drop=True)
)

# Sanity check: print how many funding rounds were loaded, then preview the first records
print(funds_df.shape[0])
funds_df.head()

10901


Unnamed: 0,Company Name,Company Name URL,Funding Type,Money Raised,Announced On Date
0,#waywire,https://www.crunchbase.com/organization/waywire,Seed,"$1,750,000",2012-06-30
1,*gram Labs,https://www.crunchbase.com/organization/gram-labs,Seed,"$1,000,000",2016-07-27
2,.io,https://www.crunchbase.com/organization/io-rea...,Seed,"$1,500,000",2015-10-01
3,/dev/color,https://www.crunchbase.com/organization/dev-color,Seed,"$120,000",2016-08-23
4,10 By 10,https://www.crunchbase.com/organization/10-by-10,Seed,"$120,000",2017-07-01


In [22]:
# Inner-join the company/organization data with the funding rounds on "Company Name";
# a company with several funding rounds appears once per round. Preview up to 100 rows.
all_data_df = merged_df.merge(funds_df, how="inner", on="Company Name")
all_data_df.head(100)

Unnamed: 0,Company Name,UUID,Homepage,Crunchbase Rank,Founded Date,Closed Date,Total Funding Amount,Status,Company Name URL,Funding Type,Money Raised,Announced On Date
0,#waywire,3ca068193ca9ce42ce20f0e150d563a0,http://www.waywire.com,21987,2012-06-01,NaT,"$1,750,000",Was Acquired,https://www.crunchbase.com/organization/waywire,Seed,"$1,750,000",2012-06-30
1,*gram Labs,18e8dd5efe1d5f8ef2edb254171d7f5d,http://www.gramlabs.ai,11897,2015-08-01,NaT,"$1,000,000",Operating,https://www.crunchbase.com/organization/gram-labs,Seed,"$1,000,000",2016-07-27
2,.io,cf169e68c903c0898789bdd557d6a4c1,https://onthe.io,16844,2015-03-01,NaT,"$1,500,000",Operating,https://www.crunchbase.com/organization/io-rea...,Seed,"$1,500,000",2015-10-01
3,/dev/color,165f412c361d7931fd3bcf5e2a1ccef3,http://www.devcolor.org/,35003,2015-05-01,NaT,"$120,000",Operating,https://www.crunchbase.com/organization/dev-color,Seed,"$120,000",2016-08-23
4,10 By 10,e1670a304ff5893d0ae3ceb761e1c251,https://www.10by10.io/,59822,2015-01-01,NaT,"$120,000",Operating,https://www.crunchbase.com/organization/10-by-10,Seed,"$120,000",2017-07-01
5,10-4 Systems,35ca612cf370d2dfe3db63554192f84a,https://www.10-4.com/,6618,2012-01-01,NaT,"$13,900,000",Operating,https://www.crunchbase.com/organization/10-4-s...,Series A,"$13,900,000",2016-06-27
6,10X Genomics,cf3b00cfffbf0e4f6427a60e5060de39,http://10xgenomics.com,3393,2012-01-01,NaT,"$113,000,000",Operating,https://www.crunchbase.com/organization/10x-ge...,Series B,"$55,000,000",2015-01-12
7,10X Genomics,cf3b00cfffbf0e4f6427a60e5060de39,http://10xgenomics.com,3393,2012-01-01,NaT,"$113,000,000",Operating,https://www.crunchbase.com/organization/10x-ge...,Series C,"$55,000,000",2016-03-17
8,10X Genomics,cf3b00cfffbf0e4f6427a60e5060de39,http://10xgenomics.com,3393,2012-01-01,NaT,"$113,000,000",Operating,https://www.crunchbase.com/organization/10x-ge...,Series A,"$3,000,000",2012-10-08
9,10sec,79fd01f688736282a9ea27622d0bbdcb,https://10s.ec/,24952,2013-07-08,NaT,"$1,600,000",Operating,https://www.crunchbase.com/organization/10sec,Seed,"$1,600,000",2014-05-08


In [23]:
# List the column labels of all_data_df to check which fields the merge produced
all_data_df.columns

Index(['Company Name', 'UUID', 'Homepage', 'Crunchbase Rank', 'Founded Date',
       'Closed Date', 'Total Funding Amount', 'Status', 'Company Name URL',
       'Funding Type', 'Money Raised', 'Announced On Date'],
      dtype='object')

In [24]:
# Keep only the relevant columns (this drops 'Company Name URL') and preview the result
final_columns = [
    'Company Name',
    'UUID',
    'Homepage',
    'Crunchbase Rank',
    'Founded Date',
    'Closed Date',
    'Total Funding Amount',
    'Status',
    'Funding Type',
    'Money Raised',
    'Announced On Date',
]
all_data_df = all_data_df[final_columns]
all_data_df.head()

Unnamed: 0,Company Name,UUID,Homepage,Crunchbase Rank,Founded Date,Closed Date,Total Funding Amount,Status,Funding Type,Money Raised,Announced On Date
0,#waywire,3ca068193ca9ce42ce20f0e150d563a0,http://www.waywire.com,21987,2012-06-01,NaT,"$1,750,000",Was Acquired,Seed,"$1,750,000",2012-06-30
1,*gram Labs,18e8dd5efe1d5f8ef2edb254171d7f5d,http://www.gramlabs.ai,11897,2015-08-01,NaT,"$1,000,000",Operating,Seed,"$1,000,000",2016-07-27
2,.io,cf169e68c903c0898789bdd557d6a4c1,https://onthe.io,16844,2015-03-01,NaT,"$1,500,000",Operating,Seed,"$1,500,000",2015-10-01
3,/dev/color,165f412c361d7931fd3bcf5e2a1ccef3,http://www.devcolor.org/,35003,2015-05-01,NaT,"$120,000",Operating,Seed,"$120,000",2016-08-23
4,10 By 10,e1670a304ff5893d0ae3ceb761e1c251,https://www.10by10.io/,59822,2015-01-01,NaT,"$120,000",Operating,Seed,"$120,000",2017-07-01


In [25]:
# Group the combined records by funding type and amount raised. No aggregation is
# applied here, so all_series is a pandas GroupBy object rather than a Series.
# NOTE(review): in the previews above 'Money Raised' is formatted text such as
# "$1,750,000", so the groups are keyed on that text - confirm this is intended.
all_series = all_data_df.groupby(by=['Funding Type', 'Money Raised'])

In [26]:
# On a GroupBy object, head() keeps the first 5 rows of *each* group (in the original
# row order), so this shows many more than 5 rows in total
all_series.head()

Unnamed: 0,Company Name,UUID,Homepage,Crunchbase Rank,Founded Date,Closed Date,Total Funding Amount,Status,Funding Type,Money Raised,Announced On Date
0,#waywire,3ca068193ca9ce42ce20f0e150d563a0,http://www.waywire.com,21987,2012-06-01,NaT,"$1,750,000",Was Acquired,Seed,"$1,750,000",2012-06-30
1,*gram Labs,18e8dd5efe1d5f8ef2edb254171d7f5d,http://www.gramlabs.ai,11897,2015-08-01,NaT,"$1,000,000",Operating,Seed,"$1,000,000",2016-07-27
2,.io,cf169e68c903c0898789bdd557d6a4c1,https://onthe.io,16844,2015-03-01,NaT,"$1,500,000",Operating,Seed,"$1,500,000",2015-10-01
3,/dev/color,165f412c361d7931fd3bcf5e2a1ccef3,http://www.devcolor.org/,35003,2015-05-01,NaT,"$120,000",Operating,Seed,"$120,000",2016-08-23
4,10 By 10,e1670a304ff5893d0ae3ceb761e1c251,https://www.10by10.io/,59822,2015-01-01,NaT,"$120,000",Operating,Seed,"$120,000",2017-07-01
5,10-4 Systems,35ca612cf370d2dfe3db63554192f84a,https://www.10-4.com/,6618,2012-01-01,NaT,"$13,900,000",Operating,Series A,"$13,900,000",2016-06-27
6,10X Genomics,cf3b00cfffbf0e4f6427a60e5060de39,http://10xgenomics.com,3393,2012-01-01,NaT,"$113,000,000",Operating,Series B,"$55,000,000",2015-01-12
7,10X Genomics,cf3b00cfffbf0e4f6427a60e5060de39,http://10xgenomics.com,3393,2012-01-01,NaT,"$113,000,000",Operating,Series C,"$55,000,000",2016-03-17
8,10X Genomics,cf3b00cfffbf0e4f6427a60e5060de39,http://10xgenomics.com,3393,2012-01-01,NaT,"$113,000,000",Operating,Series A,"$3,000,000",2012-10-08
9,10sec,79fd01f688736282a9ea27622d0bbdcb,https://10s.ec/,24952,2013-07-08,NaT,"$1,600,000",Operating,Seed,"$1,600,000",2014-05-08
