In [1]:
# Libraries used
from pymongo import MongoClient
import pandas as pd
from pandas.io.json import json_normalize
import requests

In [2]:
# Connect to the local MongoDB server and select the `companies` database.
client = MongoClient('localhost', 27017)
db = client['companies']

# Selection criteria: more than one employee, no deadpooled_year recorded,
# office coordinates and funding rounds present, and one of four categories.
target_categories = ('games_video', 'software', 'web', 'social')
company_filter = {
    '$and': [
        {'number_of_employees': {'$gt': 1}},
        {'deadpooled_year': {'$eq': None}},
        {'offices.latitude': {'$exists': True, '$ne': None}},
        {'offices.longitude': {'$exists': True, '$ne': None}},
        {'funding_rounds': {'$exists': True, '$ne': None}},
        {'$or': [{'category_code': code} for code in target_categories]},
    ]
}

# Fields returned for each matching company (Mongo's _id is suppressed).
company_projection = {
    'name': 1,
    'category_code': 1,
    'founded_year': 1,
    '_id': 0,
    'number_of_employees': 1,
    'offices.latitude': 1,
    'offices.longitude': 1,
    'offices.zip_code': 1,
    'ipo': 1,
    'deadpooled_year': 1,
    'funding_rounds.funded_year': 1,
    'funding_rounds.round_code': 1,
    'funding_rounds.raised_amount': 1,
    'funding_rounds.raised_currency_code': 1,
}

cursor2 = db.companies.find(company_filter, company_projection)

# Flatten to one row per funding round; round fields get a '_' prefix and the
# owning company's name is carried along as metadata.
data2 = json_normalize(
    data=cursor2,
    record_path='funding_rounds',
    meta=['name'],
    record_prefix='_',
    errors='ignore',
)

# Missing values (amounts, years, currency codes) are all replaced with 0.
data2 = data2.fillna(0)
data2.head()

Unnamed: 0,_funded_year,_raised_amount,_raised_currency_code,_round_code,name
0,2007.0,1500000.0,USD,a,Geni
1,2007.0,10000000.0,USD,b,Geni
2,2009.0,5000000.0,USD,c,Geni
3,2007.0,1500000.0,USD,a,MeetMoi
4,2008.0,1500000.0,USD,b,MeetMoi


In [3]:
# Number of funding rounds per currency code; the 0 entry comes from the
# earlier fillna(0) and stands for rounds with no currency recorded.
data2.loc[:, '_raised_currency_code'].value_counts()

USD    1229
0        99
EUR      37
GBP      16
CAD       3
SEK       1
Name: _raised_currency_code, dtype: int64

In [4]:
# Download the latest USD-based exchange rates.
# NOTE: this endpoint serves "latest" rates, so converted amounts can differ
# between runs of the notebook.
url = 'https://api.exchangerate-api.com/v4/latest/USD'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail here on an HTTP error instead of later with a KeyError on 'rates'.
response.raise_for_status()
currency_data = response.json()

In [5]:
# Attach the exchange rate matching each round's currency code. Codes absent
# from the rate table (such as the 0 placeholder) map to NaN, which the
# frame-wide fillna then turns into a neutral rate of 1.
usd_rates = currency_data['rates']
data2 = data2.assign(
    currency_change=data2['_raised_currency_code'].map(usd_rates)
).fillna(1)

In [6]:
# The rates were fetched with USD as the base currency, i.e. each rate is the
# number of foreign-currency units per 1 USD. Converting a foreign amount to
# USD therefore means dividing by the rate (multiplying would convert
# USD -> foreign instead). Rows with an unknown currency carry a rate of 1
# and are left unchanged.
data2['raised_amount_USD'] = data2['_raised_amount'] / data2['currency_change']
data2.head()

Unnamed: 0,_funded_year,_raised_amount,_raised_currency_code,_round_code,name,currency_change,raised_amount_USD
0,2007.0,1500000.0,USD,a,Geni,1.0,1500000.0
1,2007.0,10000000.0,USD,b,Geni,1.0,10000000.0
2,2009.0,5000000.0,USD,c,Geni,1.0,5000000.0
3,2007.0,1500000.0,USD,a,MeetMoi,1.0,1500000.0
4,2008.0,1500000.0,USD,b,MeetMoi,1.0,1500000.0


In [7]:
# Keep only the columns needed downstream, in this order.
kept_columns = ['name', '_funded_year', '_round_code', 'raised_amount_USD']
data2 = data2[kept_columns]

In [8]:
# Number of funding rounds per round type.
data2.loc[:, '_round_code'].value_counts()

a                   301
unattributed        227
seed                210
b                   209
angel               156
c                   104
debt_round           71
d                    40
partial              22
private_equity       17
e                    10
secondary_market      5
grant                 4
f                     3
crowd                 2
post_ipo_equity       2
convertible           1
g                     1
Name: _round_code, dtype: int64

In [9]:
def drop(round_code):
    """Return the global `data2` without rows of the given round type.

    Reads the notebook-level `data2` frame; rows whose `_round_code` equals
    `round_code` are removed by index label and a new frame is returned.
    """
    matching_rows = data2.loc[data2['_round_code'] == round_code].index
    return data2.drop(matching_rows)

# Remove the round types excluded from this analysis, one code at a time.
for excluded_code in ('debt_round', 'grant', 'secondary_market',
                      'post_ipo_equity', 'convertible', 'partial'):
    data2 = drop(excluded_code)

In [10]:
# Amount of every round in millions of USD, kept unrounded for aggregation so
# that small rounds (< 0.05 M) are not lost before summing/averaging.
amount_mn = data2['raised_amount_USD'] / 10**6
per_company = amount_mn.groupby(data2['name'])

# Per-round amount; after de-duplication (keep="last") this becomes the
# company's last listed round.
data2['last_raised_amount_MnUSD'] = amount_mn.round(1)
data2['total_raised_MnUSD'] = per_company.transform('sum').round(1)
data2['round_average_MnUSD'] = per_company.transform('mean').round(1)

# Count the rounds directly. Deriving the count as total / average breaks
# when the rounded average is 0 (division by zero -> NaN) and miscounts when
# rounding distorts the ratio. Kept as float to match the previous dtype.
# NOTE(review): companies are grouped by name only, so distinct companies
# sharing a name would be merged - confirm names are unique in the source.
data2['rounds'] = per_company.transform('count').astype(float)

data2 = data2.drop(['_funded_year', 'raised_amount_USD', '_round_code'], axis=1)

In [13]:
# Collapse to one row per company, keeping the last listed row for each name
# (the per-company aggregates are identical on every row of a company).
superseded_rows = data2.duplicated(subset='name', keep='last')
data2 = data2[~superseded_rows]
data2.head()

Unnamed: 0,name,last_raised_amount_MnUSD,total_raised_MnUSD,round_average_MnUSD,rounds
2,Geni,5.0,16.5,5.5,3.0
5,MeetMoi,2.6,5.6,1.9,3.0
12,Twitter,400.0,760.2,108.6,7.0
24,Facebook,1500.0,2325.7,232.6,10.0
28,Plaxo,9.0,28.3,7.1,4.0


In [15]:
def toCSV(data, path='data_investments.csv'):
    """Write `data` to a CSV file.

    Parameters
    ----------
    data : object exposing ``to_csv`` (e.g. a pandas DataFrame)
        The table to persist; its index is written as the first column.
    path : str, optional
        Destination file. Defaults to 'data_investments.csv' in the current
        working directory, which is where earlier versions always wrote.
    """
    data.to_csv(path)

# Persist the per-company funding summary.
toCSV(data=data2)