In [2]:
from pymongo import MongoClient
import pandas as pd
import re
import geopandas as gpd

#Import MongoDB collection and create DataFrame

def connectCollection(database, collection):
    """Return handles to a MongoDB database and one of its collections.

    A ``MongoClient`` is created with its default connection settings and
    indexed first by ``database`` and then by ``collection``.

    Args:
        database: Name of the database to select on the client.
        collection: Name of the collection to select inside that database.

    Returns:
        A ``(db, coll)`` tuple: the database handle and the collection handle.
    """
    mongo_db = MongoClient()[database]
    return mongo_db, mongo_db[collection]

# Open the 'companies' collection of the 'companies' database
db, coll = connectCollection('companies','companies')

# Single-stage aggregation: unwind the "offices" field of every document
pipeline = [{"$unwind": "$offices"}]

results = list(coll.aggregate(pipeline))
df = pd.DataFrame(results)
print("Collection imported succesfully, DataFrame Generated")

# Keep only the rows whose deadpooled_year is null, then renumber the index
df = df[df["deadpooled_year"].isnull()].reset_index(drop=True)

# Expand each office record into its own columns, replacing the 'offices' column
office_columns = df["offices"].apply(pd.Series)
df = pd.concat([df.drop(columns=["offices"]), office_columns], axis=1)

# Label every row "Tech" or "Other" depending on its category_code
tech = [
    "web", "games_video", "mobile", "social", "photo_video",
    "network_hosting", "software", "ecommerce", "hardware",
    "semiconductor", "analytics", "biotech", "cleantech", "nanotech",
]
df["Tech/Other"] = df["category_code"].map(
    lambda code: "Other" if code not in tech else "Tech"
)

#Clean money raised column

def moneyRaise(value):
    """Convert a money string such as ``"$2.5M"`` or ``"€300K"`` to whole USD.

    The first number found in ``value`` is scaled by the magnitude suffix
    (``K``, ``M`` or ``B``, case-insensitive) and then converted with the
    fixed exchange rate selected by the currency prefix. Both steps are
    applied, so ``"€2M"`` is two million euros expressed in dollars.

    Args:
        value: String holding an optional currency prefix, a number and an
            optional magnitude suffix.

    Returns:
        The amount as an ``int`` (fractional part truncated).

    Raises:
        ValueError: If ``value`` contains no number.
    """
    # Fixed exchange rates to USD, keyed by currency code.
    dicc_coin = {'CAD': 0.76, 'RUB': 0.016, 'EUR': 1.11, 'GBP': 1.29}
    # Magnitude suffixes; a billion is 10**9.
    values_money = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
    # Prefix -> currency code, checked in this order. "$" and unknown
    # prefixes are treated as USD (rate 1).
    prefix_currency = (("C", 'CAD'), ("€", 'EUR'), ("£", 'GBP'), ("r", 'RUB'))

    number_match = re.search('[+-]?([0-9]*[.])?[0-9]+', value)
    if number_match is None:
        raise ValueError("no numeric amount found in {!r}".format(value))
    amount = float(number_match[0])

    # Magnitude: look at the last character only, ignoring case.
    amount *= values_money.get(value[-1:].upper(), 1)

    # Currency: applied independently of the magnitude suffix.
    for prefix, currency in prefix_currency:
        if value.startswith(prefix):
            amount *= dicc_coin[currency]
            break

    return int(amount)

# Replace the textual total_money_raised values with the integers from moneyRaise
df["total_money_raised"] = df["total_money_raised"].apply(moneyRaise)

# Remove every row that lacks a latitude or a longitude, then write the CSV
missing_coordinates = df["latitude"].isnull() | df["longitude"].isnull()
drop_rows = df[missing_coordinates].index
df = df.drop(index=drop_rows)
df.to_csv("./Input/clean_df_companies.csv")
print("Cleaned DataFrame successfully exported to csv")

# Read the airports shapefile into a GeoDataFrame

gdf_airports = gpd.read_file("./Input/ne_10m_airports/ne_10m_airports.shp")
print("Airports shp file imported.")

#Function for creating GeoJSON column

def getLocation(gdf):
    """Build a GeoJSON-style Point mapping from an object with ``x`` and ``y``.

    Args:
        gdf: Object exposing ``x`` and ``y`` attributes (the notebook applies
            this to each entry of a geometry column).

    Returns:
        ``{'type': 'Point', 'coordinates': [x, y]}``.
    """
    return dict(type='Point', coordinates=[gdf.x, gdf.y])

#Convert the DataFrame into a GeoDataFrame

def df_to_gdf(dataframe):
    """Turn a DataFrame with ``longitude``/``latitude`` columns into a GeoDataFrame.

    Point geometries are built with longitude as x and latitude as y, and the
    result is tagged with the EPSG:4326 coordinate reference system. The CRS
    is passed as the authority string ``"EPSG:4326"`` instead of the legacy
    ``{'init': 'epsg:4326'}`` mapping, which is deprecated PROJ "init" syntax.

    Args:
        dataframe: DataFrame exposing ``longitude`` and ``latitude`` columns.

    Returns:
        A GeoDataFrame with a ``geometry`` column and a fresh 0..n-1 index.
    """
    points = gpd.points_from_xy(dataframe.longitude, dataframe.latitude)
    gdf = gpd.GeoDataFrame(dataframe, geometry=points, crs="EPSG:4326")
    # Return a renumbered frame rather than mutating in place.
    return gdf.reset_index(drop=True)

# Build the companies GeoDataFrame from the cleaned DataFrame
gdf_master = df_to_gdf(df)

# Attach a GeoJSON-style Point mapping for every airport geometry
gdf_airports["geoJSON"] = gdf_airports.geometry.apply(getLocation)

# Prepare plain DataFrames for the JSON export
df_master = pd.DataFrame(gdf_master)
df_airports = pd.DataFrame(gdf_airports)
# Remove the column found at position 1
df_airports = df_airports.drop(columns=df_airports.columns[1])
# NOTE(review): the export below is disabled. The recorded cell output shows
# "OverflowError: Maximum recursion level reached"; presumably it came from
# this to_json call while the geometry column was still present - confirm
# before re-enabling.
# df_airports.to_json("airports.json", orient='records')
# print("JSON file generated.")


Collection imported succesfully, DataFrame Generated
Cleaned DataFrame successfully exported to csv
Airports shp file imported.


OverflowError: Maximum recursion level reached

In [3]:
# Show the airports table as this cell's output
df_airports

Unnamed: 0,scalerank,type,name,abbrev,location,gps_code,iata_code,wikipedia,natlscale,comments,...,name_pt,name_ru,name_sv,name_tr,name_vi,name_zh,wdid_score,ne_id,geometry,geoJSON
0,9,small,Sahnewal,LUH,terminal,VILD,LUH,http://en.wikipedia.org/wiki/Sahnewal_Airport,8.0,,...,,,Ludhiana Airport,,,,4,1159113785,POINT (75.95707 30.85036),"{'type': 'Point', 'coordinates': [75.957072240..."
1,9,mid,Solapur,SSE,terminal,VASL,SSE,http://en.wikipedia.org/wiki/Solapur_Airport,8.0,,...,,,,,,,4,1159113803,POINT (75.93306 17.62542),"{'type': 'Point', 'coordinates': [75.933059771..."
2,9,mid,Birsa Munda,IXR,terminal,VERC,IXR,http://en.wikipedia.org/wiki/Birsa_Munda_Airport,8.0,,...,,,M. O. Ranchi,,Sân bay Birsa Munda,蘭契,4,1159113831,POINT (85.32360 23.31772),"{'type': 'Point', 'coordinates': [85.323597036..."
3,9,mid,Ahwaz,AWZ,terminal,OIAW,AWZ,http://en.wikipedia.org/wiki/Ahwaz_Airport,8.0,,...,,,Ahwaz International Airport,Ahvaz Havalimanı,Sân bay Ahvaz,阿瓦士,4,1159113845,POINT (48.74711 31.34316),"{'type': 'Point', 'coordinates': [48.747106543..."
4,9,mid and military,Gwalior,GWL,terminal,VIGR,GWL,http://en.wikipedia.org/wiki/Gwalior_Airport,8.0,,...,,,Gwalior Airport,,Sân bay Gwalior,瓜廖爾,4,1159113863,POINT (78.21722 26.28549),"{'type': 'Point', 'coordinates': [78.217218654..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,2,major,Madrid Barajas,MAD,terminal,LEMD,MAD,http://en.wikipedia.org/wiki/Madrid_Barajas_In...,150.0,,...,Aeroporto de Adolfo Suárez Madrid-Barajas,Барахас,Madrid-Barajas flygplats,Madrid Barajas Uluslararası Havalimanı,Sân bay quốc tế Madrid Barajas,马德里－巴拉哈斯机场,4,1159127851,POINT (-3.56903 40.46813),"{'type': 'Point', 'coordinates': [-3.569026654..."
887,2,major,Luis Muñoz Marin,SJU,terminal,TJSJ,SJU,http://en.wikipedia.org/wiki/Luis_Mu%C3%B1oz_M...,150.0,,...,Aeroporto Internacional Luis Muñoz Marin,Каролина,Luis Muñoz Marín International Airport,,Sân bay quốc tế Luis Muñoz Marín,路易斯·穆尼奥斯·马林国际机场,4,1159127861,POINT (-66.00423 18.43808),"{'type': 'Point', 'coordinates': [-66.00422997..."
888,2,major,Arlanda,ARN,terminal,ESSA,ARN,http://en.wikipedia.org/wiki/Stockholm-Arlanda...,150.0,,...,Aeroporto de Arlanda,Стокгольм-Арланда,Stockholm Arlanda Airport,Stockholm-Arlanda Havalimanı,Sân bay Stockholm-Arlanda,斯德哥爾摩－阿蘭達機場,4,1159127877,POINT (17.93073 59.65112),"{'type': 'Point', 'coordinates': [17.930729901..."
889,2,major,Soekarno-Hatta Int'l,CGK,parking,WIII,CGK,http://en.wikipedia.org/wiki/Soekarno-Hatta_In...,150.0,,...,Aeroporto Internacional Soekarno-Hatta,Сукарно-Хатта,Soekarno-Hatta International Airport,Soekarno-Hatta Uluslararası Havalimanı,Sân bay quốc tế Soekarno-Hatta,蘇加諾－哈達國際機場,4,1159127891,POINT (106.65430 -6.12660),"{'type': 'Point', 'coordinates': [106.65429615..."
