In [95]:
import pandas as pd
from functions import connectCollection, searchNear, searchNearWithoutLimit

df_filtered = pd.read_csv("../input/clean_df_companies.csv")
print("clean_df_companies.csv successfully imported")
# Import airports around
db, airports = connectCollection('companies', 'airports')
print("airports collection successfully loaded")

df_filtered["Closest_Airport"] = df_filtered.apply(lambda x: searchNear(
    x.longitude, x.latitude, airports, 40000), axis=1)
df_filtered = df_filtered[df_filtered['Closest_Airport'].astype(bool)]

# Import old companies around
db, oldcos = connectCollection('companies', 'oldCos')
print("oldCos collection successfully loaded")

df_filtered["Closest_oldCo"] = df_filtered.apply(lambda x: searchNear(
    x.longitude, x.latitude, oldcos, 300), axis=1)
df_filtered = df_filtered[df_filtered['Closest_oldCo'].astype(
    bool) == False]

# Import Starbucks around
db, starbucks = connectCollection('companies', 'starbucks')
print("starbucks collection successfully loaded")

df_filtered["Closest_Starbucks"] = df_filtered.apply(lambda x: searchNear(
    x.longitude, x.latitude, starbucks, 1000), axis=1)
df_filtered = df_filtered[df_filtered['Closest_Starbucks'].astype(
    bool)]

# Import Tech Companies with >$1m raised around
db, techCos = connectCollection('companies', 'techCos')
print("techCos collection successfully loaded")

df_filtered["Closest_techCo"] = df_filtered.apply(lambda x: searchNearWithoutLimit(
    x.longitude, x.latitude, techCos, 400), axis=1)
df_filtered = df_filtered[df_filtered['Closest_techCo'].astype(
    bool)]
df_filtered.reset_index(inplace=True, drop=True)
df_filtered.to_csv("../input/df_filtered")
print("df_filtered.csv successfully exported")

# Checking which cities have more techCos around
df_filtered["Number_of_TechCo_around"] = df_filtered.Closest_techCo.apply(
    lambda x: len(x))
df_check_techCos = df_filtered[["city", "Number_of_TechCo_around"]]
df_check_techCos = df_check_techCos.groupby("city").sum().sort_values(
    by="Number_of_TechCo_around", ascending=False)

# Checking apartment rent prices
df_apartment = pd.read_csv("../input/apartment-rent-summary.csv")
df_apartment[df_apartment["Location"].str.contains(
    "Atlanta|Chicago|Denver|Austin|San Mateo") == True].sort_values(by=["Price_3br"], ascending=False)
'''
Austin will be chosen, as it has many tech companies around, good rental prices and outranked Silicon Valley as the top city for startups
http://austin.culturemap.com/news/innovation/07-03-19-austin-ranking-best-cities-startups-commercialcafe/
'''
df_austin = df_filtered[df_filtered.city == "Austin"]
df_austin.reset_index(inplace=True, drop=True)
df_austin.to_csv("../input/df_austin.csv")


clean_df_companies.csv successfully imported
airports collection successfully loaded
oldCos collection successfully loaded
starbucks collection successfully loaded
techCos collection successfully loaded
df_filtered.csv successfully exported


In [97]:
df_austin_final = df_austin.copy()

In [84]:
df_austin_trial.Closest_Airport[0][0]["geoJSON"]["coordinates"]

[{'_id': ObjectId('5dd675f33b0eaf5b22d77d95'),
  'name': "Austin-Bergstrom Int'l",
  'geoJSON': {'type': 'Point',
   'coordinates': [-97.6668367646, 30.2021081921]}}]

In [78]:
df_austin_trial.Closest_Starbucks[0][0]["geoJSON"]["coordinates"]

[-97.8251037598, 30.2924041748]

In [98]:
df_austin_final["Airport_Latitude"] = df_austin_final["Closest_Airport"].apply(lambda x: x[0]["geoJSON"]["coordinates"][1])
df_austin_final["Airport_Longitude"] = df_austin_final["Closest_Airport"].apply(lambda x: x[0]["geoJSON"]["coordinates"][0])
df_austin_final["Starbucks_Latitude"] = df_austin_final["Closest_Starbucks"].apply(lambda x: x[0]["geoJSON"]["coordinates"][1])
df_austin_final["Starbucks_Longitude"] = df_austin_final["Closest_Starbucks"].apply(lambda x: x[0]["geoJSON"]["coordinates"][0])
df_austin_final["TechCo_Latitude"] = df_austin_final["Closest_techCo"].apply(lambda x: x[0]["geoJSON"]["coordinates"][1])
df_austin_final["TechCo_Longitude"] = df_austin_final["Closest_techCo"].apply(lambda x: x[0]["geoJSON"]["coordinates"][0])


In [99]:
df_austin_final

Unnamed: 0.1,Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,...,Tech/Other,Closest_Airport,Closest_oldCo,Closest_Starbucks,Closest_techCo,Number_of_TechCo_around,Airport_Latitude,Airport_Longitude,Starbucks_Latitude,Starbucks_Longitude
0,890,52cdef7c4bab8bd67529809a,Shangby,shangby,http://www.crunchbase.com/company/shangby,http://www.shangby.com,http://www.shangby.com/site/news_letter,,,ecommerce,...,Tech,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf44024, 'city': 'Au...","[{'_id': '52cdef7c4bab8bd67529809a', 'Unnamed:...",2,30.202108,-97.666837,30.292404,-97.825104
1,1490,52cdef7c4bab8bd6752982f7,CyberRentals,cyberrentals,http://www.crunchbase.com/company/cyberrentals,http://www.cyberrentals.com,,,,web,...,Tech,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf42d9b, 'city': 'Au...","[{'_id': '52cdef7c4bab8bd6752982f7', 'Unnamed:...",2,30.202108,-97.666837,30.237675,-97.792549
2,1492,52cdef7c4bab8bd6752982f9,TripHomes,triphomes,http://www.crunchbase.com/company/triphomes,http://www.HomeAway.com,,,,,...,Other,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf42d9b, 'city': 'Au...","[{'_id': '52cdef7c4bab8bd6752982f7', 'Unnamed:...",2,30.202108,-97.666837,30.237675,-97.792549
3,1688,52cdef7c4bab8bd6752983ba,MessageOne,messageone,http://www.crunchbase.com/company/messageone,,,,,,...,Other,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf42990, 'city': 'Au...","[{'_id': '52cdef7c4bab8bd6752983ba', 'Unnamed:...",1,30.202108,-97.666837,30.401922,-97.746902
4,2571,52cdef7c4bab8bd67529873f,Absolute Software,absolute-software,http://www.crunchbase.com/company/absolute-sof...,http://www.absolute.com,http://blog.absolute.com,http://blog.absolute.com/feed/atom/,absolutecorp,software,...,Tech,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf44588, 'city': 'Au...","[{'_id': '52cdef7f4bab8bd67529c39e', 'Unnamed:...",2,30.202108,-97.666837,30.265875,-97.746445
5,10563,52cdef7e4bab8bd67529ad8b,NetStreams,netstreams,http://www.crunchbase.com/company/netstreams,,,,,games_video,...,Tech,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf411a3, 'city': 'Au...","[{'_id': '52cdef7e4bab8bd67529ad8b', 'Unnamed:...",1,30.202108,-97.666837,30.418882,-97.702713
6,12856,52cdef7e4bab8bd67529b8c0,Infochimps,infochimps,http://www.crunchbase.com/company/infochimps,http://infochimps.com,http://blog.infochimps.com/,http://blog.infochimps.com/feed,infochimps,enterprise,...,Other,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf41de4, 'city': 'Au...","[{'_id': '52cdef7e4bab8bd67529b8c0', 'Unnamed:...",1,30.202108,-97.666837,30.269783,-97.753914
7,13694,52cdef7e4bab8bd67529bcf8,AppUseful,appuseful,http://www.crunchbase.com/company/appuseful,http://appuseful.com,http://feeds2.feedburner.com/appuseful,http://appuseful.com/blog,usefulwebapps,web,...,Tech,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf41447, 'city': 'Au...","[{'_id': '52cdef7e4bab8bd67529bcf8', 'Unnamed:...",1,30.202108,-97.666837,30.30249,-97.73851
8,14406,52cdef7f4bab8bd67529c01d,Quintiles,quintiles,http://www.crunchbase.com/company/quintiles,http://www.quintiles.com,,,,biotech,...,Tech,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf4302f, 'city': 'Au...","[{'_id': '52cdef7f4bab8bd67529c01d', 'Unnamed:...",2,30.202108,-97.666837,30.220448,-97.757416
9,15124,52cdef7f4bab8bd67529c39e,Pelorus Technology,pelorus-technology,http://www.crunchbase.com/company/pelorus-tech...,http://pelorustechnology.com,,,,enterprise,...,Other,"[{'_id': 5dd675f33b0eaf5b22d77d95, 'name': 'Au...",[],"[{'_id': 5dd67f43e1d2f64cdaf44e60, 'city': 'Au...","[{'_id': '52cdef7f4bab8bd67529c39e', 'Unnamed:...",8,30.202108,-97.666837,30.264475,-97.743774
