# Mongo DB

#### Connect to Companies DataBase in MongoDB

In [1]:
from pymongo import MongoClient

dbName = "companies"
mongodbURL = f"mongodb://localhost/{dbName}"

client = MongoClient(mongodbURL, connectTimeoutMS=2000, serverSelectionTimeoutMS=2000)
db = client.get_database()

## Filters to find the ideal location - Nearby choices

### 1. Design companies

In [2]:
query = {"category_code":"design"}
companies_design = list(db.companies.find(query,{"offices":1, "name":1}))

In [3]:
import pandas as pd

df = pd.DataFrame(companies_design)
df

Unnamed: 0,_id,name,offices
0,52cdef7c4bab8bd675298447,99designs,"[{'description': 'United States (HQ)', 'addres..."
1,52cdef7e4bab8bd67529ba4e,Graticle,"[{'description': 'Office', 'address1': '100 E...."
2,52cdef7e4bab8bd67529bd1a,Kickstarter,"[{'description': '', 'address1': '58 Kent St',..."
3,52cdef7f4bab8bd67529c47a,Moonfruit,"[{'description': 'UK Office', 'address1': '', ..."


In [4]:
offices = df.explode("offices").apply(lambda e: e.offices,axis=1,result_type="expand")
offices

Unnamed: 0,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,United States (HQ),447 Battery St.,3rd Floor,94111.0,San Francisco,CA,USA,37.795531,-122.400598
0,Australia,204 Wellington St.,,3065.0,Collingwood,,AUS,-37.802659,144.986855
0,Europe,Schlesische Str. 29-30,,,Berlin,,DEU,52.49862,13.446903
1,Office,100 E. Third Ave. #373,,98926.0,Ellensburg,WA,USA,45.796841,-122.693144
2,,58 Kent St,,11222.0,Brooklyn,NY,USA,,
3,UK Office,,,,London,,GBR,,


In [5]:
design_offices = pd.concat([df[["name","_id",]], offices], axis=1)
design_offices

Unnamed: 0,name,_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,99designs,52cdef7c4bab8bd675298447,United States (HQ),447 Battery St.,3rd Floor,94111.0,San Francisco,CA,USA,37.795531,-122.400598
0,99designs,52cdef7c4bab8bd675298447,Australia,204 Wellington St.,,3065.0,Collingwood,,AUS,-37.802659,144.986855
0,99designs,52cdef7c4bab8bd675298447,Europe,Schlesische Str. 29-30,,,Berlin,,DEU,52.49862,13.446903
1,Graticle,52cdef7e4bab8bd67529ba4e,Office,100 E. Third Ave. #373,,98926.0,Ellensburg,WA,USA,45.796841,-122.693144
2,Kickstarter,52cdef7e4bab8bd67529bd1a,,58 Kent St,,11222.0,Brooklyn,NY,USA,,
3,Moonfruit,52cdef7f4bab8bd67529c47a,UK Office,,,,London,,GBR,,


In [6]:
clean_offices = design_offices.loc[design_offices["country_code"] == "USA"]
clean_offices

Unnamed: 0,name,_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,99designs,52cdef7c4bab8bd675298447,United States (HQ),447 Battery St.,3rd Floor,94111,San Francisco,CA,USA,37.795531,-122.400598
1,Graticle,52cdef7e4bab8bd67529ba4e,Office,100 E. Third Ave. #373,,98926,Ellensburg,WA,USA,45.796841,-122.693144
2,Kickstarter,52cdef7e4bab8bd67529bd1a,,58 Kent St,,11222,Brooklyn,NY,USA,,


In [7]:
import requests
def geocode(address):
    res = requests.get(f"https://geocode.xyz/{address}", params={"json":1})
    data = res.json()
    print(data)
    return {
        "type":"Point",
        "coordinates":[float(data["longt"]),float(data["latt"])]
    }

In [8]:
geocode("58 Kent St")

{'longt': '0.00000', 'standard': {'addresst': {}, 'prov': 'ST', 'city': {}, 'countryname': 'Sao Tome and Principe', 'postal': {}, 'confidence': '0.90'}, 'matches': None, 'alt': {}, 'error': {'description': '15. Your request did not produce any results.', 'code': '008'}, 'suggestion': {'region': 'ST', 'locate': {}}, 'latt': '0.00000'}


{'type': 'Point', 'coordinates': [0.0, 0.0]}

In [9]:
geocode(11222)

{'standard': {'addresst': '36 Linksmoji g', 'stnumber': '36', 'region': 'MM', 'postal': '11222', 'city': 'Vilnius', 'prov': 'LT', 'countryname': 'Lithuania', 'confidence': '0.25'}, 'longt': '25.43190', 'alt': {'loc': [{'staddress': '1 ဖဆပလအိမ်ယာ', 'stnumber': {}, 'postal': '11222', 'region': {}, 'latt': '16.78718', 'longt': '96.15988', 'city': 'Yangon', 'prov': 'MM', 'countryname': 'Myanmar'}, {'staddress': {}, 'stnumber': {}, 'postal': '11222', 'region': 'MM', 'latt': '59.32864', 'longt': '18.03730', 'city': 'Stockholm', 'prov': 'SE', 'countryname': 'Sweden'}, {'staddress': '162 Nawala Road', 'stnumber': {}, 'postal': '11222', 'region': 'MM', 'latt': '6.88238', 'longt': '79.88899', 'city': 'Nugegoda', 'prov': 'LK', 'countryname': 'Sri Lanka'}, {'staddress': '69 Саве Јовановића', 'stnumber': {}, 'postal': '11222', 'region': 'MM', 'latt': '44.74194', 'longt': '20.48691', 'city': 'Rušanj', 'prov': 'RS', 'countryname': 'Serbia'}, {'staddress': '981 JAVIER DE VIANA', 'stnumber': {}, 'posta

{'type': 'Point', 'coordinates': [25.4319, 54.68773]}

In [10]:
import numpy as np

def transformToGeoPoint(s):
    if np.isnan(s.latitude) or np.isnan(s.longitude):
        return None
    return {
        "type":"Point",
        "coordinates":[s.longitude, s.latitude]
    }
    

clean_offices["geopoint"] = clean_offices.apply(transformToGeoPoint, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


In [11]:
clean_offices

Unnamed: 0,name,_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude,geopoint
0,99designs,52cdef7c4bab8bd675298447,United States (HQ),447 Battery St.,3rd Floor,94111,San Francisco,CA,USA,37.795531,-122.400598,"{'type': 'Point', 'coordinates': [-122.4005983..."
1,Graticle,52cdef7e4bab8bd67529ba4e,Office,100 E. Third Ave. #373,,98926,Ellensburg,WA,USA,45.796841,-122.693144,"{'type': 'Point', 'coordinates': [-122.6931439..."
2,Kickstarter,52cdef7e4bab8bd67529bd1a,,58 Kent St,,11222,Brooklyn,NY,USA,,,


### 2. Successful tech startups

In [19]:
query = {
    "$or":[
        {"total_money_raised":{"$regex": "M$"}},
        {"total_money_raised":{"$regex": "B$"}}
    ]
}
successful_companies = list(db.companies.find(
    query,{"offices":1, "name":1}))

In [21]:
len(successful_companies)

4042

In [23]:
print(list(range(5)))

[0, 1, 2, 3, 4]


In [28]:
a=list(range(5))
for i in a:
    print(a[:2])

[0, 1]
[0, 1]
[0, 1]
[0, 1]
[0, 1]
