### Let's create a weight matrix for every company in Spain that has latitude and longitude data:

In [1]:
import json
import pandas as pd
from pymongo import GEOSPHERE
import src.functions as fun

from pymongo import MongoClient
# Connect to the MongoDB server on localhost and select the project database.
conn = MongoClient("localhost:27017")
db = conn["ironhack"]

In [3]:
# Load the Spanish companies export. The context manager closes the file even
# if parsing fails, and the encoding is pinned to UTF-8 (the JSON default)
# instead of relying on the platform's locale encoding.
with open("../data/spain_companies.json", encoding="utf-8") as f:
    data = json.load(f)

In [4]:
# Number of company records loaded from the JSON file.
len(data)

129

In [18]:
# Inspect one record (index 128) to see which fields each company carries.
data[128]

{'name': 'ITERNOVA',
 'category_code': 'consulting',
 'raised_amount': 0,
 'location': {'type': 'Point', 'coordinates': [40.3401332, -1.1059614]},
 'description': 'ITERNOVA TERUEL',
 'address1': 'Plaza Goya 4 bajos',
 'address2': '',
 'zip_code': '44001',
 'city': 'Teruel',
 'country_code': 'ESP',
 'latitude': 40.3401332,
 'longitude': -1.1059614}

#### Types of companies in Spain in this file:

In [14]:
# Tabular view of the records, used below to list the category codes present.
type_companies = pd.DataFrame(data)

In [15]:
# Distinct category codes, in order of first appearance.
pd.unique(type_companies["category_code"])

TypeError: list indices must be integers or slices, not str

#### We don't have any "design" companies, so in this case we will use "games_video" instead, because it is the same type of company as the one we are creating

#### We have imported the "spain_companies" JSON file as a new collection in our MongoDB database, and now we define the geospatial index on it:

In [21]:
# A 2dsphere index on `location` is required for the $near queries below.
db["spain_companies"].create_index([("location", GEOSPHERE)])

'location_2dsphere'

In [46]:
# Handle to the collection that every proximity query below runs against.
c = db["spain_companies"]

In [121]:
# GeoJSON point for the first company, used as the centre of a trial query.
# NOTE(review): GeoJSON expects [longitude, latitude]; the pair here is
# [latitude, longitude], which matches how `location` is stored in the sample
# record shown above. Confirm before changing either side on its own.
coor = {
    "type": "Point",
    "coordinates": [data[0]["latitude"], data[0]["longitude"]],
}

In [122]:
# Show the point that will be used as the query centre.
coor

{'type': 'Point', 'coordinates': [40.463667, -3.74922]}

In [123]:
# Every document whose `location` lies between 0 and 1000 distance units
# (metres for GeoJSON points) from `coor`.
query = {
    "location": {
        "$near": {
            "$geometry": coor,
            "$minDistance": 0,
            "$maxDistance": 1000,
        }
    }
}

In [124]:
# Run the trial query; `find` returns a cursor, not a list.
query_final = c.find(query)

In [125]:
# Count the matches. Building the list consumes the `query_final` cursor.
len(list(query_final))

2

In [151]:
# `query_final` was already consumed by the earlier `len(list(query_final))`
# cell, and a PyMongo cursor yields its documents only once, so reusing it
# would give an empty frame. Issue the query again instead.
otro_df = pd.DataFrame(list(c.find(query)))

In [152]:
# Name of the company used as the query centre above.
data[0]["name"]

'Negonation'

In [249]:
# Each record is a plain dict, so fields are read with company["key"].
type(data[0])

dict

#### Creating a weight matrix:

In [237]:
# Build one row of proximity counts per company. Only data[0:3] is processed
# here (presumably to keep the number of Google Places requests small while
# prototyping).
#
# NOTE(review): GeoJSON expects [longitude, latitude]; the point below is
# written as [latitude, longitude], matching how `location` is stored in the
# source records. Keep both sides consistent if this is ever corrected.

# Output column -> extra MongoDB condition combined with the proximity search.
company_filters = [
    # Video-game companies (stand-in for "design" companies).
    ("videogames_near", {"category_code": "games_video"}),
    # Companies that raised at least 1,000,000.
    ("1mcompanies_near", {"raised_amount": {"$gte": 1000000}}),
]

# Output column -> keyword arguments forwarded to fun.get_places.
place_searches = [
    ("starbucks_near", {"radius": 1000, "keyword": "starbucks"}),
    ("airports_near", {"radius": 1000, "type_": "airport"}),
    # Stadiums are searched in a wider radius than everything else.
    ("basket_stad_near", {"radius": 10000, "type_": "stadium", "keyword": "basketball"}),
    ("vegans_near", {"radius": 1000, "type_": "restaurant", "keyword": "vegan"}),
    ("schools_near", {"radius": 1000, "type_": "school"}),
    ("nightclubs_near", {"radius": 1000, "type_": "night_club"}),
    ("dog_hairdresser_near", {"radius": 1000, "type_": "pet_store", "keyword": "hairdresser"}),
]

new_list = []

for company in data[0:3]:
    name = company["name"]
    latitude = company["latitude"]
    longitude = company["longitude"]
    coord = {"type": "Point", "coordinates": [latitude, longitude]}

    row = {
        "name": name,
        "latitude": latitude,
        "longitude": longitude,
        "location": coord,
    }

    # Company counts from MongoDB: within 1000 m, at least 1 m away, and not
    # the company itself (matched by name).
    for column, extra_filter in company_filters:
        near_query = {
            "$and": [
                {
                    "location": {
                        "$near": {
                            "$geometry": coord,
                            "$minDistance": 1,
                            "$maxDistance": 1000,
                        }
                    }
                },
                extra_filter,
                {"name": {"$ne": name}},
            ]
        }
        # The cursor is materialised because count_documents() rejects $near.
        row[column] = len(list(c.find(near_query)))

    # Place counts from the Google Maps API wrapper.
    # NOTE(review): the displayed results never exceed 20, which looks like a
    # per-response cap in the API -- confirm inside fun.get_places.
    for column, search_kwargs in place_searches:
        response = fun.get_places(latitude, longitude, **search_kwargs)
        row[column] = len(response["results"])

    new_list.append(row)

In [238]:
# One row per company; the columns are the keys of the dicts in new_list.
final_df = pd.DataFrame(new_list)

In [239]:
# Show the raw counts before any scoring is applied.
final_df

Unnamed: 0,name,latitude,longitude,location,videogames_near,1mcompanies_near,starbucks_near,airports_near,basket_stad_near,vegans_near,schools_near,nigthclubs_near,dog_hairdresser_near
0,Negonation,40.463667,-3.74922,"{'type': 'Point', 'coordinates': [40.463667, -...",0,0,1,0,5,0,20,4,0
1,Fon,40.530724,-3.632687,"{'type': 'Point', 'coordinates': [40.530724, -...",0,0,1,2,2,2,20,9,1
2,eyeOS,41.385486,2.170287,"{'type': 'Point', 'coordinates': [41.3854855, ...",2,1,17,0,8,20,20,20,1


In [244]:
# Criteria columns that take part in the ranking.
columns = [
    "videogames_near",
    "1mcompanies_near",
    "starbucks_near",
    "airports_near",
    "basket_stad_near",
    "vegans_near",
    "schools_near",
    "nightclubs_near",
    "dog_hairdresser_near",
]

# Number of criteria with at least one nearby match (non-zero count) per company.
final_df["conditions"] = final_df[columns].astype(bool).sum(axis="columns")

In [241]:
# Show the frame with the new `conditions` column.
final_df

Unnamed: 0,name,latitude,longitude,location,videogames_near,1mcompanies_near,starbucks_near,airports_near,basket_stad_near,vegans_near,schools_near,nigthclubs_near,dog_hairdresser_near,conditions
0,Negonation,40.463667,-3.74922,"{'type': 'Point', 'coordinates': [40.463667, -...",0,0,1,0,5,0,20,4,0,3
1,Fon,40.530724,-3.632687,"{'type': 'Point', 'coordinates': [40.530724, -...",0,0,1,2,2,2,20,9,1,6
2,eyeOS,41.385486,2.170287,"{'type': 'Point', 'coordinates': [41.3854855, ...",2,1,17,0,8,20,20,20,1,7


In [246]:
# Weight given to each nearby-place count; the final score is the weighted sum.
score_weights = {
    "videogames_near": 20,
    "1mcompanies_near": 15,
    "starbucks_near": 10,
    "airports_near": 20,
    "basket_stad_near": 5,
    "vegans_near": 5,
    "schools_near": 25,
    "nightclubs_near": 40,
    "dog_hairdresser_near": 5,
}

final_df["final_score"] = sum(
    final_df[column] * weight for column, weight in score_weights.items()
)

In [247]:
# Rank by number of satisfied criteria first, then by weighted score (both descending).
final_df.sort_values(by=["conditions", "final_score"], ascending=[False, False])

Unnamed: 0,name,latitude,longitude,location,videogames_near,1mcompanies_near,starbucks_near,airports_near,basket_stad_near,vegans_near,schools_near,nigthclubs_near,dog_hairdresser_near,conditions,final_score
2,eyeOS,41.385486,2.170287,"{'type': 'Point', 'coordinates': [41.3854855, ...",2,1,17,0,8,20,20,20,1,8,1670
1,Fon,40.530724,-3.632687,"{'type': 'Point', 'coordinates': [40.530724, -...",0,0,1,2,2,2,20,9,1,7,935
0,Negonation,40.463667,-3.74922,"{'type': 'Point', 'coordinates': [40.463667, -...",0,0,1,0,5,0,20,4,0,4,695


In [253]:
# Search radius, in metres, for the MongoDB proximity queries.
_COMPANY_RADIUS_M = 1000

# Output column -> extra MongoDB condition combined with the proximity search.
_COMPANY_FILTERS = (
    # Video-game companies.
    ("videogames_near", {"category_code": "games_video"}),
    # Companies that raised at least 1,000,000.
    ("1mcompanies_near", {"raised_amount": {"$gte": 1000000}}),
)

# Output column -> keyword arguments forwarded to fun.get_places.
_PLACE_SEARCHES = (
    ("starbucks_near", {"radius": 1000, "keyword": "starbucks"}),
    ("airports_near", {"radius": 1000, "type_": "airport"}),
    # Stadiums are searched in a wider radius than everything else.
    ("basket_stad_near", {"radius": 10000, "type_": "stadium", "keyword": "basketball"}),
    ("vegans_near", {"radius": 1000, "type_": "restaurant", "keyword": "vegan"}),
    ("schools_near", {"radius": 1000, "type_": "school"}),
    ("nightclubs_near", {"radius": 1000, "type_": "night_club"}),
    ("dog_hairdresser_near", {"radius": 1000, "type_": "pet_store", "keyword": "hairdresser"}),
)


def _count_companies_near(coord, own_name, extra_filter):
    """Count documents in collection `c` near `coord` matching `extra_filter`, excluding `own_name`."""
    query = {
        "$and": [
            {
                "location": {
                    "$near": {
                        "$geometry": coord,
                        # Skip anything closer than 1 m to the reference point.
                        "$minDistance": 1,
                        "$maxDistance": _COMPANY_RADIUS_M,
                    }
                }
            },
            extra_filter,
            {"name": {"$ne": own_name}},
        ]
    }
    # The cursor is materialised because count_documents() rejects $near.
    return len(list(c.find(query)))


def _count_places_near(latitude, longitude, search_kwargs):
    """Return the number of entries under "results" in one fun.get_places response."""
    response = fun.get_places(latitude, longitude, **search_kwargs)
    return len(response["results"])


def weight_matrix(list_dic, initial_index, last_index):
    """
    Build one row of proximity counts for each company in a slice of `list_dic`.

    For every company the function counts, via the module-level MongoDB
    collection `c`, the other companies within 1000 m that are video-game
    companies or raised at least 1,000,000, and, via `fun.get_places`, the
    number of nearby Starbucks, airports, basketball stadiums, vegan
    restaurants, schools, night clubs and dog hairdressers.

    ARGS:
    list_dic = list of dictionaries (JSON structured); each one must provide
               the keys "name", "latitude" and "longitude".
    initial_index = first index of the list to process (inclusive).
    last_index = index at which processing stops (exclusive, as in a slice).

    RETURNS:
    list of dictionaries, one per processed company, with the keys "name",
    "latitude", "longitude", "location" and one "<criterion>_near" count per
    criterion. An empty slice returns an empty list without any query.

    NOTE(review): the "location" point is written as [latitude, longitude],
    while GeoJSON expects [longitude, latitude]. This presumably matches how
    the collection stores its points -- confirm before changing either side.
    """
    rows = []

    for company in list_dic[initial_index:last_index]:
        name = company["name"]
        latitude = company["latitude"]
        longitude = company["longitude"]
        coord = {"type": "Point", "coordinates": [latitude, longitude]}

        row = {
            "name": name,
            "latitude": latitude,
            "longitude": longitude,
            "location": coord,
        }

        # MongoDB-derived columns first, then the Google Maps ones, so the
        # key order of each row stays stable.
        for column, extra_filter in _COMPANY_FILTERS:
            row[column] = _count_companies_near(coord, name, extra_filter)

        for column, search_kwargs in _PLACE_SEARCHES:
            row[column] = _count_places_near(latitude, longitude, search_kwargs)

        rows.append(row)

    return rows

In [10]:
# Second batch: companies at indices 3, 4 and 5 (the end index is exclusive).
test2 = fun.weight_matrix(data, 3, 6)

In [8]:
# `test` was never defined in this notebook, so this cell failed on a fresh
# kernel. Build the first batch (indices 0-2) the same way `test2` is built.
test = fun.weight_matrix(data, 0, 3)
pd_test1 = pd.DataFrame(test)

In [9]:
# Show the first batch of rows.
pd_test1

Unnamed: 0,name,latitude,longitude,location,videogames_near,1mcompanies_near,starbucks_near,airports_near,basket_stad_near,vegans_near,schools_near,nightclubs_near,dog_hairdresser_near
0,Negonation,40.463667,-3.74922,"{'type': 'Point', 'coordinates': [40.463667, -...",0,0,1,0,5,0,20,4,0
1,Fon,40.530724,-3.632687,"{'type': 'Point', 'coordinates': [40.530724, -...",0,0,1,2,2,2,20,9,1
2,eyeOS,41.385486,2.170287,"{'type': 'Point', 'coordinates': [41.3854855, ...",2,1,17,0,8,20,20,20,1


In [11]:
# Frame for the second batch; the bare name on the last line displays it.
pd_test2 = pd.DataFrame(test2)
pd_test2

Unnamed: 0,name,latitude,longitude,location,videogames_near,1mcompanies_near,starbucks_near,airports_near,basket_stad_near,vegans_near,schools_near,nightclubs_near,dog_hairdresser_near
0,]project-open[,41.383182,2.163697,"{'type': 'Point', 'coordinates': [41.383182, 2...",2,1,14,0,8,20,20,20,1
1,Starfish Community Group,41.387917,2.169919,"{'type': 'Point', 'coordinates': [41.387917, 2...",0,4,16,0,8,20,20,20,1
2,Pixoo,40.463667,-3.74922,"{'type': 'Point', 'coordinates': [40.463667, -...",0,0,1,0,5,0,20,4,0


In [12]:
# Stack both batches vertically. Each frame keeps its own index, so the
# combined frame carries repeated index labels (0, 1, 2, 0, 1, 2).
frames = [pd_test1, pd_test2]
final_df = pd.concat(frames, axis=0)

In [13]:
# Show the combined frame for both batches.
final_df

Unnamed: 0,name,latitude,longitude,location,videogames_near,1mcompanies_near,starbucks_near,airports_near,basket_stad_near,vegans_near,schools_near,nightclubs_near,dog_hairdresser_near
0,Negonation,40.463667,-3.74922,"{'type': 'Point', 'coordinates': [40.463667, -...",0,0,1,0,5,0,20,4,0
1,Fon,40.530724,-3.632687,"{'type': 'Point', 'coordinates': [40.530724, -...",0,0,1,2,2,2,20,9,1
2,eyeOS,41.385486,2.170287,"{'type': 'Point', 'coordinates': [41.3854855, ...",2,1,17,0,8,20,20,20,1
0,]project-open[,41.383182,2.163697,"{'type': 'Point', 'coordinates': [41.383182, 2...",2,1,14,0,8,20,20,20,1
1,Starfish Community Group,41.387917,2.169919,"{'type': 'Point', 'coordinates': [41.387917, 2...",0,4,16,0,8,20,20,20,1
2,Pixoo,40.463667,-3.74922,"{'type': 'Point', 'coordinates': [40.463667, -...",0,0,1,0,5,0,20,4,0
