In [57]:
import numpy as np
import pandas as pd
import os
import requests
import json
import math
import urllib.error
import urllib.parse
import urllib.request
# Import the required library
from geopy.geocoders import Nominatim

pd.set_option('display.max_columns', 500)

In [3]:
def get_url_root(endpoint = "search", *args):
    root = "https://api.foursquare.com/v3/places/"
    if args:
        return root + "/".join(args) + "/" + endpoint
    return root + endpoint

def convert_miles_to_m(mile_distance: int):
    return math.floor(mile_distance * 1609.34)

def get_lat_long(location_keyword: str):
    geolocator = Nominatim(user_agent="Kaja_atlas")
    location = geolocator.geocode(location_keyword)
    return location

def get_radius(transit_mode, alloted_time):
    """
    Determines the distance (in meters) in which the location search will occur
    
    :param str transit_mode: how user is planning on traveling. Possible options include walk, drive, public_transport.
    :param int alloted_time: how much time the user is willing to spend on their trip. Possible options include 1, 2, or 4 hours.
    """
    radius_dict = {"walk": 2, "public_transport": 10, "drive": 25}
    
    time_dict = {1: 0.5, 2: 1, 4: 2}
    
    return convert_miles_to_m(radius_dict[transit_mode] * time_dict[alloted_time])

In [99]:
from typing import Optional
def generate_query_string(location,
                          keyword: str, 
                          sort = None,
                          transit_mode = "walk",
                          alloted_time = 2,
                          min_price = 1,
                          max_price = 4
                         ):
    
    root = get_url_root()
    params_dict = {#"query": keyword,
                    "categories":str(keyword),
                   "min_price": str(min_price),
                   "max_price": str(max_price),
                   "ll": urllib.parse.quote(str(location.latitude) + "," + str(location.longitude)) if location else None,
                   "sort": sort.toUppercase() if sort else None,
                   "radius": str(get_radius(transit_mode = transit_mode, alloted_time = alloted_time)),
                   "limit": str(50)
    }
    
    params_list = []
    for k in params_dict.keys():
        if params_dict[k]:
            params_list.append(k + "=" + params_dict[k])
    return root + "?" + "&".join(params_list)



def get_search_data(lat,lon,category,limit):
    headers = {
    "accept": "application/json",
    "Authorization": "fsq30KK92zlHroWCI4Rve9lTo/oEhYz6473u4fp4Kc99HSQ="
    }

    # url = generate_query_string(**kwargs)
    root = "https://api.foursquare.com/v3/places/search"


    url = root + "?ll={}%2C{}&categories={}&limit={}".format(lat,lon,category,limit)


    response = requests.get(url, headers=headers)

    print(response)
    
    results_json = json.loads(response.text)
    df = pd.json_normalize(results_json, record_path = ["results"])
    df = df[["fsq_id"]]['fsq_id'].values
    return df

In [78]:
get_search_data(category=19000, limit = 50)

<Response [200]>


array(['55fb0d3c498e057977b916c2', '5673f864498ee472fb279a0e',
       '57c850c9498e6cb2f81623d4', '4bbba8953de8c9b6920c9aad',
       '589736e098f8aa7c14353730', '579e6799498e517851c06c7c',
       '57095f3e498e66a61edce1ed', '59a61602a8eb60101e3dad73',
       '4b84707ef964a5206d3531e3', '55fc873b498e02b966f58ade',
       '51b5bee6498eab8b52f4b3ed', '4bc3af5fdce4eee195b5719d',
       '4a8474c4f964a520a9fc1fe3', '430a6700f964a52036271fe3',
       '4b036b69f964a520484f22e3', '57840b36498eb36d8970bd8b',
       '504a36c7e4b03e711eb2cc25', '5073db8de4b07bf36cd7a5da',
       '4bc635cc51b376b06d2d1b6f', '4b9584fef964a52034a734e3',
       '5fa947f8712a0314da0430ed', '4b19bac2f964a520f4e223e3',
       '504a9f5de4b086a8f9a4feb0', '4b5b47b0f964a52003f128e3',
       '459ecd01f964a520bf401fe3', '4c38ef4993db0f470e912292',
       '4b5a0770f964a520c1a828e3', '44d17cecf964a5202b361fe3',
       '4bba2e60cf2fc9b6553fa102', '4b510525f964a520f53d27e3',
       '5860a7ef7d0f6d0aa200f4e6', '4c628b78ec94a593267

In [68]:
"https://api.foursquare.com/v3/places/search" + "?categories={}&limit={}".format(19000,50)


'https://api.foursquare.com/v3/places/search?categories=19000&limit=50'

In [100]:
def get_details_data(fsq_ids,foi):
    
    temp = list()
    features = ''
    
    root = "https://api.foursquare.com/v3/places"
    
    
    for i in foi: # build the url endpoint with features
        if(i == foi[-1]):
            features = features+i
        else:
            features= features + i + "%2C"
    
    
    for i in fsq_ids: # ping api with each id and its features (50 ids x category (10) = 500 places)
        
        details_root = root+ "/" + i  + "?fields="
        
        url = details_root + features
    

        headers = {
            "accept": "application/json",
            "Authorization": "fsq30KK92zlHroWCI4Rve9lTo/oEhYz6473u4fp4Kc99HSQ="
            }

        response = requests.get(url, headers=headers)

        results_json = json.loads(response.text)
        df = pd.json_normalize(results_json)
        temp.append(df)
    
    details_df = pd.concat(temp)

    return details_df
    

In [8]:
foi = ["fsq_id","name",'categories','distance',
"geocodes",'location','link',
'description','tel','website','social_media','hours_popular','rating',
'stats','price','photos','tips','features'
]

In [12]:
categories = {"10000":"	Arts and Entertainment",
            "11000": "Business and Professional Services",
            "12000": "Community and Government",
            "13000":"Dining and Drinking",
            "14000":"Event",
            "15000":"Health and Medicine",
            "16000":"Landmarks and Outdoors",
            "17000":"Retail",
            "18000":"Sports and Recreation",
            "19000":"Travel and Transportation"
            
            
            }

cat = list(categories.keys())

In [291]:
final_list = list()

for i in cat:
    temp = get_search_data(lat = 47.6062095 , lon = -122.3320708, category=i, limit = 50)
    temp_df = get_details_data(temp,foi)
    temp_df["Category Number"] = i
    temp_df["Category Name"] = categories[i].strip()
    print(len(temp_df))
    final_list.append(temp_df)

all_data = pd.concat(final_list)


<Response [200]>
50
<Response [200]>
50
<Response [200]>
50
<Response [200]>
50
<Response [200]>
30
<Response [200]>
50
<Response [200]>
50
<Response [200]>
50
<Response [200]>
50
<Response [200]>
50


In [102]:
len(all_data)

480

In [103]:
# all_data.columns.to_list()

itinerary_gen = all_data[["fsq_id","name","rating","price","website","geocodes.main.latitude",
"geocodes.main.longitude","location.formatted_address","stats.total_ratings","Category Number","Category Name"]]

In [109]:
itinerary_gen.reset_index(drop=True,inplace=True)

In [238]:
itinerary_gen['Category Number'] = pd.to_numeric(itinerary_gen['Category Number'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  itinerary_gen['Category Number'] = pd.to_numeric(itinerary_gen['Category Number'])


In [119]:
import googlemaps
from datetime import datetime

gmaps = googlemaps.Client(key='AIzaSyCtaAGeiteDZrR1Amay-5G8x1Hubo0b0O0')

# Geocoding an address
geocode_result = gmaps.geocode('Seattle, WA')

# Look up an address with reverse geocoding
reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))

# Request directions via public transit
now = datetime.now()
directions_result = gmaps.directions("Sydney Town Hall",
                                     "Parramatta, NSW",
                                     mode="transit",
                                     departure_time=now)

# Validate an address with address validation
addressvalidation_result =  gmaps.addressvalidation(['1600 Amphitheatre Pk'], 
                                                    regionCode='US',
                                                    locality='Mountain View', 
                                                    enableUspsCass=True)

In [121]:
gmaps = googlemaps.Client(key='AIzaSyCtaAGeiteDZrR1Amay-5G8x1Hubo0b0O0')


In [130]:
origins = itinerary_gen[itinerary_gen["Category Number"] == "10000"]["location.formatted_address"].to_list()
destinations = itinerary_gen[itinerary_gen["Category Number"] != "10000"]["location.formatted_address"].to_list()

In [128]:
origins = [
            "Perth, Australia",
            "Sydney, Australia",
            "Melbourne, Australia",
            "Adelaide, Australia",
            "Brisbane, Australia",
            "Darwin, Australia",
            "Hobart, Australia",
            "Canberra, Australia",
        ]
destinations = [
    "Uluru, Australia",
    "Kakadu, Australia",
    "Blue Mountains, Australia",
    "Bungle Bungles, Australia",
    "The Pinnacles, Australia",
]

In [165]:
gmaps.distance_matrix(lat_lon[0:2],lat_lon[3:5],mode="driving")


{'destination_addresses': ['305 Harrison St, Seattle, WA 98109, USA',
  '1010 Valley St, Seattle, WA 98109, USA'],
 'origin_addresses': ['1326 5th Ave, Seattle, WA 98101, USA',
  '475 2nd Ave S, Seattle, WA 98104, USA'],
 'rows': [{'elements': [{'distance': {'text': '2.5 km', 'value': 2479},
     'duration': {'text': '9 mins', 'value': 523},
     'status': 'OK'},
    {'distance': {'text': '2.8 km', 'value': 2766},
     'duration': {'text': '5 mins', 'value': 319},
     'status': 'OK'}]},
  {'elements': [{'distance': {'text': '6.1 km', 'value': 6131},
     'duration': {'text': '13 mins', 'value': 805},
     'status': 'OK'},
    {'distance': {'text': '6.8 km', 'value': 6763},
     'duration': {'text': '12 mins', 'value': 723},
     'status': 'OK'}]}],
 'status': 'OK'}

In [140]:
lat_lon = list(zip(itinerary_gen["geocodes.main.latitude"],itinerary_gen["geocodes.main.longitude"]))

In [166]:
def get_gmaps_distance(row):
    result = gmaps.distance_matrix(row['origin'], row['destination'], mode='driving')
    status = result['rows'][0]['elements'][0]['status']
    if status == "OK":
        KM = result['rows'][0]['elements'][0]['distance']['value'] / 1000
    else:
        KM = 0
    return KM



In [151]:
itinerary_gen[["geocodes.main.latitude","geocodes.main.longitude"]]

Unnamed: 0,geocodes.main.latitude,geocodes.main.longitude
0,47.609270,-122.333897
1,47.597171,-122.330151
2,47.620540,-122.350350
3,47.621546,-122.350902
4,47.626503,-122.335760
...,...,...
475,47.591627,-122.327198
476,47.621064,-122.336744
477,47.619749,-122.320371
478,47.622608,-122.346830


In [159]:
ig_o = itinerary_gen[itinerary_gen["Category Number"] == "10000"]
ig_d = itinerary_gen[itinerary_gen["Category Number"] == "13000"]


In [161]:
o_g = pd.DataFrame({

        "origin": list(zip(ig_o["geocodes.main.latitude"],ig_o["geocodes.main.longitude"])),
        "destination": list(zip(ig_d["geocodes.main.latitude"],ig_d["geocodes.main.longitude"]))

                }
)

In [168]:
o_g["distance"] = o_g.apply(get_gmaps_distance, axis=1)

In [179]:
o_g.sort_values("distance")
# o_g[o_g["origin"] == (47.621546, -122.350902)]

Unnamed: 0,origin,destination,distance
3,"(47.621546, -122.350902)","(47.621546, -122.350902)",0.0
32,"(47.610681, -122.341179)","(47.610067, -122.342491)",0.211
6,"(47.614853, -122.339726)","(47.613297, -122.334427)",0.679
13,"(47.61941, -122.35154)","(47.620741, -122.359699)",0.826
7,"(47.615506, -122.320305)","(47.613201, -122.332186)",1.007
31,"(47.615103, -122.315887)","(47.622318, -122.312816)",1.015
16,"(47.652133, -122.354877)","(47.651094, -122.343068)",1.199
21,"(47.593994, -122.332952)","(47.604116, -122.339507)",1.408
11,"(47.619769, -122.350454)","(47.609906, -122.34232)",1.721
0,"(47.60927, -122.333897)","(47.611291, -122.31657)",2.041


In [175]:
o_g
itinerary_gen["coords"] = list(zip(itinerary_gen["geocodes.main.latitude"],
itinerary_gen["geocodes.main.longitude"]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  itinerary_gen["coords"] = list(zip(itinerary_gen["geocodes.main.latitude"],


In [180]:
itinerary_gen[itinerary_gen['coords'] == (47.597171, -122.330151)	]

Unnamed: 0,fsq_id,name,rating,price,website,geocodes.main.latitude,geocodes.main.longitude,location.formatted_address,stats.total_ratings,Category Number,Category Name,coords
1,43768200f964a5204d2a1fe3,Lumen Field,9.1,3.0,https://www.lumenfield.com/plan-your-visit/dir...,47.597171,-122.330151,"800 Occidental Ave S, Seattle, WA 98134",2494.0,10000,Arts and Entertainment,"(47.597171, -122.330151)"


In [181]:
itinerary_gen[itinerary_gen['coords'] == (47.612704, -122.30582)]

Unnamed: 0,fsq_id,name,rating,price,website,geocodes.main.latitude,geocodes.main.longitude,location.formatted_address,stats.total_ratings,Category Number,Category Name,coords
151,513680e47ab48a1d5b9c0ae1,Chuck's Hop Shop,9.3,2.0,http://www.chuckscd.com,47.612704,-122.30582,"2001 E Union St (at 20th Ave), Seattle, WA 98122",338.0,13000,Dining and Drinking,"(47.612704, -122.30582)"


In [308]:
def get_distance(categories):

    ## get the distance matrix between a set of 2 locations (by coordinates)

    train = itinerary_gen[itinerary_gen['Category Number'].isin(categories)]
    
    start = train[train['Category Number'] == categories[0]].reset_index(drop=True)
    #print(len(start))
    end = train[train['Category Number'] == categories[-1]].reset_index(drop=True)
    #print(len(end))
    o_g = pd.DataFrame({
        
        "origin": start['coords'],
        "destination": end['coords']

                }
            )
    o_g.dropna(inplace=True)
    
    o_g["distance"] = o_g.apply(get_gmaps_distance, axis=1) 
    
    o_g = o_g.sort_values("distance").reset_index(drop=True)  ## 1.8 miles or less

    return o_g

In [309]:
get_distance([12000,14000])


Unnamed: 0,origin,destination,distance
0,"(47.562532, -122.293839)","(47.593308, -122.332743)",6.771
1,"(47.606652, -122.332671)","(47.663911, -122.377044)",8.163
2,"(47.598085, -122.327425)","(47.561214, -122.386473)",9.697
3,"(47.610995, -122.322612)","(47.665667, -122.380365)",10.205
4,"(47.674979, -122.381683)","(47.613179, -122.332105)",10.401
5,"(47.614526, -122.322787)","(47.70246, -122.324671)",11.686
6,"(47.617943, -122.238514)","(47.651453, -122.355945)",14.006
7,"(47.595629, -122.32302)","(47.501338, -122.356148)",14.541
8,"(47.676926, -122.37456)","(47.613773, -122.320431)",15.558
9,"(47.491049, -122.27836)","(47.595401, -122.331618)",16.098


In [279]:
def get_itinerary(categories):
    print(categories)
    test = list()
  

    for i in range(len(categories)-1):
        c = [categories[i],categories[i+1]]
        print(c)
        df = get_distance(c)
        test.append(df)
    
    #itinerary = pd.merge(left=test[0],right =test[1],left_on="destination",right_on = "origin")

    return test
    
ci = [10000,13000,14000,16000]
for i in range(0,len(ci)-1):
    print(ci[i],ci[i+1])
#get_itinerary(ci)

10000 13000
13000 14000
14000 16000


In [264]:
seq = iter(ci)

for i in seq:
    print((i,next(seq)))

(10000, 11000)


StopIteration: 

In [267]:
ci

[10000, 11000, 12000]

In [271]:
seq = [10000,11000,12000,14000]
it = iter(seq)
for x in it:
    print (x, next(it))

10000 11000
12000 14000


In [247]:
test = get_distance(ci)
test

Unnamed: 0,origin,destination,distance
0,"(47.621546, -122.350902)","(47.621546, -122.350902)",0.0
1,"(47.610681, -122.341179)","(47.610067, -122.342491)",0.211
2,"(47.614853, -122.339726)","(47.613297, -122.334427)",0.679
3,"(47.61941, -122.35154)","(47.620741, -122.359699)",0.826
4,"(47.615506, -122.320305)","(47.613201, -122.332186)",1.007
5,"(47.615103, -122.315887)","(47.622318, -122.312816)",1.015
6,"(47.652133, -122.354877)","(47.651094, -122.343068)",1.199
7,"(47.593994, -122.332952)","(47.604116, -122.339507)",1.408
8,"(47.619769, -122.350454)","(47.609906, -122.34232)",1.721
9,"(47.60927, -122.333897)","(47.611291, -122.31657)",2.041
