# Choosing a city from CityBikes API

In [16]:
#importing the required libraries.

import plotly.express as px
import pandas as pd
import numpy as np
import seaborn as sns                       # visualisation
import matplotlib.pyplot as plt             # visualisation
%matplotlib inline 
sns.set(color_codes=True)
import requests                            # Might be needed:
#import datetime                            # Might be needed
import json
import time
from pprint import pprint
from pathlib import Path 

# https://location.foursquare.com/developer/reference/place-search #url for the foursquare api requests

In [17]:
# Version counters, one per saved artifact. Each save cell interpolates its
# counter into the CSV file name and then increments it; all start at 1.
save_num_newdf = save_num_df = save_num_ll = save_num_lat = 1
save_num_long = save_num_df_fsq = save_num_df_yelp = 1

In [19]:
# trial_get = requests.get("https://api.citybik.es/v2/networks").json()
# # trial_get to test for function below

In [20]:
#define a function to return a list of dictionaries with information about the requested city
def citybikes_locations(city):
    """Return every CityBikes network whose location city matches ``city``.

    Downloads the full network listing from the CityBikes v2 API and keeps the
    entries whose ``location.city`` equals ``city``, ignoring case.

    Args:
        city: City name to look for, e.g. ``"Vancouver"``.

    Returns:
        list[dict]: The matching network dictionaries as returned by the API,
        in API order; an empty list when nothing matches.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within 30 seconds.
    """
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get("https://api.citybik.es/v2/networks", timeout=30)
    response.raise_for_status()

    wanted_city = city.lower()
    list_of_dict = []
    for citybike_dict in response.json()['networks']:
        new_city = (citybike_dict.get('location') or {}).get('city')
        # Skip entries without a usable city name instead of raising on .lower().
        if isinstance(new_city, str) and new_city.lower() == wanted_city:
            list_of_dict.append(citybike_dict)
    return list_of_dict

    # reference: https://medium.com/@ajosegun_/real-time-dashboard-in-python-b8c9a9c4e050
    

In [21]:
#define a function to return a list of dictionaries about the city's stations
def city_stations(city):
    """Return the station list of the first CityBikes network found for ``city``.

    Args:
        city: City name, matched case-insensitively by ``citybikes_locations``.

    Returns:
        list[dict] | None: The ``network.stations`` entries from the network's
        detail endpoint, or ``None`` (after printing an error line) when no
        network matches the city.

    Raises:
        requests.HTTPError: If the detail endpoint answers with an error status.
        requests.Timeout: If the detail endpoint does not answer within 30 seconds.
    """
    networks = citybikes_locations(city)
    if not networks:
        print('Error: No info found for {}'.format(city))
        return None

    # Only the first matching network is used.
    # NOTE(review): the API's href is presumably an absolute path such as
    # "/v2/networks/<id>"; strip the leading slash so the URL does not end up
    # with a doubled "//" after the host.
    network_address = networks[0]['href'].lstrip('/')
    url = "https://api.citybik.es/{}".format(network_address)

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()['network']['stations']

    # reference: https://medium.com/@ajosegun_/real-time-dashboard-in-python-b8c9a9c4e050

In [22]:
#define a function that will return a pd.df about the stations for input city
def station_data(city):
    """Build a DataFrame describing every bike station of ``city``.

    Args:
        city: City name passed on to ``city_stations``.

    Returns:
        pandas.DataFrame: One row per station with the columns
        ``Station Name``, ``slots``, ``empty_slots``, ``free_bikes``,
        ``status``, ``bank_card``, ``latitude``, ``longitude``, ``timestamp``
        and ``Unique ID``. The frame has these columns but no rows when
        ``city_stations`` yields nothing for the city.
    """
    columns = [
        'Station Name', 'slots', 'empty_slots', 'free_bikes', 'status',
        'bank_card', 'latitude', 'longitude', 'timestamp', 'Unique ID',
    ]

    station_info = city_stations(city)
    if not station_info:
        # city_stations returns None for an unknown city; looping over None
        # would raise TypeError, so hand back an empty, correctly-shaped frame.
        return pd.DataFrame(columns=columns)

    station_list = []
    for info in station_info:
        # NOTE(review): the keys inside 'extra' are assumed to vary between
        # networks - confirm; absent keys are recorded as None rather than
        # raising KeyError.
        extra = info.get('extra') or {}

        cards = extra.get('bank_card')
        if isinstance(cards, (list, tuple)):
            bank_card = ', '.join(str(card) for card in cards) if cards else 'No'
        elif cards:
            # A single non-list value is kept whole instead of being joined
            # character by character.
            bank_card = str(cards)
        else:
            bank_card = 'No'

        station_list.append({
            'Station Name': info['name'],
            'slots': extra.get('slots'),
            'empty_slots': info['empty_slots'],
            'free_bikes': info['free_bikes'],
            'status': extra.get('status'),
            'bank_card': bank_card,
            'latitude': info['latitude'],
            'longitude': info['longitude'],
            'timestamp': info['timestamp'],
            'Unique ID': extra.get('uid'),
        })
    return pd.DataFrame(station_list, columns=columns)

In [29]:
jsonFn1_van_network = citybikes_locations("Vancouver")


In [30]:
jsonFn2_van_stations = city_stations("Vancouver")

In [31]:
#Load the data into the DataFrame
df = station_data('Vancouver')

# Save a copy. Create the output folder first so to_csv does not fail when
# 'data/' is missing (e.g. on a fresh checkout).
Path('data').mkdir(parents=True, exist_ok=True)
df.to_csv(f'data/newdf_van_station_data{save_num_newdf}.csv', index=False)
save_num_newdf += 1

In [32]:
#lets check out the size of our new df!
df.shape

(241, 10)

In [33]:
# To display the top 5 rows and headers
df.head(5)

Unnamed: 0,Station Name,slots,empty_slots,free_bikes,status,bank_card,latitude,longitude,timestamp,Unique ID
0,0001 10th & Cambie,35,13,22,online,No,49.262487,-123.114397,2022-11-05T23:37:09.287000Z,1
1,0004 Yaletown-Roundhouse Station,16,8,8,online,No,49.274566,-123.121817,2022-11-05T23:37:09.297000Z,4
2,0005 Dunsmuir & Beatty,26,5,21,online,No,49.279764,-123.110154,2022-11-05T23:37:09.300000Z,5
3,0007 12th & Yukon (City Hall),16,10,6,online,No,49.260599,-123.113504,2022-11-05T23:37:09.301000Z,7
4,0008 8th & Ash,16,7,9,online,No,49.264215,-123.117772,2022-11-05T23:37:09.312000Z,8


In [34]:
# To display the last 5 rows
df.tail(5)

Unnamed: 0,Station Name,slots,empty_slots,free_bikes,status,bank_card,latitude,longitude,timestamp,Unique ID
236,0516 Heather & 29th,22,20,1,online,No,49.245535,-123.120496,2022-11-05T23:37:09.544000Z,516
237,0041 Cardero & Robson,26,7,19,online,No,49.289255,-123.132677,2022-11-05T23:37:09.360000Z,41
238,0352 Commercial & 20th,18,15,3,online,No,49.252656,-123.067965,2022-11-05T23:37:09.521000Z,352
239,0196 Hornby & Drake,24,20,4,online,No,49.277527,-123.129464,2022-11-05T23:37:09.452000Z,196
240,210 Burrard & 14th,18,10,8,online,No,49.259469,-123.145718,2022-11-05T23:37:09.555000Z,210


In [35]:
# Checking the types of data
df.dtypes

Station Name     object
slots             int64
empty_slots       int64
free_bikes        int64
status           object
bank_card        object
latitude        float64
longitude       float64
timestamp        object
Unique ID        object
dtype: object

In [36]:
#or also/alternatively:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 241 entries, 0 to 240
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Station Name  241 non-null    object 
 1   slots         241 non-null    int64  
 2   empty_slots   241 non-null    int64  
 3   free_bikes    241 non-null    int64  
 4   status        241 non-null    object 
 5   bank_card     241 non-null    object 
 6   latitude      241 non-null    float64
 7   longitude     241 non-null    float64
 8   timestamp     241 non-null    object 
 9   Unique ID     241 non-null    object 
dtypes: float64(2), int64(3), object(5)
memory usage: 19.0+ KB


----
#

In [37]:
#save a back up just incase we come to regret some of our changes:
df_backup = df.copy()
#confirm the save:
df_backup.head(5)

Unnamed: 0,Station Name,slots,empty_slots,free_bikes,status,bank_card,latitude,longitude,timestamp,Unique ID
0,0001 10th & Cambie,35,13,22,online,No,49.262487,-123.114397,2022-11-05T23:37:09.287000Z,1
1,0004 Yaletown-Roundhouse Station,16,8,8,online,No,49.274566,-123.121817,2022-11-05T23:37:09.297000Z,4
2,0005 Dunsmuir & Beatty,26,5,21,online,No,49.279764,-123.110154,2022-11-05T23:37:09.300000Z,5
3,0007 12th & Yukon (City Hall),16,10,6,online,No,49.260599,-123.113504,2022-11-05T23:37:09.301000Z,7
4,0008 8th & Ash,16,7,9,online,No,49.264215,-123.117772,2022-11-05T23:37:09.312000Z,8


In [38]:
#dropping irrelevant columns
# df = df.drop(['col_a', 'col_b', 'col_c_etc.'], axis=1)
#recheck headings:
# df.head(5)

#I dont know if I'm ready to do this yet, I'll check back again later:

In [39]:
# Give the columns short snake_case names; 'slots' already fits and keeps its name.
df = df.rename(
    columns={
        'Station Name': 'name',
        'empty_slots': 'empty',
        'free_bikes': 'available',
        'Unique ID': 'unique_id',
    }
)
df.head(5)

Unnamed: 0,name,slots,empty,available,status,bank_card,latitude,longitude,timestamp,unique_id
0,0001 10th & Cambie,35,13,22,online,No,49.262487,-123.114397,2022-11-05T23:37:09.287000Z,1
1,0004 Yaletown-Roundhouse Station,16,8,8,online,No,49.274566,-123.121817,2022-11-05T23:37:09.297000Z,4
2,0005 Dunsmuir & Beatty,26,5,21,online,No,49.279764,-123.110154,2022-11-05T23:37:09.300000Z,5
3,0007 12th & Yukon (City Hall),16,10,6,online,No,49.260599,-123.113504,2022-11-05T23:37:09.301000Z,7
4,0008 8th & Ash,16,7,9,online,No,49.264215,-123.117772,2022-11-05T23:37:09.312000Z,8


In [40]:
# Collect the rows that exactly repeat an earlier row, then print the full
# frame's shape followed by the shape of the duplicate subset for comparison.
duplicate_rows_df = df.loc[df.duplicated()]
print(df.shape)
print("number of duplicate rows: ", duplicate_rows_df.shape)

(241, 10)
number of duplicate rows:  (0, 10)


In [41]:
# # we don't appear to have any duplicate rows
# #I'll comment this code out for now just incase it's needed later

# #first a count of the total rows to compare to #of duplicate rows
# df.count()
# #then we drop, and recheck:
# df = df.drop_duplicates()
# df.head(5)

In [42]:
#
df.count()

name         241
slots        241
empty        241
available    241
status       241
bank_card    241
latitude     241
longitude    241
timestamp    241
unique_id    241
dtype: int64

In [43]:
#count missing or null values per column: pretty sure we don't have any, but just in case:
print(df.isnull().sum())

name         0
slots        0
empty        0
available    0
status       0
bank_card    0
latitude     0
longitude    0
timestamp    0
unique_id    0
dtype: int64


In [44]:
# #again, we dont have any, but the code will placehold for now:

# #we'll drop the full rows that contain null values:
# df = df.dropna()
# df.count()

In [45]:
# #validating the dropped missing values
# print(df.isnull().sum())

In [46]:
#detecting outliers on numerical values:
# recheck data types to find the columns to check
df.dtypes

name          object
slots          int64
empty          int64
available      int64
status        object
bank_card     object
latitude     float64
longitude    float64
timestamp     object
unique_id     object
dtype: object

In [47]:
#slot values are appropriate as integers
#status and bank cards are strings - pandas reports string columns as the object dtype, so this is fine
#lat and long are floats, as they should be
#time stamp is an object, we could probably convert that to a date time item
# and id is an obj, which is good.  

#just the time then:


In [48]:
# Save a copy of our df with its new changes.
# The raw export cell writes 'data/newdf_van_station_data{save_num_newdf}.csv'
# and both counters start at 1, so reusing that stem here would overwrite the
# raw copy. A distinct stem keeps both files.
df.to_csv(f'data/df_van_station_data{save_num_df}.csv', index=False)
save_num_df += 1

----
# Part 2

In [49]:
df.head(5)

Unnamed: 0,name,slots,empty,available,status,bank_card,latitude,longitude,timestamp,unique_id
0,0001 10th & Cambie,35,13,22,online,No,49.262487,-123.114397,2022-11-05T23:37:09.287000Z,1
1,0004 Yaletown-Roundhouse Station,16,8,8,online,No,49.274566,-123.121817,2022-11-05T23:37:09.297000Z,4
2,0005 Dunsmuir & Beatty,26,5,21,online,No,49.279764,-123.110154,2022-11-05T23:37:09.300000Z,5
3,0007 12th & Yukon (City Hall),16,10,6,online,No,49.260599,-123.113504,2022-11-05T23:37:09.301000Z,7
4,0008 8th & Ash,16,7,9,online,No,49.264215,-123.117772,2022-11-05T23:37:09.312000Z,8


## API for data collection at the location points of each station


foursquare api:
https://location.foursquare.com/developer/reference/place-search

In [50]:
#define a function that returns a DataFrame of Foursquare places found around one 'lat,long' coordinate


import os


def station_loci(ll_value):
    '''Query Foursquare Place Search for places around one coordinate.

    Args:
        ll_value: Latitude and longitude as one 'lat,long' string,
            e.g. '49.262487,-123.114397'.

    Returns:
        pandas.DataFrame: One row per returned place with the columns
        'lat-long' (the queried coordinate), 'cat_id', 'category_name',
        'name', 'distance', 'latitude', 'longitude' and 'formatted_address'.
        The frame has these columns but no rows when nothing is returned.
        Fields missing from a place are recorded as None.

    Raises:
        KeyError: If the FOURSQUARE_API_KEY environment variable is not set.
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within 30 seconds.
    '''
    api_key = os.environ["FOURSQUARE_API_KEY"]
    # NOTE(review): the query string asks for `fields=rating` while the code
    # below reads name, distance, geocodes, location and categories - confirm
    # against the Place Search reference that this is the intended field list.
    fsq_data_url = (f'https://api.foursquare.com/v3/places/search?ll={ll_value}&radius=1000&fields=rating')
    headers = {
        "accept": "application/json",             #Incorporate headers and API key
        "Authorization": api_key
    }
    # A timeout keeps one stalled request from blocking the per-station loop.
    response = requests.get(fsq_data_url, headers=headers, timeout=30)           #Perform get request
    response.raise_for_status()
    fsq_loci_results = response.json()

    columns = [
        'lat-long', 'cat_id', 'category_name', 'name', 'distance',
        'latitude', 'longitude', 'formatted_address',
    ]
    list_of_dict = []
    for poi in fsq_loci_results.get('results', []):
        # Only the first listed category is kept; a place with an empty
        # category list gets None instead of raising IndexError.
        categories = poi.get('categories') or []
        primary_category = categories[0] if categories else {}
        main_geocode = (poi.get('geocodes') or {}).get('main') or {}
        location = poi.get('location') or {}

        list_of_dict.append({
            'lat-long': f'{ll_value}',
            'cat_id': primary_category.get('id'),
            'category_name': primary_category.get('name'),
            'name': poi.get('name'),
            'distance': poi.get('distance'),
            'latitude': main_geocode.get('latitude'),
            'longitude': main_geocode.get('longitude'),
            'formatted_address': location.get('formatted_address'),
            # Fields considered earlier but not collected: link, timezone, and
            # the location sub-fields address, country, cross_street,
            # locality, postcode, region.
        })
    return pd.DataFrame(list_of_dict, columns=columns)

In [None]:
# ', '.join(info['extra']['bank_card']) if info['extra']['bank_card'] else 'No'

In [51]:
test_ll = station_loci('49.262487,-123.114397')
test_ll

Unnamed: 0,lat-long,cat_id,category_name,name,distance,latitude,longitude,formatted_address
0,"49.262487,-123.114397",11009,Automotive Service,Canadian Tire Auto Service Centre,291,49.26511,-123.114437,"2290 Cambie St (at W 7th Ave), Vancouver BC V5..."
1,"49.262487,-123.114397",13306,Taco Restaurant,La Taqueria Pinche Taco Shop,164,49.263601,-123.112665,"2450 Yukon St (at W Broadway), Vancouver BC V5..."
2,"49.262487,-123.114397",13199,Indian Restaurant,Vij's,553,49.257634,-123.114751,"3106 Cambie St (at W 15th Ave), Vancouver BC V..."
3,"49.262487,-123.114397",13035,Coffee Shop,Milano Coffee,430,49.263703,-123.108618,"156 8th Ave W (btwn Columbia & Manitoba St), V..."
4,"49.262487,-123.114397",13003,Bar,33 Acres Brewing Co,674,49.263877,-123.105307,"15 8th Ave W (btwn Manitoba & Ontario), Vancou..."
5,"49.262487,-123.114397",13035,Coffee Shop,Elysian Coffee,724,49.264587,-123.105043,"2301 Ontario St (at 7th Ave), Vancouver BC V5T..."
6,"49.262487,-123.114397",13064,Pizzeria,Corduroy Pie Co,782,49.256883,-123.121094,"758 16th Ave W (Willow), Vancouver BC V5Z 1S7"
7,"49.262487,-123.114397",13026,BBQ Joint,Gyu-Kaku BBQ,813,49.263191,-123.125282,"950 Broadway W (at Laurel St), Vancouver BC V5..."
8,"49.262487,-123.114397",13059,Juice Bar,The Juice Truck,779,49.266348,-123.105436,"28 5th Ave W (btwn Manitoba & Ontario), Vancou..."
9,"49.262487,-123.114397",16032,Park,Charleson Park,891,49.267058,-123.124558,"999 Charleson St, Vancouver BC"


In [52]:
test_ll.shape

(10, 8)

In [53]:
#make a df copy for the lat and long values in string format as needed for the fsq api query
ll_df = df.copy()
ll_df.head(5)

Unnamed: 0,name,slots,empty,available,status,bank_card,latitude,longitude,timestamp,unique_id
0,0001 10th & Cambie,35,13,22,online,No,49.262487,-123.114397,2022-11-05T23:37:09.287000Z,1
1,0004 Yaletown-Roundhouse Station,16,8,8,online,No,49.274566,-123.121817,2022-11-05T23:37:09.297000Z,4
2,0005 Dunsmuir & Beatty,26,5,21,online,No,49.279764,-123.110154,2022-11-05T23:37:09.300000Z,5
3,0007 12th & Yukon (City Hall),16,10,6,online,No,49.260599,-123.113504,2022-11-05T23:37:09.301000Z,7
4,0008 8th & Ash,16,7,9,online,No,49.264215,-123.117772,2022-11-05T23:37:09.312000Z,8


In [54]:
# Build the 'lat,long' text column needed for the Foursquare query.

# In one chain: discard stations that lack a latitude or longitude, join the
# two coordinates into a comma-separated string column named 'll', and remove
# rows whose coordinate string is exactly '0.0,0.0'.
ll_df = (
    ll_df
    .dropna(subset=['latitude', 'longitude'])
    .assign(ll=lambda frame: frame['latitude'].astype(str) + ',' + frame['longitude'].astype(str))
    .loc[lambda frame: frame['ll'] != '0.0,0.0']
)
print(ll_df.shape)
print(ll_df.dtypes)
ll_df.head()

# reference #from Data wrangling exercise

(241, 11)
name          object
slots          int64
empty          int64
available      int64
status        object
bank_card     object
latitude     float64
longitude    float64
timestamp     object
unique_id     object
ll            object
dtype: object


Unnamed: 0,name,slots,empty,available,status,bank_card,latitude,longitude,timestamp,unique_id,ll
0,0001 10th & Cambie,35,13,22,online,No,49.262487,-123.114397,2022-11-05T23:37:09.287000Z,1,"49.262487,-123.114397"
1,0004 Yaletown-Roundhouse Station,16,8,8,online,No,49.274566,-123.121817,2022-11-05T23:37:09.297000Z,4,"49.274566,-123.121817"
2,0005 Dunsmuir & Beatty,26,5,21,online,No,49.279764,-123.110154,2022-11-05T23:37:09.300000Z,5,"49.279764,-123.110154"
3,0007 12th & Yukon (City Hall),16,10,6,online,No,49.260599,-123.113504,2022-11-05T23:37:09.301000Z,7,"49.260599,-123.113504"
4,0008 8th & Ash,16,7,9,online,No,49.264215,-123.117772,2022-11-05T23:37:09.312000Z,8,"49.264215,-123.117772"


In [55]:
# create a series of ll:
ll = ll_df['ll']
ll.head(5)

0      49.262487,-123.114397
1      49.274566,-123.121817
2      49.279764,-123.110154
3      49.260599,-123.113504
4      49.264215,-123.117772
               ...          
236    49.245535,-123.120496
237    49.289255,-123.132677
238    49.252656,-123.067965
239    49.277527,-123.129464
240    49.259469,-123.145718
Name: ll, Length: 241, dtype: object

In [56]:
# Query Foursquare once per station coordinate and keep one DataFrame per
# 'lat,long' string. An explicit loop is used so that frames collected so far
# stay in df_list if a request fails part-way through; the result is appended
# directly rather than stored in a variable named `next`, which would rebind
# the built-in of that name for the rest of the notebook.
df_list = []
for loci in ll:
    df_list.append(station_loci(loci))

In [57]:
# Stack the per-station frames into one DataFrame with a single concat call;
# concatenating inside a loop re-copies all accumulated rows on every pass.
# Row labels are kept as they come from each per-station frame.
# pd.concat raises on an empty list, so fall back to an empty frame.
df_fsq = pd.concat(df_list) if df_list else pd.DataFrame()

In [58]:
#confirm success:
df_fsq.shape

(2410, 8)

In [59]:
#save the new df:
df_fsq.to_csv(f'data/df_fsq_all_loci{save_num_df_fsq}.csv', index=False)
save_num_df_fsq += 1

In [60]:
df_fsq.head(20)

Unnamed: 0,lat-long,cat_id,category_name,name,distance,latitude,longitude,formatted_address
0,"49.262487,-123.114397",11009,Automotive Service,Canadian Tire Auto Service Centre,291,49.26511,-123.114437,"2290 Cambie St (at W 7th Ave), Vancouver BC V5..."
1,"49.262487,-123.114397",13306,Taco Restaurant,La Taqueria Pinche Taco Shop,164,49.263601,-123.112665,"2450 Yukon St (at W Broadway), Vancouver BC V5..."
2,"49.262487,-123.114397",13199,Indian Restaurant,Vij's,553,49.257634,-123.114751,"3106 Cambie St (at W 15th Ave), Vancouver BC V..."
3,"49.262487,-123.114397",13035,Coffee Shop,Milano Coffee,430,49.263703,-123.108618,"156 8th Ave W (btwn Columbia & Manitoba St), V..."
4,"49.262487,-123.114397",13003,Bar,33 Acres Brewing Co,674,49.263877,-123.105307,"15 8th Ave W (btwn Manitoba & Ontario), Vancou..."
5,"49.262487,-123.114397",13035,Coffee Shop,Elysian Coffee,724,49.264587,-123.105043,"2301 Ontario St (at 7th Ave), Vancouver BC V5T..."
6,"49.262487,-123.114397",13064,Pizzeria,Corduroy Pie Co,782,49.256883,-123.121094,"758 16th Ave W (Willow), Vancouver BC V5Z 1S7"
7,"49.262487,-123.114397",13026,BBQ Joint,Gyu-Kaku BBQ,813,49.263191,-123.125282,"950 Broadway W (at Laurel St), Vancouver BC V5..."
8,"49.262487,-123.114397",13059,Juice Bar,The Juice Truck,779,49.266348,-123.105436,"28 5th Ave W (btwn Manitoba & Ontario), Vancou..."
9,"49.262487,-123.114397",16032,Park,Charleson Park,891,49.267058,-123.124558,"999 Charleson St, Vancouver BC"


----
> **Personal NOTE:** I think Marty was helping me while I was away from my desk... but I don't know what these helpers are for. Saving them as a note in this markdown cell for now.
```py
def make_get_request(api_key, url):
    headers = {
        "accept": "application/json",
        "Authorization": api_key
    }

    return requests.get(url, headers=headers).json()

def make_fsq_request(url):
    api_key = os.environ["FOURSQUARE_API_KEY"]
    base_url = (f'https://api.foursquare.com/v3/places')
    return make_get_request(api_key, base_url)

def make_yelp_request(ll_value):
    api_key = os.environ["FOURSQUARE_API_KEY"]
    base_url = (f'https://api.foursquare.com/v3/places/search?ll={ll_value}')
    return make_get_request(api_key, base_url)
```
----

#### Yelp
Yelp api = https://api.yelp.com/v3/businesses/search?latitude=49.262487&longitude=-123.114397&radius=1000


In [61]:
#define a function that returns a DataFrame of Yelp businesses found around one latitude/longitude pair

import requests
import os


def station_loci(lat, long):
    '''Query Yelp business search for businesses around one coordinate.

    NOTE: this definition reuses the name of the Foursquare helper defined
    earlier in the notebook, so it replaces that helper once this cell runs.

    Args:
        lat: Latitude, as a string such as '49.262487' or a number.
        long: Longitude, as a string such as '-123.114397' or a number.

    Returns:
        pandas.DataFrame: One row per returned business with the columns
        'lat-long' (the queried coordinate), 'category_name', 'categories',
        'name', 'distance', 'latitude', 'longitude', 'address', 'is_closed',
        'review_count' and 'rating'. The frame has these columns but no rows
        when nothing is returned. Fields missing from a business are
        recorded as None.

    Raises:
        KeyError: If the YELP_API_KEY environment variable is not set.
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within 30 seconds.
    '''
    api_key = os.environ["YELP_API_KEY"]
    yelp_data_url = (f'https://api.yelp.com/v3/businesses/search?latitude={lat}&longitude={long}&radius=1000')
    headers = {
        'accept': 'application/json',             #Incorporate headers and API key
        'Authorization': f'Bearer {api_key}'
    }
    # The empty request body that used to be sent with this GET carried no
    # information and is dropped. A timeout keeps one stalled request from
    # blocking the per-station loop.
    response = requests.get(yelp_data_url, headers=headers, timeout=30)           #Perform get request
    response.raise_for_status()
    yelp_loci_results = response.json()

    columns = [
        'lat-long', 'category_name', 'categories', 'name', 'distance',
        'latitude', 'longitude', 'address', 'is_closed', 'review_count',
        'rating',
    ]
    list_of_dict = []
    for poi in yelp_loci_results.get('businesses', []):
        # 'category_name' is the title of the first listed category; the full
        # list is kept in 'categories'. A business with no categories gets
        # None instead of raising IndexError.
        categories = poi.get('categories') or []
        coordinates = poi.get('coordinates') or {}
        location = poi.get('location') or {}

        list_of_dict.append({
            'lat-long': f'{lat}, {long}',
            'category_name': categories[0].get('title') if categories else None,
            'categories': categories,
            'name': poi.get('name'),
            'distance': poi.get('distance'),
            'latitude': coordinates.get('latitude'),
            'longitude': coordinates.get('longitude'),
            'address': location.get('display_address'),
            'is_closed': poi.get('is_closed'),
            'review_count': poi.get('review_count'),
            'rating': poi.get('rating'),
            # Fields considered earlier but not collected: display_phone and
            # the location sub-fields address1, city, zip_code, country, state.
        })
    return pd.DataFrame(list_of_dict, columns=columns)

In [62]:
# a test run of the function
yelp_test_ll = station_loci('49.262487','-123.114397')
yelp_test_ll

Unnamed: 0,lat-long,category_name,name,distance,latitude,longitude,address,is_closed,review_count,rating
0,"49.262487, -123.114397",Mexican,La Taqueria Pinche Taco Shop,169.517456,49.263559,-123.112736,"[2450 Yukon Street, Vancouver, BC V5Z 3V6, Can...",False,667,4.0
1,"49.262487, -123.114397",Chinese,Peaceful Restaurant,146.418009,49.263147,-123.116143,"[110-532 W Broadway, Vancouver, BC V5Z 1E9, Ca...",False,610,3.5
2,"49.262487, -123.114397",Cafes,Marulilu Cafe,102.329817,49.26338,-123.11415,"[451 W Broadway, Vancouver, BC V5Y 1R4, Canada]",False,281,4.0
3,"49.262487, -123.114397",Noodles,Hokkaido Ramen Santouka,191.044234,49.263127,-123.116892,"[558 W Broadway, Vancouver, BC V5Z 1E9, Canada]",False,218,4.0
4,"49.262487, -123.114397",Japanese,Saku,176.44461,49.263125,-123.116603,"[548 W Broadway, Vancouver, BC V5Z 1E9, Canada]",False,182,4.0
5,"49.262487, -123.114397",Indian,Vij's Restaurant,547.596751,49.25754,-123.11479,"[3106 Cambie Street, Vancouver, BC V5Z 2W2, Ca...",False,366,4.0
6,"49.262487, -123.114397",Coffee & Tea,Yolks,170.324037,49.263114,-123.116353,"[546 W Broadway, Vancouver, BC V5Z 1E9, Canada]",False,336,3.5
7,"49.262487, -123.114397",Desserts,Passion8 Dessert Cafe,452.638601,49.258435,-123.114998,"[3010 Cambie Street, Vancouver, BC V5Z 2V9, Ca...",False,201,4.0
8,"49.262487, -123.114397",Indian,Indian Roti Kitchen,391.283162,49.25901,-123.11538,"[2961 Cambie Street, Vancouver, BC V5Z 2V7, Ca...",False,153,4.5
9,"49.262487, -123.114397",Japanese,Uma Sushi,152.157897,49.263805,-123.113729,"[450 W 8th Avenue, Vancouver, BC V5Y 1N9, Canada]",False,109,4.5


In [63]:
#to run location queries, we'll need the lat and long values for each station to be accessible:

# create a series of latitude:
lat = df['latitude']
print(lat.head(5))
# create a series of longitude:
long = df['longitude']
print(long.head(5))

# Save the new series
lat.to_csv(f'data/lat_series{save_num_lat}.csv', index=False)
long.to_csv(f'data/long_series{save_num_long}.csv', index=False)
save_num_lat += 1
save_num_long += 1

0    49.262487
1    49.274566
2    49.279764
3    49.260599
4    49.264215
Name: latitude, dtype: float64
0   -123.114397
1   -123.121817
2   -123.110154
3   -123.113504
4   -123.117772
Name: longitude, dtype: float64


In [64]:
# Run every station's latitude/longitude pair through the Yelp query function.
# This produces one DataFrame per station in df.
#
# Walking the two series in step with zip() pairs values by position, so it
# does not depend on df having a default 0..n-1 index the way a label lookup
# such as lat[row_counter] does. The result is appended directly rather than
# stored in a variable named `next`, which would rebind the built-in.
yelp_df_list = []
for station_lat, station_long in zip(lat, long):
    yelp_df_list.append(station_loci(station_lat, station_long))


In [65]:
# Stack the per-station frames into one DataFrame with a single concat call;
# concatenating inside a loop re-copies all accumulated rows on every pass.
# Row labels are kept as they come from each per-station frame.
# pd.concat raises on an empty list, so fall back to an empty frame.
df_yelp = pd.concat(yelp_df_list) if yelp_df_list else pd.DataFrame()

In [66]:
#confirm success:
df_yelp.shape

(4788, 10)

In [67]:
# save new df
df_yelp.to_csv(f'data/newdf_yelp_all_loci{save_num_df_yelp}.csv', index=False)
save_num_df_yelp += 1

In [68]:
df_yelp.head(50)

Unnamed: 0,lat-long,category_name,name,distance,latitude,longitude,address,is_closed,review_count,rating
0,"49.262487, -123.114397",Mexican,La Taqueria Pinche Taco Shop,169.517456,49.263559,-123.112736,"[2450 Yukon Street, Vancouver, BC V5Z 3V6, Can...",False,667,4.0
1,"49.262487, -123.114397",Chinese,Peaceful Restaurant,146.418009,49.263147,-123.116143,"[110-532 W Broadway, Vancouver, BC V5Z 1E9, Ca...",False,610,3.5
2,"49.262487, -123.114397",Cafes,Marulilu Cafe,102.329817,49.26338,-123.11415,"[451 W Broadway, Vancouver, BC V5Y 1R4, Canada]",False,281,4.0
3,"49.262487, -123.114397",Noodles,Hokkaido Ramen Santouka,191.044234,49.263127,-123.116892,"[558 W Broadway, Vancouver, BC V5Z 1E9, Canada]",False,218,4.0
4,"49.262487, -123.114397",Japanese,Saku,176.44461,49.263125,-123.116603,"[548 W Broadway, Vancouver, BC V5Z 1E9, Canada]",False,182,4.0
5,"49.262487, -123.114397",Indian,Vij's Restaurant,547.596751,49.25754,-123.11479,"[3106 Cambie Street, Vancouver, BC V5Z 2W2, Ca...",False,366,4.0
6,"49.262487, -123.114397",Coffee & Tea,Yolks,170.324037,49.263114,-123.116353,"[546 W Broadway, Vancouver, BC V5Z 1E9, Canada]",False,336,3.5
7,"49.262487, -123.114397",Desserts,Passion8 Dessert Cafe,452.638601,49.258435,-123.114998,"[3010 Cambie Street, Vancouver, BC V5Z 2V9, Ca...",False,201,4.0
8,"49.262487, -123.114397",Indian,Indian Roti Kitchen,391.283162,49.25901,-123.11538,"[2961 Cambie Street, Vancouver, BC V5Z 2V7, Ca...",False,153,4.5
9,"49.262487, -123.114397",Japanese,Uma Sushi,152.157897,49.263805,-123.113729,"[450 W 8th Avenue, Vancouver, BC V5Y 1N9, Canada]",False,109,4.5


In [71]:
# Exact, case-sensitive match on the primary category title. The recorded
# output of this cell contains no rows: the titles seen in the sample above
# are specific cuisines ("Mexican", "Chinese", ...), not the word "restaurant".
df_yelp.query('category_name == "restaurant"')

Unnamed: 0,lat-long,category_name,name,distance,latitude,longitude,address,is_closed,review_count,rating


In [None]:
#save data to file
# Each artifact is written under its own file stem with its version counter
# in the name; the counter is then incremented so a later save in the same
# session gets a new file name.

# station table (renamed columns)
df.to_csv(f'data/station_data_sample{save_num_df}.csv', index=False)
save_num_df += 1
# combined Foursquare results for all stations
df_fsq.to_csv(f'data/foursq_df_all_stations{save_num_df_fsq}.csv', index=False)
save_num_df_fsq += 1
# 'lat,long' strings used for the Foursquare queries
ll.to_csv(f'data/lat_long_series{save_num_ll}.csv', index=False)
save_num_ll += 1
