In [71]:
import pandas as pd
import json
import requests
import os

In [72]:

api_key_4sq = os.environ['$FOURSQUARE']

### Reading data from city bikes from csv


In [73]:
df_city_bikes_paris = pd.read_csv('df_city_bikes_paris.csv', index_col=[0])

In [74]:
df_city_bikes_paris.head()

Unnamed: 0,latitude,longitude,Lat-Long,name,free_bikes,empty_slots,extra.uid
0,48.865983,2.275725,"48.865983,2.275725",Benjamin Godard - Victor Hugo,5,30,16107
1,48.853756,2.339096,"48.85375581057431,2.3390958085656166",André Mazet - Saint-André des Arts,1,53,6015
2,48.867872,2.364898,"48.867872484749,2.3648982158072",Faubourg Du Temple - Republique,6,27,11037
3,48.856452,2.334852,"48.856451985395786,2.334851883351803",Beaux-Arts - Bonaparte,4,16,6021
4,48.879296,2.33736,"48.87929591733507,2.3373600840568547",Toudouze - Clauzel,3,17,9020


In [75]:
df_city_bikes_paris.shape

(1447, 7)

#### There are 1447 bike stations in Paris

## Extracting a sample dataframe from City bikes to check for API Calls

In [76]:
df_city_bikes_paris_sample = df_city_bikes_paris.iloc[0:3,0:3]

In [77]:
df_city_bikes_paris_sample

Unnamed: 0,latitude,longitude,Lat-Long
0,48.865983,2.275725,"48.865983,2.275725"
1,48.853756,2.339096,"48.85375581057431,2.3390958085656166"
2,48.867872,2.364898,"48.867872484749,2.3648982158072"


In [12]:
df_city_bikes_paris_sample.shape

(3, 3)

## Extrating data from Foursquare for above bike stations for a sample to verify that API calls are working

In [13]:
# Setting up url and headers values

url_4sq = "https://api.foursquare.com/v3/places/search"
headers = { "Accept": "application/json", 'Authorization' : api_key_4sq}

# Creating list to append data from all lat/long

results =[]

# looping throgh the city bikes data frame for each lat - long

for index,rows in df_city_bikes_paris_sample.iterrows():    
    params_dict = { "query": "restaurant","ll": rows["Lat-Long"],"radius": "1000"} 
    response = requests.get(url_4sq, params=params_dict, headers=headers)
    data = json.loads(response.text)
    for item in data['results']:
    # Extract the data for each item and append it to the results list
        result = {
            'fsq_id': item['fsq_id'],
            'name': item['name'],
            'distance': item['distance'],
            'latitude': item['geocodes']['main']['latitude'],
            'longitude': item['geocodes']['main']['longitude'],
            'categories': ', '.join([c['name'] for c in item['categories']]),
            'city_bikes_ll' : rows["Lat-Long"]
    }
        results.append(result)

# Converting to Dataframe    
    
city_bikes_foursquare_sample_df = pd.DataFrame(results)

In [16]:
city_bikes_foursquare_sample_df.head()

Unnamed: 0,fsq_id,name,distance,latitude,longitude,categories,city_bikes_ll
0,4bade735f964a52042713be3,Café Lamartine,31,48.86594,2.276189,"Bar, Café, Brasserie","48.865983,2.275725"
1,5324204f498e195bc3e21511,You Decide,223,48.866904,2.278407,"Café, Coffee Shop, Restaurant","48.865983,2.275725"
2,4ec4f3cd2c5b6307e69711f3,Le Pain Quotidien Victor Hugo,220,48.866964,2.278529,Restaurant,"48.865983,2.275725"
3,4be54720bcef2d7f369d03e5,Hansan,158,48.865092,2.274127,"Asian Restaurant, French Restaurant, Vietnames...","48.865983,2.275725"
4,57caabf0cd107656b7a22851,Le Zinc du 16,246,48.868135,2.275194,"Café, Restaurant","48.865983,2.275725"


### Extrating data from Foursquare for above bike stations for all Bike station for Restaurants

In [32]:
# Setting up url and headers values
url_4sq = "https://api.foursquare.com/v3/places/search"
headers = { "Accept": "application/json", 'Authorization' : api_key_4sq}

In [17]:
# Creating list to append data from all lat/long

results =[]

# looping throgh the city bikes data frame for each lat - long

for index,rows in df_city_bikes_paris.iterrows():    
    params_dict = { "query": "restaurant","ll": rows["Lat-Long"],"radius": "1000"}  # Sending request for category restaurant and radius 1000m
    response = requests.get(url_4sq, params=params_dict, headers=headers)
    data = json.loads(response.text)
    for item in data['results']:
    # Extract the data for each item and append it to the results list
        result = {
            'fsq_id': item['fsq_id'],
            'name': item['name'],
            'distance': item['distance'],
            'latitude': item['geocodes']['main']['latitude'],
            'longitude': item['geocodes']['main']['longitude'],
            'categories': ', '.join([c['name'] for c in item['categories']]),
            'city_bikes_ll' : rows["Lat-Long"]
    }
        results.append(result)

# Converting to Dataframe    

city_bikes_foursquare_df_rest = pd.DataFrame(results)

In [18]:
city_bikes_foursquare_df_rest.shape

(14377, 7)

In [19]:
city_bikes_foursquare_df_rest.nunique()

fsq_id           6012
name             5419
distance          935
latitude         5746
longitude        5806
categories        977
city_bikes_ll    1447
dtype: int64

### Saving the raw data to csv file 

In [20]:
city_bikes_foursquare_df_rest.to_csv("city_bikes_foursquare_df_restaurent.csv", index = True)

### Extrating data from Foursquare for above bike stations for all Bike station for Bars

In [24]:
# Creating list to append data from all lat/long

results =[]

# looping throgh the city bikes data frame for each lat - long

for index,rows in df_city_bikes_paris.iterrows():    
    params_dict = { "query": "bar","ll": rows["Lat-Long"],"radius": "1000"} # Sending request for category bar and radius 1000m
    response = requests.get(url_4sq, params=params_dict, headers=headers)
    data = json.loads(response.text)
    for item in data['results']:
    # Extract the data for each item and append it to the results list
        result = {
            'fsq_id': item['fsq_id'],
            'name': item['name'],
            'distance': item['distance'],
            'latitude': item['geocodes']['main']['latitude'],
            'longitude': item['geocodes']['main']['longitude'],
            'categories': ', '.join([c['name'] for c in item['categories']]),
            'city_bikes_ll' : rows["Lat-Long"]
    }
        results.append(result)
        
# Converting to Dataframe    
  
city_bikes_foursquare_df_bar = pd.DataFrame(results)

In [25]:
city_bikes_foursquare_df_bar.shape

(14221, 7)

### Saving the raw data to csv file 

In [26]:
city_bikes_foursquare_df_bar.to_csv("city_bikes_foursquare_df_bar.csv", index = True)

### Combining the Dataframes from Restaurant and Bars

In [27]:
city_bikes_foursquare_df_raw = pd.concat([city_bikes_foursquare_df_bar, city_bikes_foursquare_df_rest])

In [28]:
city_bikes_foursquare_df_raw.head()

Unnamed: 0,fsq_id,name,distance,latitude,longitude,categories,city_bikes_ll
0,502e6bb9e4b0eed9c30f0e43,Bar de l'Hôtel Raphaël,662,48.862172,2.282686,Hotel Bar,"48.865983,2.275725"
1,4c549caf72cf0f472cc4d3d4,Au barbar,278,48.863712,2.27768,Bar,"48.865983,2.275725"
2,4b77f147f964a52097af2ee3,Saint-James Club,582,48.870457,2.279956,Hotel Bar,"48.865983,2.275725"
3,5ff3069fb907b733a9252835,Joe & The Juice,687,48.869255,2.283876,Dining and Drinking,"48.865983,2.275725"
4,535fdf29498ecab0a45b1d4e,Le Metropolitan Lounge Bar,530,48.865556,2.282755,Hotel Bar,"48.865983,2.275725"


In [29]:
city_bikes_foursquare_df_raw.shape

(28598, 7)

### Saving the raw data to csv file 

In [31]:
city_bikes_foursquare_df_raw.to_csv("city_bikes_foursquare_df_raw.csv", index = True)

### Loading from csv raw data for foursquare api calls

In [50]:
city_bikes_foursquare_df = pd.read_csv('city_bikes_foursquare_df_raw.csv')

### Check no of unique restaurants

In [33]:

city_bikes_foursquare_df['name'].nunique()

6869

### Checking for duplicates


In [34]:
duplicate_count = city_bikes_foursquare_df[city_bikes_foursquare_df['name'].duplicated() == True]

In [35]:
duplicate_count.shape

(21729, 8)

### Removing duplicates

In [36]:
city_bikes_foursquare_df = city_bikes_foursquare_df.drop_duplicates(subset=['name'],keep='first')

In [37]:
city_bikes_foursquare_df.shape

(6869, 8)

### There are 6869 restaurants and bars

In [38]:
city_bikes_foursquare_df.head()

Unnamed: 0.1,Unnamed: 0,fsq_id,name,distance,latitude,longitude,categories,city_bikes_ll
0,0,502e6bb9e4b0eed9c30f0e43,Bar de l'Hôtel Raphaël,662,48.862172,2.282686,Hotel Bar,"48.865983,2.275725"
1,1,4c549caf72cf0f472cc4d3d4,Au barbar,278,48.863712,2.27768,Bar,"48.865983,2.275725"
2,2,4b77f147f964a52097af2ee3,Saint-James Club,582,48.870457,2.279956,Hotel Bar,"48.865983,2.275725"
3,3,5ff3069fb907b733a9252835,Joe & The Juice,687,48.869255,2.283876,Dining and Drinking,"48.865983,2.275725"
4,4,535fdf29498ecab0a45b1d4e,Le Metropolitan Lounge Bar,530,48.865556,2.282755,Hotel Bar,"48.865983,2.275725"


### Saving the data to csv file

In [39]:
city_bikes_foursquare_df.to_csv("city_bikes_foursquare_df.csv", index = True)

city_bikes_foursquare_df = pd.read_csv("city_bikes_foursquare_df.csv", index_col=[0])

# Extrating data from Yelp for above bike stations for a sample to verify that API calls are working

In [78]:
import os
api_key_yelp = os.environ['$YELP']


In [79]:
url_yelp = 'https://api.yelp.com/v3/businesses/search'

In [80]:
# Looping the sample data frame to get data for multiple lat long and saving to a list

headers = { 'Authorization' : 'bearer %s' % api_key_yelp}

results = []
for index,rows in df_city_bikes_paris.iterrows():    
    params_dict = { 'latitude' : rows['latitude'],'longitude' :rows['longitude'],'radius' : '1000'} # Sending request for lat-long and radius 1000m
    response = requests.get(url_yelp, params=params_dict, headers=headers)
    data = json.loads(response.text)
    for biz in data['businesses']:
            result = {
                'name' : biz['name'],
                'rating' : biz['rating'],
                'review_count' : biz['review_count'],
                'latitude' : biz['coordinates']['latitude'],
                'longitude' : biz['coordinates']['longitude'],
                'categories' :  ', '.join([c['title'] for c in biz['categories']]),
                'location' : biz['location']['address1'],
                'city_bikes_ll' : rows["Lat-Long"]
            }
            results.append(result)

# Create a pandas DataFrame from the results list
df_yelp_sample = pd.DataFrame(results)
df_yelp_sample.head()


Unnamed: 0,name,rating,review_count,latitude,longitude,categories,location,city_bikes_ll
0,La Coïncidence,4.5,509,48.868105,2.284365,French,15 rue Mesnil,"48.865983,2.275725"
1,Le Poincaré,4.0,142,48.865057,2.286521,"French, Brasseries, Wine Bars",22 avenue Raymond Poincaré,"48.865983,2.275725"
2,L'Astrance,4.5,62,48.85844,2.28734,French,4 rue Beethoven,"48.865983,2.275725"
3,Crêperie Framboise Passy-Trocadéro,4.5,64,48.858829,2.28504,"Creperies, Gluten-Free",10 bd Delessert,"48.865983,2.275725"
4,Girafe,3.5,93,48.862584,2.288581,"French, Seafood",1 place du Trocadéro,"48.865983,2.275725"


In [81]:
df_yelp_sample.shape

(28573, 8)

### Extrating data from Yelp for all  Bike station

In [12]:
# Looping the full data frame to get data for multiple lat long and saving to a list

headers = { 'Authorization' : 'bearer %s' % api_key_yelp}

results = []
for index,rows in df_city_bikes_paris.iterrows():    
    params_dict = { 'latitude' : rows['latitude'],'longitude' :rows['longitude'],'radius' : '1000'} # Sending request for lat-long and radius 1000m
    response = requests.get(url_yelp, params=params_dict, headers=headers)
    data = json.loads(response.text)
    for biz in data['businesses']:
            result = {
                'name' : biz['name'],
                'rating' : biz['rating'],
                'review_count' : biz['review_count'],
                'latitude' : biz['coordinates']['latitude'],
                'longitude' : biz['coordinates']['longitude'],
                'categories' :  ', '.join([c['title'] for c in biz['categories']]),,
                'location' : biz['location']['address1'],
                'city_bikes_ll' : rows["Lat-Long"]
            }
            results.append(result)

# Create a pandas DataFrame from the results list
yelp_paris_df = pd.DataFrame(results)
yelp_paris_df.head()

In [33]:
yelp_paris_df.head()

Unnamed: 0,name,rating,review_count,latitude,longitude,categories,location
0,La Coïncidence,4.5,509,48.868105,2.284365,French,15 rue Mesnil
1,Le Poincaré,4.0,142,48.865057,2.286521,"French, Brasseries, Wine Bars",22 avenue Raymond Poincaré
2,L'Astrance,4.5,62,48.85844,2.28734,French,4 rue Beethoven
3,Crêperie Framboise Passy-Trocadéro,4.5,64,48.858829,2.28504,"Creperies, Gluten-Free",10 bd Delessert
4,Girafe,3.5,93,48.862584,2.288581,"French, Seafood",1 place du Trocadéro


### Saving the raw data to csv file 

In [82]:
yelp_paris_df.to_csv("city_bikes_yelp_df_raw.csv")

In [83]:
yelp_paris_df = pd.read_csv("city_bikes_yelp_df_raw.csv", index_col=[0])

In [84]:
yelp_paris_df.head()

Unnamed: 0,name,rating,review_count,latitude,longitude,categories,location,city_bikes_ll
0,La Coïncidence,4.5,509,48.868105,2.284365,French,15 rue Mesnil,"48.865983,2.275725"
1,Le Poincaré,4.0,142,48.865057,2.286521,"French, Brasseries, Wine Bars",22 avenue Raymond Poincaré,"48.865983,2.275725"
2,L'Astrance,4.5,62,48.85844,2.28734,French,4 rue Beethoven,"48.865983,2.275725"
3,Crêperie Framboise Passy-Trocadéro,4.5,64,48.858829,2.28504,"Creperies, Gluten-Free",10 bd Delessert,"48.865983,2.275725"
4,Girafe,3.5,93,48.862584,2.288581,"French, Seafood",1 place du Trocadéro,"48.865983,2.275725"


### Checking for duplicates

In [85]:
duplicate_count = yelp_paris_df[yelp_paris_df['name'].duplicated() == True]

In [86]:
duplicate_count.shape

(24190, 8)

### Removing duplicates

In [87]:
yelp_paris_df = yelp_paris_df.drop_duplicates(subset=['name'],keep='first')

In [88]:
yelp_paris_df.shape

(4383, 8)

In [89]:
yelp_paris_df.head()

Unnamed: 0,name,rating,review_count,latitude,longitude,categories,location,city_bikes_ll
0,La Coïncidence,4.5,509,48.868105,2.284365,French,15 rue Mesnil,"48.865983,2.275725"
1,Le Poincaré,4.0,142,48.865057,2.286521,"French, Brasseries, Wine Bars",22 avenue Raymond Poincaré,"48.865983,2.275725"
2,L'Astrance,4.5,62,48.85844,2.28734,French,4 rue Beethoven,"48.865983,2.275725"
3,Crêperie Framboise Passy-Trocadéro,4.5,64,48.858829,2.28504,"Creperies, Gluten-Free",10 bd Delessert,"48.865983,2.275725"
4,Girafe,3.5,93,48.862584,2.288581,"French, Seafood",1 place du Trocadéro,"48.865983,2.275725"


In [90]:
yelp_paris_df.to_csv("city_bikes_yelp_df.csv", index = True)

In [73]:
#yelp_paris_df=pd.read_csv('city_bikes_yelp_df.csv')

## Top 10 POIs as per their ratings

In [91]:
yelp_paris_df.sort_values('rating',ascending=False).head(10)

Unnamed: 0,name,rating,review_count,latitude,longitude,categories,location,city_bikes_ll
28525,La Merveille,5.0,1,48.822559,2.27575,Bakeries,4 rue de l'Abbé Grégoire,"48.822036188382,2.2754641655975"
11181,Tomy and Co,5.0,16,48.86056,2.30944,French,22 rue Surcouf,"48.861273016278126,2.309453550914185"
11389,786,5.0,1,48.81513,2.50571,"Brasseries, Burgers",34 rue Jean Jaurès,"48.813810628405,2.5096759200096"
3869,El Pincho,5.0,1,48.85767,2.434244,"Spanish, Cocktail Bars, Tapas Bars",43 rue de Paris,"48.857421708072074,2.437578141689301"
3894,L'Amitié,5.0,1,48.789676,2.40182,French,14 rue Camille Groult,"48.790114,2.4025777"
3895,Pizza Express,5.0,1,48.78772,2.4025,Pizza,77 ave Ernest Havet,"48.790114,2.4025777"
3897,Boraq,5.0,1,48.79342,2.39583,"Japanese, Thai",118 avenue Paul Vaillant Couturier,"48.790114,2.4025777"
11235,La Escondida,5.0,3,48.833843,2.308681,Peruvian,49 rue de Vouillé,"48.834097274832295,2.3087677359581"
11204,La Forêt Noire,5.0,2,48.890234,2.35466,African,9 rue Marcadet,"48.89874803977525,2.3645754531025887"
11169,Khajuraho,5.0,22,48.861542,2.310126,Indian,14 bd de la Tour Maubourg,"48.861273016278126,2.309453550914185"


## Merging Foursquare and Yelp data on name 

In [105]:
df_yelp_foursq_merged = pd.merge(yelp_paris_df,city_bikes_foursquare_df,on='name',how='inner')

In [107]:
df_yelp_foursq_merged

Unnamed: 0.1,name,rating,review_count,latitude_x,longitude_x,categories_x,location,city_bikes_ll_x,Unnamed: 0,fsq_id,distance,latitude_y,longitude_y,categories_y,city_bikes_ll_y
0,La Coïncidence,4.5,509,48.868105,2.284365,French,15 rue Mesnil,"48.865983,2.275725",8193,4e975f0f30f8ca78ea3dda0b,206,48.868191,2.284522,"Burger Joint, French Restaurant, New American ...","48.86795222470859,2.28146318346262"
1,La Coïncidence,4.5,509,48.868105,2.284365,French,15 rue Mesnil,"48.865983,2.275725",9410,4e975f0f30f8ca78ea3dda0b,250,48.868191,2.284522,"Burger Joint, French Restaurant, New American ...","48.870306124369975,2.285074541420228"
2,La Coïncidence,4.5,509,48.868105,2.284365,French,15 rue Mesnil,"48.865983,2.275725",9480,4e975f0f30f8ca78ea3dda0b,377,48.868191,2.284522,"Burger Joint, French Restaurant, New American ...","48.870809428669546,2.281149007691853"
3,Le Poincaré,4.0,142,48.865057,2.286521,"French, Brasseries, Wine Bars",22 avenue Raymond Poincaré,"48.865983,2.275725",7035,4c9524b003413704540379ef,189,48.865170,2.286653,"Pizzeria, Fast Food Restaurant, Brasserie","48.863354122371,2.2864209115505"
4,Sgabetti,4.5,39,48.865831,2.283666,Italian,15 rue Sablons,"48.865983,2.275725",2404,4bf82e724a67c92860f224cf,247,48.865756,2.283651,"Brasserie, Italian Restaurant","48.863875,2.28189"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8217,Le Café Chineur,3.0,14,48.831940,2.314010,"French, Bars",170 rue Alésia,"48.832200060759,2.3139070151575",11219,4bb19f36f964a520659c3ce3,561,48.831944,2.314111,Brasserie,"48.83287705011431,2.306762784719467"
8218,Le Café Chineur,3.0,14,48.831940,2.314010,"French, Bars",170 rue Alésia,"48.832200060759,2.3139070151575",11371,4bb19f36f964a520659c3ce3,353,48.831944,2.314111,Brasserie,"48.83088717455243,2.318882101207347"
8219,Le Café Chineur,3.0,14,48.831940,2.314010,"French, Bars",170 rue Alésia,"48.832200060759,2.3139070151575",12246,4bb19f36f964a520659c3ce3,385,48.831944,2.314111,Brasserie,"48.82953336292279,2.318179272115231"
8220,Le Café Chineur,3.0,14,48.831940,2.314010,"French, Bars",170 rue Alésia,"48.832200060759,2.3139070151575",12562,4bb19f36f964a520659c3ce3,224,48.831944,2.314111,Brasserie,"48.83059564231877,2.3119183257222176"


In [108]:
df_yelp_foursq_merged.shape

(8222, 15)

In [109]:
df_yelp_foursq_merged = df_yelp_foursq_merged.drop_duplicates(subset=['name'],keep='first')

In [110]:
df_yelp_foursq_merged.shape

(1675, 15)

In [111]:
df_yelp_foursq_merged = df_yelp_foursq_merged.rename(columns={'Lat-Long' : 'city_bikes_ll'})

In [112]:
df_yelp_foursq_merged.to_csv('df_yelp_foursq_merged.csv')

# Comparing Results


### Which API provided you with more complete data? Provide an explanation.

  The foursquare API did not provide us the rating or review data but the yelp api provide us with this data. So the Yelp API provided us with better data for comparison of the POIs