In [2]:
import requests
from dotenv import load_dotenv
import os
import pandas as pd
import numpy as np
import pymysql
import time
import sqlalchemy as alch

In [27]:
coordinates = pd.read_csv('Barcelona_coordinates.csv')

In [3]:
load_dotenv()
MY_APY_KEY = os.getenv('API_ANTO')
payload={}
headers = {}

# Scrapping with Google Maps API

### Extract all Restaurants in Barcelona

In [3]:
def norm_req(i, radius, API_KEY, total_req):
    """
    Sends a normalized request to the Google Places API to search for nearby restaurants based on the provided location and radius.

    Args:
        i (str): The location coordinates in the format "latitude%2Clongitude".
        radius (int): The radius (in meters) within which to search for restaurants.
        API_KEY (str): The API key to access the Google Places API.
        total_req (int): The total number of requests made so far.

    Returns:
        requests.Response: The response object containing the result of the API request.
    """
    url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={i}&radius={radius}&type=restaurantes&keyword=restaurant&key={API_KEY}"
    response = requests.request("GET", url, headers=headers, data=payload)
    total_req += 1
    return response

In [4]:
def next_page_req(response, APY_KEY, total_req):
    """
    Sends a request to the Google Places API to retrieve the next page of results based on the provided response object. The previous request
    can return up to 60 results, organized in 3 pages with 20 results each. If that is the case, the previous request will contain the argument
    'next_page_toke'. If not, this argument will not be on the resposne

    Args:
        response (requests.Response): The response object from the previous API request.
        API_KEY (str): The API key to access the Google Places API.
        total_req (int): The total number of requests made so far.

    Returns:
        requests.Response: The response object containing the result of the API request for the next page.
    """
    time.sleep(5)
    next_page = response.json()['next_page_token']
    url = f'https://maps.googleapis.com/maps/api/place/nearbysearch/json?pagetoken={next_page}&key={APY_KEY}'
    response = requests.request("GET", url, headers=headers, data=payload)
    total_req += 1
    return response

In [5]:
def appending_responses(response, business_type, location, name, place_id, raiting, price_level, user_raitings_total, vicinity):
    """
    Extracts specific information from the response object and appends it to the corresponding lists.

    Args:
        response (requests.Response): The response object from the API request.
        business_type (list): The list to store the business status of each result.
        location (list): The list to store the location of each result.
        name (list): The list to store the name of each result.
        place_id (list): The list to store the place ID of each result.
        rating (list): The list to store the rating of each result.
        price_level (list): The list to store the price level of each result.
        user_ratings_total (list): The list to store the total number of user ratings for each result.
        vicinity (list): The list to store the vicinity (address or neighborhood) of each result.

    Returns:
        tuple: A tuple containing the updated lists of business_type, location, name, place_id, rating,
               price_level, user_ratings_total, and vicinity.
    """
    for i in response.json()['results']:
        try:
            business_type.append(i['business_status'])
        except KeyError:
            business_type.append(np.nan)
        try:
            location.append(i['geometry']['location'])
        except KeyError:
            location.append(np.nan)
        try:
            name.append(i['name'])
        except KeyError:
            name.append(np.nan)
        try:
            place_id.append(i['place_id'])
        except KeyError:
            place_id.append(np.nan)
        try:
            raiting.append(i['rating'])
        except KeyError:
            raiting.append(np.nan)
        try:
            price_level.append(i['price_level'])
        except KeyError:
            price_level.append(np.nan)
        try:
            user_raitings_total.append(i['user_ratings_total'])
        except KeyError:
            user_raitings_total.append(np.nan)
        try:
            vicinity.append(i['vicinity'])
        except:
            vicinity.append(np.nan)
    return business_type, location, name, place_id, raiting, price_level, user_raitings_total, vicinity
    

In [38]:
radius = 50
total_req = 0

business_type = []
location = []
name = []
place_id = []
raiting = []
price_level = []
user_raitings_total = []
vicinity = []
total_req = 0

for j in coordinates.columns[:1]:
    for i in coordinates[j][:2]:
        response = norm_req(i, radius, MY_APY_KEY, total_req)
        appending_responses(response)
        try:
            response = next_page_req(response, MY_APY_KEY, total_req)
            appending_responses(response)
            try:
                response = next_page_req(response, MY_APY_KEY, total_req)
                appending_responses(response)
            except:
                pass
        except:
            pass

1st page finished
1st page finished
1st page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
3rd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
1st page finished
2nd page finished
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterarion 2nd page
On the iterari

In [39]:
my_data = {
    'name': name,
    'place_id': place_id,
    'business_status': business_type,
    'location': location,
    'raiting': raiting,
    'price_level': price_level,
    'total_reviews': user_raitings_total,
    'direction': vicinity
}
my_data = pd.DataFrame(my_data)
my_data = my_data.drop_duplicates(subset=['place_id', 'location'])
my_data.to_csv('barc_restaurants.csv')

### Get reviews details

In [87]:
restaurants = pd.read_csv('barc_restaurants.csv')
place_reviews = {'place_id':[], 'reviews': [], 'reviews_rating': [], 'time': []}

count_yes = 0
count_no = 0
n = 0
for i in restaurants['place_id'][4479:]:
    url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={i}&fields=reviews&language=en&reviews_no_translations=false&key={MY_APY_KEY}"
    response = requests.request("GET", url, headers=headers, data=payload)
    try:
        for j in response.json()['result']['reviews']:
            place_reviews['place_id'].append(i)
            place_reviews['reviews'].append(j['text'])
            place_reviews['reviews_rating'].append(j['rating'])
            place_reviews['time'].append(j['time'])
        count_yes += 1
        print('y', count_yes)
    except KeyError:
        place_reviews['place_id'].append(i)
        place_reviews['reviews'].append('No reviews available')
        place_reviews['reviews_rating'].append(np.nan)
        place_reviews['time'].append(np.nan)
        count_no+=1
        print('n', count_no)
    if (count_no+count_yes)%100 == 0:
        n+=1
        to_write = pd.DataFrame(place_reviews)
        to_write.to_csv(f'restaurants_reviews{n}.csv')
    else:
        pass

place_reviews = pd.DataFrame(place_reviews)
place_reviews.to_csv('restaurants_reviews.csv')
    

y 1
y 2
y 3
y 4
y 5
y 6
y 7
y 8
y 9
y 10
y 11
y 12
y 13
y 14
y 15
y 16
y 17
y 18
y 19
y 20
n 1
y 21
y 22
y 23
y 24
y 25
y 26
y 27
y 28
y 29
y 30
y 31
y 32
y 33
y 34
y 35
n 2
y 36
y 37
y 38
y 39
y 40
y 41
y 42
y 43
y 44
y 45
y 46
y 47
y 48
y 49
y 50
y 51
y 52
y 53
y 54
y 55
n 3
y 56
y 57
y 58
y 59
y 60
y 61
y 62
y 63
y 64
y 65
y 66
y 67
y 68
y 69
y 70
y 71
y 72
y 73
y 74
y 75
y 76
n 4
y 77
y 78
y 79
y 80
y 81
y 82
y 83
y 84
y 85
y 86
y 87
y 88
y 89
y 90
y 91
y 92
y 93
y 94
y 95
y 96
n 5
y 97
y 98
y 99
y 100
y 101
y 102
y 103
y 104
y 105
y 106
y 107
y 108
y 109
y 110
y 111
y 112
y 113
y 114
y 115
y 116
y 117
y 118
y 119
y 120
y 121
y 122
y 123
y 124
y 125
y 126
y 127
y 128
y 129
y 130
y 131
y 132
y 133
y 134
y 135
y 136
y 137
y 138
y 139
y 140
y 141
y 142
y 143
y 144
y 145
y 146
y 147
y 148
y 149
y 150
y 151
y 152
y 153
y 154
y 155
y 156
y 157
y 158
n 6
n 7
y 159
y 160
n 8
y 161
y 162
y 163
y 164
y 165
y 166
y 167
y 168
y 169
y 170
y 171
y 172
y 173
y 174
y 175
y 176
y 177
y 178
y 179
y 

### Get extra details

In [6]:
restaurants = pd.read_csv('barc_restaurants.csv')
restaurants['place_id'][:2]

0    ChIJUwcqio-YpBIRranOacUHR2o
1    ChIJW-TUp4-YpBIRLlEDpClK2tI
Name: place_id, dtype: object

In [10]:
restaurants[restaurants['place_id'] == 'ChIJdZNfDY-YpBIRJPNiF-Q3QaE']

Unnamed: 0.1,Unnamed: 0,name,place_id,business_status,location,raiting,price_level,total_reviews,direction
16,27,Can Santi,ChIJdZNfDY-YpBIRJPNiF-Q3QaE,OPERATIONAL,"{'lat': 41.3745833, 'lng': 2.1339175}",4.3,,21,"C/ de Sant Jordi, 15, Barcelona"


In [12]:
url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id=ChIJdZNfDY-YpBIRJPNiF-Q3QaE&fields=wheelchair_accessible_entrance%2Cdine_in%2Creservable%2Cserves_vegetarian_food%2Ctakeout%2Cserves_beer%2Cserves_wine%2Copening_hours&key={MY_APY_KEY}"
response1 = requests.request("GET", url, headers=headers, data=payload)

In [27]:
days = {
    '0': 'sun_hours'
    ,'1': 'mon_hours'
    ,'2': 'tue_hours'
    ,'3': 'wed_hours'
    ,'4': 'thu_hours'
    ,'5': 'fri_hours'
    ,'6': 'sat_hours'

}

restaurants = pd.read_csv('barc_restaurants.csv')
place_details = {'place_id':[], 'dine_in': [], 'reservable': [], 'serves_beer': [], 'serves_wine' :[], 'vegeterian': [], 'takeout': [], 'wheel_chair_acc': [], 
                 'mon_hours': [], 'tue_hours': [], 'wed_hours': [], 'thu_hours': [], 'fri_hours': [], 'sat_hours': [], 'sun_hours': []}
counter_pos = 0
counter_neg = 0
n = 2
for i in restaurants['place_id'][1001:]:
    url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={i}&fields=wheelchair_accessible_entrance%2Cdine_in%2Creservable%2Cserves_vegetarian_food%2Ctakeout%2Cserves_beer%2Cserves_wine%2Copening_hours&key={MY_APY_KEY}"
    response1 = requests.request("GET", url, headers=headers, data=payload)
    try:
        response = response1.json()['result']
        print('pos', counter_pos)
        place_details['place_id'].append(i)
        try:
            place_details['dine_in'].append(response['dine_in'])
        except KeyError:
            place_details['dine_in'].append(np.nan)
        try:    
            place_details['reservable'].append(response['reservable'])
        except KeyError:
            place_details['reservable'].append(np.nan)
        try:
            place_details['serves_beer'].append(response['serves_beer'])
        except KeyError:
            place_details['serves_beer'].append(np.nan)
        try:
            place_details['serves_wine'].append(response['serves_wine'])
        except KeyError:
            place_details['serves_wine'].append(np.nan)
        try:
            place_details['vegeterian'].append(response['serves_vegetarian_food'])
        except KeyError:
            place_details['vegeterian'].append(np.nan)
        try:
            place_details['takeout'].append(response['takeout'])
        except KeyError:
            place_details['takeout'].append(np.nan)
        try:
            place_details['wheel_chair_acc'].append(response['wheelchair_accessible_entrance'])
        except KeyError:
            place_details['wheel_chair_acc'].append(np.nan)    
        count = 0
        try:
            a = response['opening_hours']['weekday_text']
            for j in response['opening_hours']['weekday_text']:
                place_details[days[str(count)]].append(j.split(": ", 1)[1].replace('\u202f', '').replace('\u2009', ''))
                count += 1
            if count != 7:
                print('no enough days', count)
        except:
            for j in range(7):
                place_details[days[str(j)]].append(np.nan)
        counter_pos += 1
        
        if (counter_pos+counter_neg)%100 == 0:
            n += 1
            place_details1 = pd.DataFrame(place_details)
            place_details1.to_csv((f'place_details{n}.csv'))
            print('csv successed')
        else:
            pass
    except:
        counter_neg += 1
        print('neg', counter_neg)

pos 0
pos 1
pos 2
pos 3
pos 4
pos 5
pos 6
pos 7
pos 8
pos 9
pos 10
pos 11
pos 12
pos 13
pos 14
pos 15
pos 16
pos 17
pos 18
pos 19
pos 20
pos 21
pos 22
pos 23
pos 24
pos 25
pos 26
pos 27
pos 28
pos 29
pos 30
pos 31
pos 32
pos 33
pos 34
pos 35
pos 36
pos 37
pos 38
pos 39
pos 40
pos 41
pos 42
pos 43
pos 44
pos 45
pos 46
pos 47
pos 48
pos 49
pos 50
pos 51
pos 52
pos 53
pos 54
pos 55
pos 56
pos 57
pos 58
pos 59
pos 60
pos 61
pos 62
pos 63
pos 64
pos 65
pos 66
pos 67
pos 68
pos 69
pos 70
pos 71
pos 72
pos 73
pos 74
pos 75
pos 76
pos 77
pos 78
pos 79
pos 80
pos 81
pos 82
pos 83
pos 84
pos 85
pos 86
pos 87
pos 88
pos 89
pos 90
pos 91
pos 92
pos 93
pos 94
pos 95
pos 96
pos 97
pos 98
pos 99
csv successed
pos 100
pos 101
pos 102
pos 103
pos 104
pos 105
pos 106
pos 107
pos 108
pos 109
pos 110
pos 111
pos 112
pos 113
pos 114
pos 115
pos 116
pos 117
pos 118
pos 119
pos 120
pos 121
pos 122
pos 123
pos 124
pos 125
pos 126
pos 127
pos 128
pos 129
pos 130
pos 131
pos 132
pos 133
pos 134
pos 135
pos 136


In [20]:
place_details.to_csv(f'place_details{n}.csv')

AttributeError: 'dict' object has no attribute 'to_csv'

In [47]:
url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id=ChIJ39UxdOIVYA0RUMyAsJv1aRA&fields=wheelchair_accessible_entrance%2Cdine_in%2Creservable%2Cserves_vegetarian_food%2Ctakeout%2Cserves_beer%2Cserves_wine%2Copening_hours&key={MY_APY_KEY}"
response1 = requests.request("GET", url, headers=headers, data=payload)
response = response1.json()['result']

In [49]:
response

{'dine_in': True,
 'opening_hours': {'open_now': False,
  'periods': [{'close': {'day': 1, 'time': '1600'},
    'open': {'day': 1, 'time': '0900'}},
   {'close': {'day': 2, 'time': '1600'}, 'open': {'day': 2, 'time': '0900'}},
   {'close': {'day': 3, 'time': '1600'}, 'open': {'day': 3, 'time': '0900'}},
   {'close': {'day': 3, 'time': '2230'}, 'open': {'day': 3, 'time': '2030'}},
   {'close': {'day': 4, 'time': '1600'}, 'open': {'day': 4, 'time': '0900'}},
   {'close': {'day': 4, 'time': '2230'}, 'open': {'day': 4, 'time': '2030'}},
   {'close': {'day': 5, 'time': '1600'}, 'open': {'day': 5, 'time': '0900'}},
   {'close': {'day': 5, 'time': '2230'}, 'open': {'day': 5, 'time': '2030'}},
   {'close': {'day': 6, 'time': '1600'}, 'open': {'day': 6, 'time': '1300'}}],
  'weekday_text': ['Monday: 9:00\u202fAM\u2009–\u20094:00\u202fPM',
   'Tuesday: 9:00\u202fAM\u2009–\u20094:00\u202fPM',
   'Wednesday: 9:00\u202fAM\u2009–\u20094:00\u202fPM, 8:30\u2009–\u200910:30\u202fPM',
   'Thursday: 9:00

In [53]:
a = 'Wednesday: 9:00\u202fAM\u2009–\u20094:00\u202fPM, 8:30\u2009–\u200910:30\u202fPM'
b = a.split(": ", 1)[1].replace('\u202f', '').replace('\u2009', '')
b

'9:00AM–4:00PM, 8:30–10:30PM'

In [36]:
days = {
    '0': 'sun_hours'
    ,'1': 'mon_hours'
    ,'2': 'tue_hours'
    ,'3': 'wed_hours'
    ,'4': 'thu_hours'
    ,'5': 'fri_hours'
    ,'6': 'sat_hours'
}

place_details = {'place_id':[], 'dine_in': [], 'reservable': [], 'serves_beer': [], 'serves_wine' :[], 'vegeterian': [], 'takeout': [], 'wheel_chair_acc': [], 
                 'mon_hours': [], 'tue_hours': [], 'wed_hours': [], 'thu_hours': [], 'fri_hours': [], 'sat_hours': [], 'sun_hours': []}


count = 0
for j in response['opening_hours']['periods']:

    place_details[days[str(count)]].append(j['open']['time']+" - "+j['close']['time'])
    count += 1
place_details

{'place_id': [],
 'dine_in': [],
 'reservable': [],
 'serves_beer': [],
 'serves_wine': [],
 'vegeterian': [],
 'takeout': [],
 'wheel_chair_acc': [],
 'mon_hours': ['0930 - 1630'],
 'tue_hours': ['0930 - 2300'],
 'wed_hours': ['0930 - 2300'],
 'thu_hours': ['0930 - 2300'],
 'fri_hours': ['0930 - 2300'],
 'sat_hours': ['0930 - 2300'],
 'sun_hours': ['0930 - 1630']}

In [None]:
df_reviews = pd.read_csv('restaurants_reviews.csv')

# Connection with SQL

In [37]:
def create_connection (schema):
    """
    Creates a connection to a MySQL database using the provided schema, table name, and DataFrame.

    Args:
        schema (str): The name of the database schema.
        table_name (str): The name of the table to connect to.
        df (pandas.DataFrame): The DataFrame containing the data to be inserted into the table.

    Returns:
        sqlalchemy.engine.Engine: The engine object representing the database connection.
    """
    dbName = schema
    password = os.getenv('workbench_pass')
    connectionData=f"mysql+pymysql://root:{password}@localhost/{dbName}"
    engine = alch.create_engine(connectionData)
    return engine

In [None]:
def upload_data_bulky(df, table_name, schema):
    """
    Uploads a DataFrame to a SQL database table using the specified schema.

    Parameters:
        df (pandas.DataFrame): The DataFrame to be uploaded.
        table_name (str): The name of the table in the database.
        schema (str): The schema of the database connection.

    Returns:
        None
    """

    df.to_sql(con=create_connection(schema), name=table_name, if_exists='replace')

In [49]:
df = pd.read_csv('barc_goooglemaps_api_data.csv')
df = df.drop(columns=['Unnamed: 0'])
df.to_sql(con=create_connection('search_restaurants'), name='restaurants_details', if_exists='replace')

5023

In [105]:
df_reviews = pd.read_csv('restaurants_reviews.csv', encoding='latin1')
df_reviews = df_reviews.drop(columns=['Unnamed: 0', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7'])
df_reviews.to_sql(con=create_connection('search_restaurants'), name='restaurants_reviews', if_exists='replace')

24150