In [6]:
import json
import os
import time

import pandas as pd
import requests

# Yelp API credentials and endpoint.
# The key is read from the YELP_API_KEY environment variable so the secret is
# never stored in the notebook or committed to version control. Set it before
# starting Jupyter (e.g. `export YELP_API_KEY=...`). When the variable is unset
# the key is an empty string and requests will presumably be rejected by Yelp.
# NOTE(review): the key that used to be hardcoded here has been exposed and
# should be revoked/rotated.
API_KEY = os.environ.get('YELP_API_KEY', '')
SEARCH_URL = 'https://api.yelp.com/v3/businesses/search'

# Every request authenticates with a bearer token built from the API key
HEADERS = {
    'Authorization': f'Bearer {API_KEY}'
}

# Function to get restaurants from Yelp by cuisine type
def get_restaurants_by_cuisine(cuisine, location='Manhattan', limit=50, offset=0, timeout=10):
    """Request one page of business search results from Yelp.

    Args:
        cuisine: Search term sent as the ``term`` query parameter (e.g. 'Indian').
        location: Area to search, sent as the ``location`` query parameter.
        limit: Number of results requested for this page.
        offset: Index of the first result to return; increase it to page
            through further results.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked
        here, so the caller must verify that the expected keys
        (e.g. 'businesses') are present.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    params = {
        'term': cuisine,
        'location': location,
        'limit': limit,
        'offset': offset  # Pagination: adjust this to get more results
    }
    response = requests.get(SEARCH_URL, headers=HEADERS, params=params, timeout=timeout)
    return response.json()

# Fetch restaurants for each cuisine from Yelp and save the combined results to a CSV file
def _restaurant_record(restaurant, cuisine):
    """Flatten one Yelp business entry into the row layout written to the CSV."""
    location = restaurant['location']
    return {
        'BusinessID': restaurant['id'],
        'Name': restaurant['name'],
        'Address': ', '.join(location['display_address']),
        'Coordinates': restaurant['coordinates'],
        'NumberOfReviews': restaurant['review_count'],
        'Rating': restaurant['rating'],
        'ZipCode': location['zip_code'],
        'Cuisine': cuisine
    }


def fetch_restaurant_data(cuisines=None, location='Manhattan', max_results=50,
                          page_size=50, output_path='restaurants3.csv'):
    """Fetch restaurants for several cuisines from Yelp and save them to a CSV file.

    Called with no arguments this issues exactly one 50-result request per
    cuisine for 'Indian', 'Mexican' and 'Italian' in Manhattan and writes
    'restaurants3.csv'.

    Args:
        cuisines: Iterable of search terms. Defaults to
            ['Indian', 'Mexican', 'Italian'].
        location: Area passed through to the Yelp search.
        max_results: Maximum number of restaurants to request per cuisine.
        page_size: Results requested per call. The original code notes that
            Yelp limits a request to 50 results, so keep this at 50 or below.
        output_path: Path of the CSV file to write.

    Returns:
        None. The rows are written to ``output_path`` and progress is printed.
    """
    if cuisines is None:
        cuisines = ['Indian', 'Mexican', 'Italian']

    # Fixed column order so the CSV always has a header row, even when no
    # restaurants were fetched (a header-less file cannot be read back).
    columns = ['BusinessID', 'Name', 'Address', 'Coordinates',
               'NumberOfReviews', 'Rating', 'ZipCode', 'Cuisine']

    all_restaurants = []
    for cuisine in cuisines:
        for offset in range(0, max_results, page_size):
            print(f"Fetching {cuisine} restaurants with offset {offset}")
            # The last page may need fewer than page_size results
            limit = min(page_size, max_results - offset)
            data = get_restaurants_by_cuisine(
                cuisine, location=location, limit=limit, offset=offset
            )

            businesses = data.get('businesses') if isinstance(data, dict) else None
            if businesses is None:
                # Surface the API's error payload instead of silently dropping the page
                detail = data.get('error', data) if isinstance(data, dict) else data
                print(f"No businesses returned for {cuisine} at offset {offset}: {detail}")
            else:
                all_restaurants.extend(
                    _restaurant_record(restaurant, cuisine) for restaurant in businesses
                )

            time.sleep(1)  # Avoid hitting rate limits by adding a delay between requests

            # Stop paging after an error or a short page: there is nothing more to fetch
            if businesses is None or len(businesses) < limit:
                break

    # Convert the list to a DataFrame and save it locally
    df = pd.DataFrame(all_restaurants, columns=columns)
    df.to_csv(output_path, index=False)  # Save to CSV
    print(f"Saved {len(all_restaurants)} restaurants to {output_path}")

# Run the fetch when this code is executed directly (as a script or notebook
# cell, where __name__ is '__main__'), but not when it is imported as a module.
if __name__ == '__main__':
    fetch_restaurant_data()

Fetching Indian restaurants with offset 0
Fetching Mexican restaurants with offset 0
Fetching Italian restaurants with offset 0
Saved 150 restaurants to restaurants3.csv


In [7]:
# Load the restaurants CSV from the working directory (written by the fetch
# step, or uploaded there beforehand).
# Note: after the CSV round trip the Coordinates column holds the text form of
# the dict (e.g. "{'latitude': ..., 'longitude': ...}"), not a dict object.
df = pd.read_csv('restaurants3.csv')

# Print the first few rows as plain text to sanity-check the columns
print(df.head())

               BusinessID           Name                            Address  \
0  x0k5kFArHLijdY8-NEQ7Xg  Bengal Tiger    234W 56th St, New York, NY 10019   
1  m5Whyz8O8jM4DrE808h71g   Darbar Grill  157 E 55th St, New York, NY 10022   
2  -mxZcmP2TtgIMmfNmxWMQA        Javitri  132 E 61st St, New York, NY 10065   
3  u_CnSp9FieAuWqhvCn-A2A   Spice Mantra   1047 2nd Ave, New York, NY 10022   
4  QqrEF8_ZQCGi63cL10J5oA          Chola  232 E 58th St, New York, NY 10022   

                                         Coordinates  NumberOfReviews  Rating  \
0  {'latitude': 40.765673188349595, 'longitude': ...             2444     4.5   
1   {'latitude': 40.759357, 'longitude': -73.968826}              633     4.0   
2  {'latitude': 40.76358346058178, 'longitude': -...               19     4.6   
3     {'latitude': 40.75825, 'longitude': -73.96633}               99     4.7   
4     {'latitude': 40.76016, 'longitude': -73.96582}              706     4.0   

   ZipCode Cuisine  
0    10019  India

In [14]:
import requests
import json
from requests.auth import HTTPBasicAuth

# OpenSearch endpoint (replace with your actual OpenSearch domain endpoint)
ES_ENDPOINT = 'https://search-restaurants-domain-bfaqu2hsgnnvgmsspl77vr237u.aos.us-east-1.on.aws'
INDEX = 'restaurants'
DOC_TYPE = '_doc'  # OpenSearch no longer uses types; '_doc' is kept for backwards compatibility

# Basic-auth credentials (may be unnecessary with a simple access policy).
# They are read from the OPENSEARCH_USERNAME / OPENSEARCH_PASSWORD environment
# variables so real secrets never need to be typed into the notebook; when the
# variables are unset the original placeholder values are used.
USERNAME = os.environ.get('OPENSEARCH_USERNAME', '*******')
PASSWORD = os.environ.get('OPENSEARCH_PASSWORD', '*******')

def insert_restaurant_data(restaurant_id, cuisine, timeout=10):
    """Index one restaurant document into OpenSearch, keyed by its restaurant ID.

    The document is PUT to ``{ES_ENDPOINT}/{INDEX}/{DOC_TYPE}/{restaurant_id}``
    and contains only the ``RestaurantID`` and ``Cuisine`` fields. The outcome
    is reported on stdout.

    Args:
        restaurant_id: Identifier used both as the document ID in the URL and
            as the ``RestaurantID`` field.
        cuisine: Value stored in the ``Cuisine`` field.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        None.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = f"{ES_ENDPOINT}/{INDEX}/{DOC_TYPE}/{restaurant_id}"
    headers = {
        'Content-Type': 'application/json'
    }
    data = {
        "RestaurantID": restaurant_id,
        "Cuisine": cuisine
    }
    response = requests.put(
        url,
        auth=HTTPBasicAuth(USERNAME, PASSWORD),
        headers=headers,
        data=json.dumps(data),
        timeout=timeout,
    )

    # A PUT with an explicit ID answers 201 when the document is created and
    # 200 when an existing document with that ID is overwritten; both mean the
    # document is now stored, so re-running the cell must not report failures.
    if response.status_code in (200, 201):
        print(f"Successfully inserted {restaurant_id}")
    else:
        print(f"Failed to insert {restaurant_id}: {response.text}")

# Build one {RestaurantID, Cuisine} document per DataFrame row, then index
# each of them into OpenSearch in row order.
restaurants = [
    {"RestaurantID": business_id, "Cuisine": cuisine_name}
    for business_id, cuisine_name in zip(df["BusinessID"], df["Cuisine"])
]

for restaurant in restaurants:
    insert_restaurant_data(restaurant['RestaurantID'], restaurant['Cuisine'])


Successfully inserted x0k5kFArHLijdY8-NEQ7Xg
Successfully inserted m5Whyz8O8jM4DrE808h71g
Successfully inserted -mxZcmP2TtgIMmfNmxWMQA
Successfully inserted u_CnSp9FieAuWqhvCn-A2A
Successfully inserted QqrEF8_ZQCGi63cL10J5oA
Successfully inserted XLEdQl5s1bO5Iu43W_c96Q
Successfully inserted IysZ64t48AedrhWfz3WJGQ
Successfully inserted A-ert0jDRBfku9ogyW_mEg
Successfully inserted 1_pK5aQ47atF50oAKHADhA
Successfully inserted izRJeBfDQYJmd6ibIQ6Mfg
Successfully inserted 3Lzq6WS7q0Cz9_RinS00OA
Successfully inserted t6PzD4uzBOfyCLcd_lT8cQ
Successfully inserted juAQ4DHCW_24LpV1T1JmBA
Successfully inserted _7BGw3YFNOTzP1Www3zB7g
Successfully inserted wBwTAHhfSm5hpJU-InCUdw
Successfully inserted guwS_mhhUjOT5d8sYfEluQ
Successfully inserted GNPne4Il-itbn2sLbvMo1g
Successfully inserted z2VXX7wGl8XZ6_oo9W3koA
Successfully inserted 9pMhRYYgV7E9L7pzECtJKA
Successfully inserted 50eKjWOB-H9iuo1ocACOaQ
Successfully inserted 1KGvtMU7VBcdlvxo2brdQg
Successfully inserted 7DzfrwNwXmbS6OhYxPKfHQ
Successful

In [8]:
# Display the loaded DataFrame; the middle rows are elided in the rendered output
df

Unnamed: 0,BusinessID,Name,Address,Coordinates,NumberOfReviews,Rating,ZipCode,Cuisine
0,x0k5kFArHLijdY8-NEQ7Xg,Bengal Tiger,"234W 56th St, New York, NY 10019","{'latitude': 40.765673188349595, 'longitude': ...",2444,4.5,10019,Indian
1,m5Whyz8O8jM4DrE808h71g,Darbar Grill,"157 E 55th St, New York, NY 10022","{'latitude': 40.759357, 'longitude': -73.968826}",633,4.0,10022,Indian
2,-mxZcmP2TtgIMmfNmxWMQA,Javitri,"132 E 61st St, New York, NY 10065","{'latitude': 40.76358346058178, 'longitude': -...",19,4.6,10065,Indian
3,u_CnSp9FieAuWqhvCn-A2A,Spice Mantra,"1047 2nd Ave, New York, NY 10022","{'latitude': 40.75825, 'longitude': -73.96633}",99,4.7,10022,Indian
4,QqrEF8_ZQCGi63cL10J5oA,Chola,"232 E 58th St, New York, NY 10022","{'latitude': 40.76016, 'longitude': -73.96582}",706,4.0,10022,Indian
...,...,...,...,...,...,...,...,...
145,eMqKFQbKQXq82EUVnlPguw,Bar Primi Penn District,"349 W 33rd St, New York, NY 10001","{'latitude': 40.7524583, 'longitude': -73.9954...",72,4.3,10001,Italian
146,RbiXVG4tzpC3zuILadyPIQ,La Pecora Bianca UWS,"359 Columbus Ave, New York, NY 10024","{'latitude': 40.78049101969648, 'longitude': -...",104,3.9,10024,Italian
147,MOwB9YYd-Ko6nVgSB-ltQw,Briciola,"370 W 51st St, New York, NY 10019","{'latitude': 40.7637997047278, 'longitude': -7...",1063,4.0,10019,Italian
148,JwCnSW-RQ7_DxvbzWNO1hA,Coletta,"365 3rd Ave, New York, NY 10016","{'latitude': 40.74094, 'longitude': -73.98139}",298,4.6,10016,Italian
