# In [None]:
import requests
import csv
import time

# ---------------------------------------------------------------------------
# Yelp API configuration
# ---------------------------------------------------------------------------

# Credentials. Both are blank here and must be filled in before running.
# Only API_KEY is used below (in the Authorization header); CLIENT_ID is not
# referenced anywhere in the visible code.
API_KEY = ''
CLIENT_ID = ''

# Business search endpoint, and the per-business reviews endpoint template
# whose '{}' placeholder is filled with a business id via str.format().
businesses_url = 'https://api.yelp.com/v3/businesses/search'
reviews_url = 'https://api.yelp.com/v3/businesses/{}/reviews'

# Query parameters for the search: what to look for, where, and how many
# results to return per request.
params = {'term': 'restaurants', 'location': 'New York', 'limit': 50}

# Every request authenticates with the API key as a bearer token.
headers = {'Authorization': f'Bearer {API_KEY}'}

# Page through the business search results, collecting up to `total_limit`
# businesses. For each business, the review texts returned by the reviews
# endpoint are stored alongside its name, rating and address.
total_limit = 1000
offset = 0
business_data = []

# Seconds to wait on any single HTTP call. requests applies no timeout by
# default, so without this a stalled connection would hang the script.
request_timeout = 10

# Step the offset by the page size actually requested, so the two values
# cannot drift apart if 'limit' is changed.
page_size = params['limit']

while offset < total_limit:
    params['offset'] = offset
    try:
        # Search for one page of businesses.
        response = requests.get(
            businesses_url,
            params=params,
            headers=headers,
            timeout=request_timeout,
        )
        response.raise_for_status()  # Raise HTTPError for bad response status

        data = response.json()
        businesses = data.get('businesses', [])
        if not businesses:
            # Treat an empty page as the end of the result set instead of
            # continuing to request (and sleep on) further empty pages.
            break

        for business in businesses:
            business_info = {
                'Name': business['name'],
                'Rating': business['rating'],
                'Address': ', '.join(business['location']['display_address']),
                'Reviews': []
            }

            # Fetch reviews for the current business. A non-200 status is
            # reported and the business is kept with an empty review list.
            reviews_response = requests.get(
                reviews_url.format(business['id']),
                headers=headers,
                timeout=request_timeout,
            )
            if reviews_response.status_code == 200:
                reviews_data = reviews_response.json()
                # Review text is stored unmodified; no preprocessing here.
                business_info['Reviews'].extend(
                    review['text'] for review in reviews_data.get('reviews', [])
                )
            else:
                print('Error occurred while fetching reviews:', reviews_response.status_code)

            business_data.append(business_info)

        offset += page_size  # Move on to the next page

        # Small delay between pages to avoid hitting the rate limit. Note it
        # is applied once per page, not between individual review requests.
        time.sleep(1)

    except requests.exceptions.RequestException as e:
        # Any request failure (including a timeout) ends the crawl; whatever
        # was collected so far stays in business_data.
        print(f"Error fetching data from Yelp API: {e}")
        break  # Stop fetching data on error

# Write the collected businesses to a CSV file, one row per business.
csv_file = 'yelpNY_data.csv'
fieldnames = ['Name', 'Rating', 'Address', 'Reviews']

# newline='' lets the csv module control line endings itself.
with open(csv_file, 'w', newline='', encoding='utf-8') as out_file:
    writer = csv.DictWriter(out_file, fieldnames=fieldnames)
    writer.writeheader()
    # NOTE: each 'Reviews' value is a list, which the csv module writes as
    # its str() form (e.g. "['first review', 'second review']").
    writer.writerows(business_data)
