In [1]:
import json
import pandas as pd

# 1. Load review data
print("Loading reviews...")
reviews = []
with open('yelp_academic_dataset_review.json', 'r', encoding='utf-8') as f:
    for line in f:
        reviews.append(json.loads(line))

df_reviews = pd.DataFrame(reviews)

# 2. Search for "black sesame" mentions
black_sesame_reviews = df_reviews[
    df_reviews['text'].str.contains('black sesame', case=False, na=False)
]

print(f"Found {len(black_sesame_reviews)} reviews mentioning black sesame")

# 3. View sample results
print("\nSample reviews:")
print(black_sesame_reviews[['business_id', 'text', 'date']].head())

# 4. Get unique restaurant IDs
business_ids = black_sesame_reviews['business_id'].unique()
print(f"\nInvolves {len(business_ids)} businesses")

Loading reviews...
Found 698 reviews mentioning black sesame

Sample reviews:
                   business_id  \
19640   MFxRPt8-B2xTYmon3s84kg   
58970   nFjk0xVI9fNiVN__5g-m8Q   
224605  MFxRPt8-B2xTYmon3s84kg   
225400  msHYY8zS_8D3_BJitdGdmA   
228196  hmmiyt6KljD5G3a861qsKw   

                                                     text                 date  
19640   Cute place with friendly owner.  We sampled th...  2018-09-25 20:48:54  
58970   We had Brunchuru at Ichicoro Ane in St. Peters...  2018-09-30 21:52:58  
224605  Hands down, the best soft serve ice cream I've...  2018-11-11 04:41:17  
225400  I have been to this location about three times...  2010-01-31 00:10:46  
228196  K & S is great for specific things. It's not r...  2008-08-03 01:46:11  

Involves 334 businesses


In [2]:
# Search for multiple related terms
keywords = [
    'black sesame', 
    'black sesame latte', 
    'black sesame ice cream',
    'black sesame mochi',
    'kuro goma',  # Japanese
    'heugimja'    # Korean
]

pattern = '|'.join(keywords)
matches = df_reviews[
    df_reviews['text'].str.contains(pattern, case=False, na=False)
]

# Analyze trends by year
matches['year'] = pd.to_datetime(matches['date']).dt.year
yearly_trend = matches.groupby('year').size()
print("\nYearly trend:")
print(yearly_trend)


Yearly trend:
year
2007      1
2008      7
2009      8
2010     11
2011     24
2012     19
2013     34
2014     43
2015     47
2016     66
2017    101
2018    119
2019    114
2020     49
2021     51
2022      4
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches['year'] = pd.to_datetime(matches['date']).dt.year


In [3]:
# 1. Load business data
print("Loading business data...")
businesses = []
with open('yelp_academic_dataset_business.json', 'r', encoding='utf-8') as f:
    for line in f:
        businesses.append(json.loads(line))

df_business = pd.DataFrame(businesses)

# 2. Merge reviews with business info
result = black_sesame_reviews.merge(
    df_business[['business_id', 'name', 'address', 'city', 'state', 
                 'latitude', 'longitude', 'categories']],
    on='business_id',
    how='left'
)

# 3. Top cities for black sesame mentions
print("\nTop 10 cities:")
print(result['city'].value_counts().head(10))

# 4. Export results
result.to_csv('black_sesame_restaurants.csv', index=False)
print("\nResults saved to black_sesame_restaurants.csv")

Loading business data...

Top 10 cities:
city
Philadelphia     303
Reno              68
Santa Barbara     49
Edmonton          39
New Orleans       30
Nashville         26
Tampa             24
Saint Louis       21
Indianapolis      15
Tucson            13
Name: count, dtype: int64

Results saved to black_sesame_restaurants.csv


In [4]:
# Compare black sesame vs matcha vs ube
asian_flavors = {
    'black_sesame': ['black sesame', 'kuro goma'],
    'matcha': ['matcha', 'green tea latte'],
    'ube': ['ube', 'purple yam', 'taro']
}

comparison = {}

for flavor, keywords in asian_flavors.items():
    pattern = '|'.join(keywords)
    matches = df_reviews[
        df_reviews['text'].str.contains(pattern, case=False, na=False)
    ]
    comparison[flavor] = len(matches)
    print(f"{flavor}: {len(matches)} mentions")

# Trend over time
for flavor, keywords in asian_flavors.items():
    pattern = '|'.join(keywords)
    matches = df_reviews[
        df_reviews['text'].str.contains(pattern, case=False, na=False)
    ]
    matches['year'] = pd.to_datetime(matches['date']).dt.year
    trend = matches.groupby('year').size()
    print(f"\n{flavor} yearly trend:")
    print(trend)

black_sesame: 698 mentions
matcha: 6068 mentions
ube: 66201 mentions


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches['year'] = pd.to_datetime(matches['date']).dt.year



black_sesame yearly trend:
year
2007      1
2008      7
2009      8
2010     11
2011     24
2012     19
2013     34
2014     43
2015     47
2016     66
2017    101
2018    119
2019    114
2020     49
2021     51
2022      4
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches['year'] = pd.to_datetime(matches['date']).dt.year



matcha yearly trend:
year
2007       4
2008       7
2009      17
2010      14
2011      35
2012      50
2013      84
2014     153
2015     285
2016     581
2017     810
2018    1199
2019    1071
2020     705
2021     999
2022      54
dtype: int64

ube yearly trend:
year
2005       3
2006      20
2007     129
2008     570
2009     772
2010    1403
2011    2156
2012    2420
2013    3070
2014    4082
2015    5326
2016    6581
2017    7993
2018    9507
2019    9050
2020    6063
2021    6674
2022     382
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches['year'] = pd.to_datetime(matches['date']).dt.year


In [5]:
# Focus on cafes, dessert shops, and ice cream parlors
cafe_categories = [
    'Coffee', 'Cafe', 'Dessert', 'Ice Cream', 
    'Bakeries', 'Bubble Tea', 'Gelato'
]

# Filter businesses
category_pattern = '|'.join(cafe_categories)
cafes = df_business[
    df_business['categories'].str.contains(category_pattern, case=False, na=False)
]

print(f"Found {len(cafes)} cafes/dessert shops")

# Find black sesame mentions in these cafes
cafe_ids = set(cafes['business_id'])
black_sesame_in_cafes = black_sesame_reviews[
    black_sesame_reviews['business_id'].isin(cafe_ids)
]

print(f"Black sesame mentions in cafes: {len(black_sesame_in_cafes)}")

# Merge with cafe details
cafe_analysis = black_sesame_in_cafes.merge(
    cafes[['business_id', 'name', 'city', 'state', 'latitude', 'longitude']],
    on='business_id'
)

cafe_analysis.to_csv('black_sesame_cafes.csv', index=False)

Found 14348 cafes/dessert shops
Black sesame mentions in cafes: 339


In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

def load_yelp_data():
    """Load all Yelp dataset files"""
    print("Loading data...")
    
    
    # Load reviews
    reviews = []
    with open('yelp_academic_dataset_review.json', 'r', encoding='utf-8') as f:
        for line in f:
            reviews.append(json.loads(line))
    
    # Load businesses
    businesses = []
    with open('yelp_academic_dataset_business.json', 'r', encoding='utf-8') as f:
        for line in f:
            businesses.append(json.loads(line))
    
    return pd.DataFrame(reviews), pd.DataFrame(businesses)

def search_keywords(df_reviews, keywords):
    """Search for specific keywords in reviews"""
    results = {}
    
    for keyword in keywords:
        matches = df_reviews[
            df_reviews['text'].str.contains(keyword, case=False, na=False)
        ]
        results[keyword] = matches
        print(f"{keyword}: {len(matches)} mentions")
    
    return results

def analyze_trends(keyword_results):
    """Analyze yearly trends for each keyword"""
    trends = {}
    
    for keyword, data in keyword_results.items():
        data['year'] = pd.to_datetime(data['date']).dt.year
        trend = data.groupby('year').size()
        trends[keyword] = trend
        print(f"\n{keyword} yearly trend:")
        print(trend)
    
    return trends

def geographic_analysis(matches, df_business):
    """Analyze geographic distribution"""
    result = matches.merge(
        df_business[['business_id', 'name', 'city', 'state', 
                     'latitude', 'longitude', 'categories']],
        on='business_id',
        how='left'
    )
    
    print("\nTop 10 cities:")
    print(result['city'].value_counts().head(10))
    
    return result

# Main execution
if __name__ == "__main__":
    # Load data
    df_reviews, df_business = load_yelp_data()
    
    # Search for Asian flavors
    keywords = ['black sesame', 'matcha', 'ube']
    results = search_keywords(df_reviews, keywords)
    
    # Analyze trends
    trends = analyze_trends(results)
    
    # Geographic analysis for black sesame
    black_sesame_geo = geographic_analysis(results['black sesame'], df_business)
    
    # Save results
    black_sesame_geo.to_csv('black_sesame_analysis.csv', index=False)
    print("\nAnalysis complete! Results saved.")

Loading data...
black sesame: 698 mentions
matcha: 5864 mentions
ube: 60161 mentions

black sesame yearly trend:
year
2007      1
2008      7
2009      8
2010     11
2011     24
2012     19
2013     34
2014     43
2015     47
2016     66
2017    101
2018    119
2019    114
2020     49
2021     51
2022      4
dtype: int64

matcha yearly trend:
year
2007       3
2008       4
2009      12
2010      12
2011      29
2012      40
2013      69
2014     139
2015     263
2016     550
2017     784
2018    1171
2019    1048
2020     694
2021     992
2022      54
dtype: int64

ube yearly trend:
year
2005       2
2006      17
2007     111
2008     516
2009     669
2010    1278
2011    1958
2012    2177
2013    2763
2014    3634
2015    4824
2016    6057
2017    7380
2018    8665
2019    8246
2020    5503
2021    6025
2022     336
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year'] = pd.to_datetime(data['date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year'] = pd.to_datetime(data['date']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['year'] = pd.to_datetime(data['date']).dt.year



Top 10 cities:
city
Philadelphia     303
Reno              68
Santa Barbara     49
Edmonton          39
New Orleans       30
Nashville         26
Tampa             24
Saint Louis       21
Indianapolis      15
Tucson            13
Name: count, dtype: int64

Analysis complete! Results saved.


In [8]:
import requests

API_KEY = 'your_api_key_here'
headers = {'Authorization': f'Bearer {API_KEY}'}

params = {
    'term': 'black sesame',
    'location': 'San Francisco',
    'categories': 'cafes,desserts',
    'limit': 50
}

response = requests.get(
    'https://api.yelp.com/v3/businesses/search',
    headers=headers,
    params=params
)

# 先检查响应状态和内容
print("Status Code:", response.status_code)
print("Response:", response.json())

# 然后再处理数据
if response.status_code == 200:
    data = response.json()
    if 'businesses' in data:
        businesses = data['businesses']
        print(f"\nFound {len(businesses)} businesses:")
        for biz in businesses:
            print(f"{biz['name']} - {biz['location']['city']}")
    else:
        print("No 'businesses' key in response")
        print("Available keys:", data.keys())
else:
    print(f"Error: {response.status_code}")
    print(response.text)

Status Code: 400
Response: {'error': {'code': 'VALIDATION_ERROR', 'description': "'Bearer your_api_key_here' does not match '^(?i)Bearer [A-Za-z0-9\\\\-\\\\_]{128}$'", 'field': 'Authorization', 'instance': 'Bearer your_api_key_here'}}
Error: 400
{"error": {"code": "VALIDATION_ERROR", "description": "'Bearer your_api_key_here' does not match '^(?i)Bearer [A-Za-z0-9\\\\-\\\\_]{128}$'", "field": "Authorization", "instance": "Bearer your_api_key_here"}}
