In [1]:
import pandas as pd
from pathlib import Path

# Directory that holds the re-encoded ("ground truth") tables.
remap_path = Path("../query_yelp/ground_truth_dataset")

# Read the five re-encoded tables; each file is parsed as JSON Lines (lines=True).
df_business, df_checkin, df_review, df_tip, df_user = (
    pd.read_json(remap_path / file_name, lines=True)
    for file_name in (
        "business_gt.json",
        "checkin_gt.json",
        "review_gt.json",
        "tip_gt.json",
        "user_gt.json",
    )
)

In [2]:
# Preview the loaded business table (rendered as the cell's output).
df_business

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,businessid_49,Steps to Learning Montessori Preschool,6901 Phelps Rd,Goleta,CA,93117,34.423311,-119.870637,4.5,8,1,"{'BusinessAcceptsCreditCards': 'True', 'WiFi':...","Education, Elementary Schools, Child Care & Da...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-17:0', '..."
1,businessid_47,Breeze Blow Dry Bar,9916 Clayton Rd,St. Louis,MO,63124,38.636716,-90.393139,4.0,81,0,"{'ByAppointmentOnly': 'False', 'BusinessAccept...","Hair Salons, Beauty & Spas, Hair Stylists, Ski...","{'Monday': '7:0-18:0', 'Tuesday': '7:0-18:0', ..."
2,businessid_88,Impact Guns,11655 W Executive Dr,Boise,ID,83713,43.608700,-116.328000,3.0,39,1,"{'BusinessParking': '{'garage': False, 'street...","Gun/Rifle Ranges, Active Life","{'Monday': '10:0-19:0', 'Tuesday': '10:0-19:0'..."
3,businessid_41,Palms Primary Care,"1615 Pasadena Ave S, Ste 430",Saint Petersburg,FL,33707,27.752653,-82.741325,4.0,5,1,,"Internal Medicine, Doctors, Health & Medical","{'Monday': '8:30-17:0', 'Tuesday': '8:30-17:0'..."
4,businessid_33,J&Q Nails,"9655 E US Hwy 36, Unit H",Avon,IN,46123,39.763059,-86.352280,3.5,28,1,"{'BusinessParking': '{'garage': False, 'street...","Nail Salons, Hair Removal, Beauty & Spas, Waxing","{'Monday': '9:30-19:0', 'Tuesday': '9:30-19:0'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,businessid_23,Harbor Freight,"5211 Hickory Hollow Pkwy, Ste 103",Antioch,TN,37013,36.047915,-86.660254,3.5,8,1,,"Automotive, Shopping, Hardware Stores, Auto Pa...","{'Monday': '8:0-19:0', 'Tuesday': '8:0-19:0', ..."
96,businessid_38,Philadelphia Hair Studio,744 S 6th St,Philadelphia,PA,19147,39.940074,-75.152765,2.5,22,0,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","Hair Salons, Beauty & Spas","{'Tuesday': '11:0-19:0', 'Wednesday': '11:0-19..."
97,businessid_81,Fantastic Sams Cut & Color,36129 E Lake Rd,Palm Harbor,FL,34685,28.101070,-82.692492,3.5,6,1,"{'BusinessParking': '{'garage': False, 'street...","Barbers, Hair Salons, Beauty & Spas, Hair Removal","{'Monday': '8:0-21:0', 'Tuesday': '8:0-21:0', ..."
98,businessid_13,Avian Glen Winery,3545 Almaville Rd,Smyrna,TN,37167,35.911697,-86.552256,4.0,12,1,"{'RestaurantsPriceRange2': '2', 'RestaurantsTa...","Food, Arts & Entertainment, Wineries, Wine Tas...","{'Monday': '12:0-20:0', 'Tuesday': '12:0-20:0'..."


In [5]:
# How often each city appears, as a (city, count) frame sorted by frequency.
city_counts = (
    df_business["city"]
    .value_counts()
    .rename_axis("city")
    .reset_index(name="count")
)

# How often each state appears, as a (state, count) frame sorted by frequency.
state_counts = (
    df_business["state"]
    .value_counts()
    .rename_axis("state")
    .reset_index(name="count")
)

# Show the ten most frequent entries of each.
print("✅ 出现最多的前 10 个城市：")
print(city_counts.head(10))

print("\n✅ 出现最多的前 10 个州：")
print(state_counts.head(10))


✅ 出现最多的前 10 个城市：
              city  count
0     Philadelphia     15
1     Indianapolis      8
2            Tampa      7
3      Saint Louis      4
4      New Orleans      4
5         Edmonton      4
6  King of Prussia      3
7          Fishers      3
8   St. Petersburg      3
9            Boise      3

✅ 出现最多的前 10 个州：
  state  count
0    PA     27
1    FL     23
2    IN     12
3    MO      9
4    LA      7
5    AB      4
6    ID      4
7    CA      3
8    IL      3
9    TN      3


In [6]:
import pandas as pd

# Assumes df_business is already loaded and has a 'categories' column.

# 1. Keep only the rows that carry a categories value.
has_categories = df_business["categories"].notna()
df_valid = df_business.loc[has_categories].copy()

# 2. Split the comma-separated string into trimmed labels; non-strings become [].
df_valid["category_list"] = df_valid["categories"].apply(
    lambda raw: list(map(str.strip, raw.split(","))) if isinstance(raw, str) else []
)

# 3. One row per (business, category) pair.
df_exploded = df_valid.explode("category_list")

# 4. Frequency of every category label.
category_counts = (
    df_exploded["category_list"]
    .value_counts()
    .rename_axis("category")
    .reset_index(name="count")
)

# 5. Show the 20 most common categories.
print("✅ 出现次数最多的前 20 个类别：")
print(category_counts.head(20))

# 6. Number of distinct categories.
print(f"\n📊 总共独立类别数量: {len(category_counts)}")


✅ 出现次数最多的前 20 个类别：
                     category  count
0                 Restaurants     35
1                        Food     21
2                    Shopping     18
3               Home Services     12
4                  Automotive     10
5            Health & Medical      8
6          Breakfast & Brunch      8
7                   Fast Food      8
8                        Bars      8
9              American (New)      8
10                  Nightlife      8
11              Beauty & Spas      7
12  Event Planning & Services      7
13     American (Traditional)      7
14              Home & Garden      7
15                 Sandwiches      7
16             Local Services      7
17                    Burgers      6
18                    Chinese      4
19                Auto Repair      4

📊 总共独立类别数量: 187


In [7]:
from collections import Counter
import pandas as pd

# Assumes df_business is already loaded and has an 'attributes' column.

# Tally every attribute key across all businesses; entries that are not dicts
# are skipped, and each key counts once per business that has it.
attr_counter = Counter(
    key
    for attrs in df_business["attributes"].dropna()
    if isinstance(attrs, dict)
    for key in attrs.keys()
)

# Tabulate the tallies, most frequent key first.
attr_stats = (
    pd.DataFrame(attr_counter.items(), columns=["attribute_key", "count"])
    .sort_values(by="count", ascending=False)
    .reset_index(drop=True)
)

# Show the 20 most frequent attribute keys.
print("✅ 出现频率最高的 attribute 字段：")
print(attr_stats.head(20))

# Number of distinct attribute keys.
print(f"\n📊 总共不同 attribute 字段数量: {len(attr_stats)}")


✅ 出现频率最高的 attribute 字段：
                 attribute_key  count
0   BusinessAcceptsCreditCards     77
1              BusinessParking     60
2       RestaurantsPriceRange2     56
3                  BikeParking     45
4           RestaurantsTakeOut     43
5          RestaurantsDelivery     41
6                  GoodForKids     36
7                         WiFi     35
8     RestaurantsGoodForGroups     30
9            ByAppointmentOnly     29
10                      Caters     29
11                    Ambience     29
12     RestaurantsReservations     29
13                  NoiseLevel     28
14              OutdoorSeating     28
15                     Alcohol     28
16           RestaurantsAttire     27
17                       HasTV     27
18                 GoodForMeal     20
19        WheelchairAccessible     19

📊 总共不同 attribute 字段数量: 34


In [3]:
# City frequencies (used here to confirm that Indianapolis is present).
city_counts = (
    df_business["city"]
    .value_counts()
    .rename_axis("city")
    .reset_index(name="count")
)

print("✅ 出现最多的前 10 个城市：")
print(city_counts.head(10))

# business_id of every business located in Indianapolis.
is_indianapolis = df_business["city"] == "Indianapolis"
indy_business_ids = df_business.loc[is_indianapolis, "business_id"]

# Reviews written for those businesses.
is_indy_review = df_review["business_id"].isin(indy_business_ids)
indy_reviews = df_review.loc[is_indy_review]

# Mean star rating over those reviews.
average_rating = indy_reviews["stars"].mean()

# Report the result.
print(f"\n📍 Indianapolis 的 business 数量为：{len(indy_business_ids)}")
print(f"⭐ 其平均评分为：{average_rating:.2f}")


✅ 出现最多的前 10 个城市：
              city  count
0     Philadelphia     15
1     Indianapolis      8
2            Tampa      7
3      Saint Louis      4
4      New Orleans      4
5         Edmonton      4
6  King of Prussia      3
7          Fishers      3
8   St. Petersburg      3
9            Boise      3

📍 Indianapolis 的 business 数量为：8
⭐ 其平均评分为：3.55


In [4]:
# Attach the state of the reviewed business to every review.
business_states = df_business[["business_id", "state"]]
df_merged = df_review.merge(business_states, on="business_id", how="inner")

# Per state: number of reviews and mean star rating.
state_stats = (
    df_merged.groupby("state")
    .agg(
        review_count=pd.NamedAgg(column="review_id", aggfunc="count"),
        avg_rating=pd.NamedAgg(column="stars", aggfunc="mean"),
    )
    .reset_index()
)

# The state with the most reviews (first row after sorting by review count).
top_state = state_stats.sort_values(by="review_count", ascending=False).iloc[:1]

# Report the result.
print("✅ 拥有最多评论的州：")
print(top_state.to_string(index=False))


✅ 拥有最多评论的州：
state  review_count  avg_rating
   PA           662    3.699396


In [11]:


# `ast` is needed by parse_attributes below; import it here so the cell does not
# depend on a later cell having been run first.
import ast

# Year whose reviews are analysed. It drives both the filter and the printed
# labels, so the two cannot disagree (the filter used 2018 while the labels
# said 2022).
REVIEW_YEAR = 2018

# Parse the date column (overwrites it in place) and keep the reviews of REVIEW_YEAR.
df_review["date"] = pd.to_datetime(df_review["date"])
df_review_year = df_review[df_review["date"].dt.year == REVIEW_YEAR].copy()
# Legacy name, kept so that any cell still referring to it keeps working.
df_review_2022 = df_review_year

print(f"✅ 共有 {len(df_review_year)} 条评论发生在 {REVIEW_YEAR} 年。")

# Distinct businesses that received at least one of those reviews.
business_ids_year = df_review_year["business_id"].unique()
# Legacy name, kept so that any cell still referring to it keeps working.
business_ids_2022 = business_ids_year

# Business rows for exactly those businesses.
df_business_active = df_business[df_business["business_id"].isin(business_ids_year)].copy()


def parse_attributes(attr):
    """Return a business's attributes as a dict.

    Args:
        attr: A dict (returned unchanged) or the string form of a dict literal.

    Returns:
        The attributes dict, or {} when `attr` is missing, is not a valid
        Python literal, or evaluates to something other than a dict.
    """
    if isinstance(attr, dict):
        return attr
    try:
        parsed = ast.literal_eval(attr)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the failures literal_eval documents are treated as "no attributes";
        # anything else (e.g. a missing import) now surfaces instead of being hidden.
        return {}
    # Guarantee a dict so callers can safely use `in` / `.get`.
    return parsed if isinstance(parsed, dict) else {}


df_business_active["attributes_parsed"] = df_business_active["attributes"].apply(parse_attributes)


def has_parking(attrs):
    """Return True when the attributes contain a BusinessParking or BikeParking key.

    NOTE(review): only the presence of the key is tested, not its value, so a
    business whose BikeParking value is 'False' still counts — confirm intended.
    """
    return "BusinessParking" in attrs or "BikeParking" in attrs


df_business_active["has_parking_attr"] = df_business_active["attributes_parsed"].apply(has_parking)

# Number of reviewed businesses that carry one of the two parking attributes.
matched_count = df_business_active["has_parking_attr"].sum()

print(f"\n🚗 在 {REVIEW_YEAR} 年收到评论的 business 中，有 {matched_count} 家提供了 BusinessParking 或 BikeParking。")


✅ 共有 211 条评论发生在 2022 年。

🚗 在 2022 年收到评论的 business 中，有 41 家提供了 BusinessParking 或 BikeParking。


In [15]:
import pandas as pd
import ast


def parse_attributes(attr):
    """Return a business's attributes as a dict.

    Args:
        attr: A dict (returned unchanged) or the string form of a dict literal.

    Returns:
        The attributes dict, or {} when `attr` is missing, is not a valid
        Python literal, or evaluates to something other than a dict.
    """
    if isinstance(attr, dict):
        return attr
    try:
        parsed = ast.literal_eval(attr)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the failures literal_eval documents are treated as "no attributes";
        # unrelated errors are no longer silently swallowed.
        return {}
    # Guarantee a dict so callers can safely call `.get` on the result.
    return parsed if isinstance(parsed, dict) else {}

# Run parse_attributes over every row's raw attributes and store the result in a
# new column on the shared business frame (modifies df_business in place).
df_business["attributes_parsed"] = df_business["attributes"].apply(parse_attributes)

# Whether the business accepts credit cards.
def accepts_credit(attr_dict):
    """Return True when BusinessAcceptsCreditCards is set to a true value.

    Args:
        attr_dict: Attributes dict of one business.

    Returns:
        True when the value reads "true" in any letter case (a boolean True
        counts as well); False when the key is missing or holds anything else.
    """
    value = attr_dict.get("BusinessAcceptsCreditCards", "")
    # str() first: the value may be None or a real bool, which have no .lower().
    return str(value).lower() == "true"

# Flag every business by whether it accepts credit cards.
credit_flags = df_business["attributes_parsed"].apply(accepts_credit)
df_business["accepts_credit"] = credit_flags

# Keep an independent copy of the businesses whose flag is True.
df_cc = df_business.loc[df_business["accepts_credit"].eq(True)].copy()

# Turn the comma-separated categories string into a list.
def parse_categories(cat):
    """Split a comma-separated category string into trimmed, non-empty labels.

    Anything that is not a string yields an empty list.
    """
    if not isinstance(cat, str):
        return []
    trimmed = (piece.strip() for piece in cat.split(","))
    return [label for label in trimmed if label]

df_cc["category_list"] = df_cc["categories"].apply(parse_categories)

# One row per (business, category) pair; businesses without categories drop out.
business_categories = df_cc[["business_id", "category_list"]]
df_exploded = business_categories.explode("category_list").dropna()

# Number of distinct credit-card-accepting businesses in each category.
category_counts = (
    df_exploded.groupby("category_list")["business_id"]
    .nunique()
    .rename_axis("category")
    .reset_index(name="credit_card_business_count")
)

# Reviews of those businesses, repeated once per category of the business.
is_relevant_review = df_review["business_id"].isin(df_exploded["business_id"])
df_review_filtered = df_review[is_relevant_review]
df_merged = df_review_filtered.merge(df_exploded, on="business_id", how="inner")

# Mean star rating over all reviews that fall under each category.
category_avg_rating = (
    df_merged.groupby("category_list")["stars"]
    .mean()
    .rename_axis("category")
    .reset_index(name="avg_rating")
)

# Business count and average rating side by side, largest categories first.
df_result = category_counts.merge(category_avg_rating, on="category").sort_values(
    by="credit_card_business_count", ascending=False
)

# Show the ten largest categories.
print("✅ 前 10 个支持信用卡的 business 类别（按数量）及其平均用户评分：")
print(df_result.head(10).to_string(index=False))


✅ 前 10 个支持信用卡的 business 类别（按数量）及其平均用户评分：
              category  credit_card_business_count  avg_rating
           Restaurants                          27    3.633676
              Shopping                          17    3.534979
                  Food                          15    3.734234
         Home Services                           8    3.375000
        American (New)                           7    3.517241
    Breakfast & Brunch                           7    4.083969
            Automotive                           7    2.910615
             Nightlife                           7    3.737374
             Fast Food                           7    2.817204
American (Traditional)                           7    3.706767


In [16]:


# Parse the attributes field.
def parse_attributes(attr):
    """Return a business's attributes as a dict.

    Args:
        attr: A dict (returned unchanged) or the string form of a dict literal.

    Returns:
        The attributes dict, or {} when `attr` is missing, is not a valid
        Python literal, or evaluates to something other than a dict.
    """
    if isinstance(attr, dict):
        return attr
    try:
        parsed = ast.literal_eval(attr)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the failures literal_eval documents are treated as "no attributes";
        # unrelated errors are no longer silently swallowed.
        return {}
    # Guarantee a dict so callers can safely call `.get` on the result.
    return parsed if isinstance(parsed, dict) else {}

# Run parse_attributes over every row's raw attributes and store the result in a
# new column on the shared business frame (modifies df_business in place).
df_business["attributes_parsed"] = df_business["attributes"].apply(parse_attributes)

# Select businesses that offer WiFi (some values look like u'free' or u'paid').
def offers_wifi(attr_dict):
    """Return True when the WiFi attribute says the business offers free or paid WiFi.

    The raw value may be a bare word (free), a quoted word ('free') or carry a
    unicode-literal prefix (u'free'); all spellings are reduced to the bare
    lowercase word before comparing.

    Args:
        attr_dict: Attributes dict of one business.

    Returns:
        True for "free" or "paid"; False for anything else, including a missing
        key or a non-string value such as None.
    """
    # str() first: the value may be None, which has no .lower().
    wifi = str(attr_dict.get("WiFi", "")).strip().lower()
    # Drop the leading "u" of a unicode-literal spelling such as u'free'.
    if wifi[:2] in ("u'", 'u"'):
        wifi = wifi[1:]
    return wifi.strip("'\"") in ("free", "paid")

wifi_flags = df_business["attributes_parsed"].apply(offers_wifi)
df_business["offers_wifi"] = wifi_flags
df_wifi = df_business.loc[df_business["offers_wifi"].eq(True)].copy()
print(f"📶 提供 WiFi 的 business 数量: {len(df_wifi)}")

# Number of distinct WiFi-offering businesses per state.
state_counts = (
    df_wifi.groupby("state")["business_id"]
    .nunique()
    .reset_index(name="wifi_business_count")
)

# The state with the most WiFi-offering businesses.
states_by_wifi = state_counts.sort_values(by="wifi_business_count", ascending=False)
top_state = states_by_wifi.head(1)
target_state = top_state["state"].iloc[0]
print(f"\n🏆 提供 WiFi 商家最多的州是: {target_state}")

# Mean star rating over the reviews of that state's WiFi-offering businesses.
in_target_state = df_wifi["state"] == target_state
business_ids = df_wifi.loc[in_target_state, "business_id"]
df_review_target = df_review.loc[df_review["business_id"].isin(business_ids)]
avg_rating = df_review_target["stars"].mean()

# Report the final result.
print(f"\n📊 州: {target_state}")
print(f"🏢 提供 WiFi 的 business 数量: {int(top_state.iloc[0]['wifi_business_count'])}")
print(f"⭐ 这些 business 的平均评分: {avg_rating:.2f}")


📶 提供 WiFi 的 business 数量: 22

🏆 提供 WiFi 商家最多的州是: PA

📊 州: PA
🏢 提供 WiFi 的 business 数量: 8
⭐ 这些 business 的平均评分: 3.48


In [19]:
import pandas as pd
import ast


# Convert the review date column to pandas datetimes. The column is overwritten
# in place on the shared df_review frame; the half-year labelling below reads
# .year and .month from these values.
df_review["date"] = pd.to_datetime(df_review["date"])

# Build a half-year label such as 2018-H1 or 2018-H2.
def get_half_year_period(date):
    """Label a date with its year and half: months 1-6 are H1, the rest H2.

    `date` only needs `.year` and `.month` attributes.
    """
    if date.month <= 6:
        return f"{date.year}-H1"
    return f"{date.year}-H2"

df_review["period"] = df_review["date"].apply(get_half_year_period)

# Per (period, business): mean rating and number of reviews, restricted to
# businesses that have at least 5 reviews in the period.
df_agg = (
    df_review.groupby(["period", "business_id"])
    .agg(
        avg_rating=pd.NamedAgg(column="stars", aggfunc="mean"),
        review_count=pd.NamedAgg(column="stars", aggfunc="count"),
    )
    .reset_index()
    .loc[lambda frame: frame["review_count"] >= 5]
)

# Within each period put the best-rated business first (ties go to the one with
# more reviews), then keep only that first row per period.
ranked = df_agg.sort_values(
    by=["period", "avg_rating", "review_count"],
    ascending=[True, False, False],
)
df_top = ranked.groupby("period").head(1).reset_index(drop=True)

# Attach the business name and categories.
business_meta = df_business[["business_id", "name", "categories"]]
df_top = df_top.merge(business_meta, on="business_id", how="left")

# Column order for the report.
report_columns = ["period", "name", "avg_rating", "review_count", "categories"]
df_top = df_top[report_columns]

# Print the final result.
print("✅ Top-rated businesses for each half-year period (with ≥ 5 reviews):")
print(df_top.to_string(index=False))


✅ Top-rated businesses for each half-year period (with ≥ 5 reviews):
 period                          name  avg_rating  review_count                                                                                                                              categories
2011-H1           Farmhaus Restaurant    4.000000             5                                          Wine Bars, American (New), Cocktail Bars, Restaurants, American (Traditional), Nightlife, Bars
2012-H1              Insomnia Cookies    3.833333             6                                                             Ice Cream & Frozen Yogurt, Food, Food Delivery Services, Bakeries, Desserts
2012-H2               Taste of Europe    3.857143             7                                                                         Food, Specialty Food, Ethnic Food, Modern European, Restaurants
2013-H1           Farmhaus Restaurant    4.571429             7                                          Wine Bars, American (New),

In [21]:
import pandas as pd
import ast
from collections import defaultdict


# Extract the registration year from user["yelping_since"].
# Both assignments modify the shared df_user frame in place: the first overwrites
# yelping_since with parsed datetimes, the second adds a registration_year column.
df_user["yelping_since"] = pd.to_datetime(df_user["yelping_since"])
df_user["registration_year"] = df_user["yelping_since"].dt.year

# Parse business categories
def parse_categories(cat):
    """Split a comma-separated category string into trimmed, non-empty labels.

    Anything that is not a string yields an empty list.
    """
    if not isinstance(cat, str):
        return []
    trimmed = (piece.strip() for piece in cat.split(","))
    return [label for label in trimmed if label]

df_business["category_list"] = df_business["categories"].apply(parse_categories)

# Reviews joined to the registration year of the reviewing user.
user_years = df_user[["user_id", "registration_year"]]
df_review_user = df_review.merge(user_years, on="user_id", how="inner")

# ...and to the category list of the reviewed business.
business_categories = df_business[["business_id", "category_list"]]
df_review_full = df_review_user.merge(business_categories, on="business_id", how="inner")

# Number of users that registered in each year.
user_count_by_year = df_user["registration_year"].value_counts().to_dict()

# year -> category -> number of reviews written by users who registered that year.
year_to_category_counter = defaultdict(lambda: defaultdict(int))

registration_years = sorted(df_user["registration_year"].dropna().unique())
for year in registration_years:
    from_this_cohort = df_review_full["registration_year"] == year
    for categories in df_review_full.loc[from_this_cohort, "category_list"]:
        if not categories:
            continue
        for cat in categories:
            year_to_category_counter[year][cat] += 1

# Five most-reviewed categories per year, each row also carrying the number of
# users who registered that year. sorted() is stable, so equal counts keep the
# order in which the categories were first seen.
rows = [
    {
        "registration_year": year,
        "new_user_count": user_count_by_year.get(year, 0),
        "category": category,
        "review_count": count,
    }
    for year, cat_counts in year_to_category_counter.items()
    for category, count in sorted(
        cat_counts.items(), key=lambda item: item[1], reverse=True
    )[:5]
]

# Build the result table: years ascending, busiest categories first within a year.
df_result = pd.DataFrame(rows).sort_values(
    by=["registration_year", "review_count"], ascending=[True, False]
)

# Print final output
print("✅ Top 5 reviewed business categories for each Yelp registration year:")
print(df_result.to_string(index=False))



✅ Top 5 reviewed business categories for each Yelp registration year:
 registration_year  new_user_count                  category  review_count
              2005               1               Restaurants             1
              2005               1        Breakfast & Brunch             1
              2005               1                      Bars             1
              2005               1                 Wine Bars             1
              2005               1              Coffee & Tea             1
              2006               7               Restaurants             6
              2006               7                      Bars             3
              2006               7                 Nightlife             3
              2006               7        Breakfast & Brunch             2
              2006               7                 Wine Bars             2
              2007              19               Restaurants             9
              2007            

In [5]:
# Preview the loaded review table (rendered as the cell's output).
df_review

Unnamed: 0,review_id,user_id,business_id,stars,useful,funny,cool,text,date
0,reviewid_135,userid_548,businessid_34,2,0,0,0,"Sure, it's cheap, but there isn't much to see....",2016-08-01 03:44:21
1,reviewid_1067,userid_213,businessid_89,5,2,0,0,Very good service but a little pricey for the ...,2021-06-14 11:39:51
2,reviewid_871,userid_616,businessid_82,4,0,0,0,My friend and I enjoyed a fantastic meal at Mi...,2013-05-29 23:01:09
3,reviewid_314,userid_1903,businessid_66,2,1,2,1,This location is not one of my favorites peopl...,2016-05-21 18:48:52
4,reviewid_487,,businessid_95,1,0,0,0,Terrible service. I was charged twice for onli...,2021-11-01 17:11:59
...,...,...,...,...,...,...,...,...,...
1995,reviewid_687,,businessid_16,2,5,0,0,The Fox has gone downhill since I last visited...,2014-02-06 16:08:41
1996,reviewid_202,userid_428,businessid_16,4,0,0,0,Food was great as well as the service. Big scr...,2015-02-24 06:44:48
1997,reviewid_1362,userid_1684,businessid_85,5,0,0,0,Excellent customer service. Our order took 10 ...,2017-12-03 03:37:04
1998,reviewid_1352,userid_13,businessid_96,5,2,0,1,"SO GOOD! I already love this place, even if I ...",2011-03-08 23:20:46


In [6]:
# The review time may live in a 'date' or a 'timestamp' column; 'date' wins when
# both exist.
date_column = next(
    (name for name in ("date", "timestamp") if name in df_review.columns),
    None,
)
if date_column is None:
    raise ValueError("❌ 未找到日期列，可能是 'date' 或 'timestamp' 命名不一致。")

# Most recent review time in that column.
latest_date = pd.to_datetime(df_review[date_column]).max()

print(f"🕒 最新的评论日期是：{latest_date}")

🕒 最新的评论日期是：2022-01-19 19:06:14


In [27]:
import pandas as pd
import ast
from datetime import datetime


# ==== Ensure timestamp is datetime ====
# Overwrites the 'timestamp' column of df_review in place with parsed datetimes.
# NOTE(review): the review table previewed earlier in this notebook shows a 'date'
# column and no 'timestamp' column, so this cell appears to expect a different
# df_review — confirm which table must be loaded before running it.
df_review["timestamp"] = pd.to_datetime(df_review["timestamp"])

# ==== Filter reviews from 2020 onwards ====
# Keeps reviews stamped on or after 2020-01-01 00:00:00, as an independent copy.
df_recent = df_review[df_review["timestamp"] >= pd.Timestamp("2020-01-01")].copy()

# ==== Safe parse of categories ====
def safe_parse(cat):
    """Return a categories value as a list.

    Args:
        cat: A list (returned unchanged) or the string form of a list or tuple
            literal.

    Returns:
        The list of categories (a tuple literal is converted to a list), or []
        when `cat` is missing, is not a valid Python literal, or evaluates to
        anything other than a list or tuple.
    """
    if isinstance(cat, list):
        return cat
    try:
        parsed = ast.literal_eval(cat)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Only the failures literal_eval documents are treated as "no categories";
        # unrelated errors are no longer silently swallowed.
        return []
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, tuple):
        return list(parsed)
    # A scalar literal (number, string, None) is not a usable category list.
    return []

# Parse the categories column with safe_parse, overwriting it in place.
# NOTE(review): df_meta is not defined anywhere in the visible notebook; this cell
# presumably relies on a frame loaded in another session — confirm its origin.
df_meta["categories"] = df_meta["categories"].apply(safe_parse)

# ==== Filter books with 'History' in category ====
# Membership test against each row's parsed categories value.
df_meta_history = df_meta[df_meta["categories"].apply(lambda cats: "History" in cats)].copy()

# ==== Merge with filtered review ====
# Inner join on parent_asin: keeps only recent reviews whose item is in the
# History subset.
df_merged = pd.merge(
    df_recent[["parent_asin", "rating", "timestamp"]],
    df_meta_history[["parent_asin", "title", "categories"]],
    on="parent_asin",
    how="inner"
)

# ==== Build a string form of the categories ====
# Lists cannot be used as group keys, so they are joined into one string here;
# non-list values are passed through str().
df_merged["categories_str"] = df_merged["categories"].apply(lambda x: ", ".join(x) if isinstance(x, list) else str(x))

# ==== Compute average rating per book ====
# Groups by (title, categories_str) and computes the mean rating and review count.
df_avg = (
    df_merged.groupby(["title", "categories_str"])
    .agg(avg_rating=("rating", "mean"), review_count=("rating", "count"))
    .reset_index()
)

# ==== Filter books with avg rating >= 4.0 ====
df_avg_filtered = df_avg[df_avg["avg_rating"] >= 4.0].copy()

# ==== Join back with review data for inspection ====
# NOTE(review): the averages are grouped by (title, categories_str) but this join
# uses title alone, so a title that appears under two category strings would be
# matched more than once — confirm titles are unique in the data.
df_detailed = pd.merge(
    df_avg_filtered[["title"]],
    df_merged[["title", "timestamp", "rating", "categories"]],
    on="title",
    how="left"
)

# ==== Output ====
print("📚 History books with avg rating ≥ 4.0 since 2020, along with review timestamps and ratings:")
# Bare expression: the detailed frame is rendered as the cell's output.
df_detailed



📚 History books with avg rating ≥ 4.0 since 2020, along with review timestamps and ratings:


Unnamed: 0,title,timestamp,rating,categories
0,Carthage: A Survey of Punic History and Cultur...,2021-02-07 14:44:53.244,5,"[Books, History, Africa]"
1,"Cheer Up, Ben Franklin! (Young Historians)",2023-03-06 13:38:39.404,5,"[Books, Children's Books, History]"
2,Mining Engineers and the American West: The La...,2021-04-13 16:55:28.780,4,"[Books, History, Americas]"
3,Six Gun Sound: The Early History of the Los An...,2022-03-23 14:19:17.671,5,"[Books, History, Americas]"
4,Six Gun Sound: The Early History of the Los An...,2022-12-26 21:59:27.358,5,"[Books, History, Americas]"


In [12]:
# Most recent value in the review table's 'timestamp' column.
# NOTE(review): requires a df_review that has a 'timestamp' column; the review
# table previewed earlier in this notebook shows 'date' instead — confirm.
latest_time = df_review['timestamp'].max()
print("📅 最迟评论时间是:", latest_time)



📅 最迟评论时间是: 2023-06-14 16:29:39.173000
