In [44]:
# import pandas for data analysis
import pandas as pd

In [45]:
# read in two csv files containing restaurant recommendations from Xiaohongshu (my Chinese social media) and TimeOut Magazine.
xhs_df=pd.read_csv("data/eva-data-studio-project-01 - Xiaohongshu.csv")
to_df=pd.read_csv("data/eva-data-studio-project-01 - TimeOut.csv")

In [46]:
# Check through the name column TimeOut and Xiaohongshu 
# and make a new df of restaurants that overlap in these two lists
overlap_df=pd.merge(to_df, xhs_df, how="inner", on="Name", copy=False)


In [47]:
# Drop duplicate columns of coordinates and cuisine and standardize column names
overlap_df=overlap_df.drop(columns=["Coordinates_y", "Cuisine_y"])
overlap_df.rename({"Coordinates_x": "Coordinates", "Cuisine_x": "Cuisine"}, axis=1, inplace=True)

In [48]:
overlap_df

Unnamed: 0,Name,Coordinates,Cuisine
0,Hutong New York,"40.76190, -73.96758",Northern Chinese
1,Szechuan Mountain House,"40.73937, -73.98798",Sichuanese
2,Hao Noodle,"40.73382, -74.00001",Sichuanese
3,Congee Village,"40.71883, -73.99035",Cantonese


In [49]:
# A list of restaurant names mentioned in both
overlap_names=overlap_df.Name.to_list()
overlap_names

['Hutong New York', 'Szechuan Mountain House', 'Hao Noodle', 'Congee Village']

In [50]:
# deleting the xhs restaurants also mentioned in timeout
for overlap_name in overlap_names:
    xhs_df = xhs_df[xhs_df.Name!=overlap_name]
xhs_df

Unnamed: 0,Name,Coordinates,Cuisine
0,Shoo Long Kan,"40.76047, -73.83350",Sichuanese Hot pot
1,Chai,"40.76123, -73.98974",Beijingnese
3,Antidote,"40.71686, -73.96587",Sichuanese
4,Uluh,"40.72948, -73.98657",Sichuanese
5,Kong Sihk Tong,"40.71565, -73.99813",Cantonese
6,Bo Ky,"40.71591, -73.99867",Cantonese
7,Yin Ji Chang Fen,"40.71588, -73.99930",Cantonese
8,CheLi Manhattan,"40.72939, -73.98869",Zhejiangnese
9,Dun Huang,"40.81120, -73.95823",Northwestern Chinese
10,Taiwan Pork Chop House,"40.71441, -73.99803",Taiwanese


In [51]:
# deleting the timeout restaurants also mentioned in xhs
for overlap_name in overlap_names:
    to_df = to_df[to_df.Name!=overlap_name]
to_df

Unnamed: 0,Name,Coordinates,Cuisine
0,Nom Wah Tea Parlor,"40.71467, -73.99822",Cantonese
2,Grand Sichuan,"40.74749, -74.00096",Sichuanese
3,DAXI Sichuan Cuisine,"40.75950, -73.82906",Sichuanese
4,Mala Project,"40.72836, -73.98555",Sichuanese
5,RedFarm,"40.73881, -74.00606",Chinese fusion
6,Great NY Noodletown,"40.71517, -73.99711",Cantonese
7,Xi'an Famous Foods,"40.71584, -73.99716",Northwestern Chinese
8,456 New Shanghai,"40.71636, -73.99828",Shanghainese
9,Lan Sheng,"40.75168, -73.98833",Sichuanese
10,Pacific Palace,"40.63831, -74.00615",Cantonese


In [53]:
xhs_df[["Latitude", "Longitude"]] = xhs_df.Coordinates.str.split(",", expand=True)

In [55]:
to_df[["Latitude", "Longitude"]] = to_df.Coordinates.str.split(",", expand=True)

In [59]:
overlap_df[["Latitude", "Longitude"]] = overlap_df.Coordinates.str.split(",", expand=True)

In [57]:
xhs_df.to_csv("xiaohongshu_unique.csv", index=False)
to_df.to_csv("timeout_unique.csv", index=False)

In [60]:
overlap_df.to_csv("overlap.csv", index=False)

In [61]:
xhs_df["source"] = "Xiaohongshu"
to_df["source"] = "TimeOut Magazine"
overlap_df["source"] = "Recommended by Both"

In [66]:
all_df = pd.concat([xhs_df, to_df, overlap_df], ignore_index=False)

In [67]:
all_df

Unnamed: 0,Name,Coordinates,Cuisine,Latitude,Longitude,source
0,Shoo Long Kan,"40.76047, -73.83350",Sichuanese Hot pot,40.76047,-73.8335,Xiaohongshu
1,Chai,"40.76123, -73.98974",Beijingnese,40.76123,-73.98974,Xiaohongshu
3,Antidote,"40.71686, -73.96587",Sichuanese,40.71686,-73.96587,Xiaohongshu
4,Uluh,"40.72948, -73.98657",Sichuanese,40.72948,-73.98657,Xiaohongshu
5,Kong Sihk Tong,"40.71565, -73.99813",Cantonese,40.71565,-73.99813,Xiaohongshu
6,Bo Ky,"40.71591, -73.99867",Cantonese,40.71591,-73.99867,Xiaohongshu
7,Yin Ji Chang Fen,"40.71588, -73.99930",Cantonese,40.71588,-73.9993,Xiaohongshu
8,CheLi Manhattan,"40.72939, -73.98869",Zhejiangnese,40.72939,-73.98869,Xiaohongshu
9,Dun Huang,"40.81120, -73.95823",Northwestern Chinese,40.8112,-73.95823,Xiaohongshu
10,Taiwan Pork Chop House,"40.71441, -73.99803",Taiwanese,40.71441,-73.99803,Xiaohongshu


In [68]:
all_df.to_csv("all_res.csv", index=False)