In [1]:
import pandas as pd

In [2]:
tabelog_data = pd.read_csv('tabelog_data_geocoded.csv')
tripadvisor_data = pd.read_csv('trip_advisor_data_geocoded.csv')

In [3]:
# add a 'Platform' column to each dataset
# this shows whether the data belongs to Tabelog or Tripadvisor
tabelog_data['Platform'] = 'Tabelog'
tripadvisor_data['Platform'] = 'Tripadvisor'

In [4]:
# select only the relevant columns: name, lat, lon, platform
tabelog_selected = tabelog_data[['Name', 'Latitude', 'Longitude', 'Platform']]
tripadvisor_selected = tripadvisor_data[['Name', 'Latitude', 'Longitude', 'Platform']]

In [5]:
# combine the two datasets into one
combined_data = pd.concat([tabelog_selected, tripadvisor_selected], ignore_index=True)

In [6]:
# verify the combined data
print(f"Combined data contains {len(combined_data)} entries.")
print(combined_data.head())

Combined data contains 400 entries.
                            Name   Latitude   Longitude Platform
0              Shimbashi Hoshino  35.662458  139.755308  Tabelog
1  Nihonbashi Kakigaracho Sugita  35.681852  139.784998  Tabelog
2                            aca  35.686864  139.772481  Tabelog
3           Higashiazabu Amamoto  35.657349  139.743468  Tabelog
4                      Matsukawa  35.667462  139.742390  Tabelog


In [8]:
# export the combined data to a new CSV file
output_csv = 'combined_restaurant_locations.csv'
combined_data.to_csv(output_csv, index=False)

print(f"Combined data has been saved to {output_csv}.")

Combined data has been saved to combined_restaurant_locations.csv.


In [9]:
# checking for any duplicate restaurants: any common ground?
duplicate_names = combined_data[combined_data.duplicated(subset='Name', keep=False)]

# display the duplicates
if not duplicate_names.empty:
    print(f"Found {len(duplicate_names)} duplicate entries:")
    print(duplicate_names.sort_values(by='Name'))  # Sort by name for clarity
else:
    print("No duplicate restaurant names found.")

Found 4 duplicate entries:
                Name   Latitude   Longitude     Platform
19   Sushi Takamitsu  35.647404  139.695155      Tabelog
120  Sushi Takamitsu  35.656124  139.777213      Tabelog
298              NaN        NaN         NaN  Tripadvisor
312              NaN        NaN         NaN  Tripadvisor
