In [28]:
import pandas as pd
import os

# Representative price (JPY) for each price-range label found in the
# 'Lunch Price' / 'Dinner Price' columns. Closed ranges map to a value near
# the middle of the range; the two open-ended ranges map to their stated bound.
# Built once at module level so it is not re-created for every row.
_PRICE_RANGE_TO_AVG = {
    "～JPY 999": 999,
    "JPY 1,000～JPY 1,999": 1500,
    "JPY 2,000～JPY 2,999": 2500,
    "JPY 3,000～JPY 3,999": 3500,
    "JPY 4,000～JPY 4,999": 4500,
    "JPY 5,000～JPY 5,999": 5500,
    "JPY 6,000～JPY 7,999": 7000,
    "JPY 8,000～JPY 9,999": 9000,
    "JPY 10,000～JPY 14,999": 12500,
    "JPY 15,000～JPY 19,999": 17500,
    "JPY 20,000～JPY 29,999": 25000,
    "JPY 30,000～JPY 39,999": 35000,
    "JPY 40,000～JPY 49,999": 45000,
    "JPY 50,000～JPY 59,999": 55000,
    "JPY 60,000～JPY 79,999": 70000,
    "JPY 80,000～JPY 99,999": 90000,
    "JPY 100,000～": 100000,
}


def convert_price_to_avg(price):
    """Map a price-range label to a single representative price in JPY.

    Parameters
    ----------
    price : str or any hashable value
        A label such as "JPY 1,000～JPY 1,999". Leading and trailing
        whitespace on strings is ignored.

    Returns
    -------
    int or None
        The representative price for a known label, or None when the value
        is not one of the known labels (for example "-" or a missing value).
    """
    if isinstance(price, str):
        # Tolerate stray whitespace around an otherwise valid label.
        price = price.strip()
    return _PRICE_RANGE_TO_AVG.get(price)

# Cities whose '<city>-top50pages.csv' exports are formatted below.
cities = {'tokyo','kyoto','osaka','hokkaido'}

# Column order of every formatted CSV. The Link formula below addresses cells
# by spreadsheet column letter, so it depends on this order.
OUTPUT_COLUMNS = ['Rank', 'Name', 'Rating','Link', 'Genre','Avg_Lunch', 'Avg_Dinner', 'Address', 'Station','Reviews','URL',"Dinner Price","Lunch Price"]

# Spreadsheet formula that builds a Google Maps search link from two cells on
# the formula's own row. INDIRECT("B"&ROW()) is used instead of a hard-coded
# row number so the link still points at its own row after the per-city files
# are concatenated or filtered into other CSVs.
# NOTE(review): with OUTPUT_COLUMNS as above, column B is 'Name' and column F
# is 'Avg_Lunch'; the second search term was presumably meant to be a location
# field such as 'Address' (column H) — confirm before changing the letter.
LINK_FORMULA = (
    '=HYPERLINK(CONCATENATE("https://www.google.com/maps/search/?api=1&query=", '
    'SUBSTITUTE(INDIRECT("B"&ROW())," ","+"),"+", '
    'SUBSTITUTE(INDIRECT("F"&ROW())," ","+")), "Link")'
)

# sorted() gives the same processing order on every run (set order is not fixed).
for city in sorted(cities):
    raw = pd.read_csv(f'{city}-top50pages.csv')

    # The unnamed first column is the 0-based position saved with the raw
    # export; turn it into a 1-based rank.
    df = raw.rename(columns={"Unnamed: 0": "Rank"})
    df['Rank'] = df['Rank'].astype(int) + 1

    # Numeric stand-ins for the textual price ranges (NaN where unknown).
    df['Avg_Lunch'] = df['Lunch Price'].apply(convert_price_to_avg)
    df['Avg_Dinner'] = df['Dinner Price'].apply(convert_price_to_avg)

    df['Link'] = LINK_FORMULA

    # .copy() so the assignments below modify an independent frame rather
    # than a slice of the previous one (avoids SettingWithCopyWarning).
    df = df[OUTPUT_COLUMNS].copy()

    # Normalise text columns: trim Genre, upper-case everything.
    df['Genre'] = df['Genre'].str.strip()
    for text_column in ('Name', 'Genre', 'Address', 'Station'):
        df[text_column] = df[text_column].str.upper()

    # Price is the larger of the two averages; max() skips missing values,
    # so it is NaN only when both are missing.
    df['Price'] = df[['Avg_Lunch', 'Avg_Dinner']].max(axis=1)

    df.to_csv(f'{city}-top50pages-formatted.csv', index=False)


In [31]:
# Per-city formatted CSVs, in the order they are stacked
formatted_files = [
    f'{city_name}-top50pages-formatted.csv'
    for city_name in ('tokyo', 'kyoto', 'osaka', 'hokkaido')
]

# Load every file and stack them into one frame with a fresh 0..n-1 index
df_list = list(map(pd.read_csv, formatted_files))
merged_df = pd.concat(df_list, ignore_index=True)

# Write the combined table (this happens before 'Price Range' is added)
merged_df.to_csv('all-cities-top50pages-formatted.csv', index=False)

# 'Price' is the larger of 'Avg_Lunch' and 'Avg_Dinner' for each row
merged_df['Price'] = merged_df[['Avg_Lunch','Avg_Dinner']].max(axis=1)

# Bin edges (JPY) and the label for each consecutive pair of edges
bins = [0,1000, 2500, 5000, 10000, 20000, float('inf')]
labels = ['-999', '1k-2.5k', '2.5k-5k', '5k-10k', '10k-20k', '20k+']

# Add the 'Price Range' column. right=False makes each bin include its lower
# edge and exclude its upper edge, e.g. 999 -> '-999' and 1000 -> '1k-2.5k'.
merged_df['Price Range'] = pd.cut(merged_df['Price'], bins, right=False, labels=labels)

# Folder that receives one CSV per price range
export_dir = 'price_categories'
os.makedirs(export_dir, exist_ok=True)

# Write one CSV per non-empty price range, in label order
for price_range in labels:
    filtered_df = merged_df.loc[merged_df['Price Range'].eq(price_range)]
    if filtered_df.empty:
        continue
    # '/' cannot appear in a file name, so swap it for '_'
    filename = 'price_' + price_range.replace("/", "_") + '.csv'
    filtered_df.to_csv(os.path.join(export_dir, filename), index=False)
    print(f"Exported {len(filtered_df)} records to {filename}")




Exported 715 records to price_-999.csv
Exported 899 records to price_1k-2.5k.csv
Exported 526 records to price_2.5k-5k.csv
Exported 526 records to price_5k-10k.csv
Exported 621 records to price_10k-20k.csv
Exported 648 records to price_20k+.csv


In [27]:
# Display the merged frame as this cell's output.
# NOTE(review): the saved output under this cell has a 'Price_Range' column and
# labels Price 999.0 as '1k-2.5k', whereas the binning cell creates
# 'Price Range' with left-closed bins starting at [0, 1000). The output looks
# stale (this cell's execution count, 27, predates the cells above) — re-run
# the notebook top to bottom to confirm.
merged_df

Unnamed: 0,Rank,Name,Rating,Link,Genre,Avg_Lunch,Avg_Dinner,Address,Station,Reviews,URL,Dinner Price,Lunch Price,Price,Price_Range
0,1,SUGI TA,4.66,"=HYPERLINK(CONCATENATE(""https://www.google.com...",SUSHI,45000.0,45000.0,東京都中央区日本橋蛎殻町1-33-6 ビューハイツ日本橋 B1F,SUITENGUMAE,808,https://tabelog.com/en/tokyo/A1302/A130204/130...,"JPY 40,000～JPY 49,999","JPY 40,000～JPY 49,999",45000.0,20k+
1,2,HOSHINO,4.63,"=HYPERLINK(CONCATENATE(""https://www.google.com...",JAPANESE CUISINE,,55000.0,東京都港区新橋5-31-3,ONARIMON,249,https://tabelog.com/en/tokyo/A1314/A131401/131...,"JPY 50,000～JPY 59,999",-,55000.0,20k+
2,3,AMA MOTO,4.60,"=HYPERLINK(CONCATENATE(""https://www.google.com...",SUSHI,,45000.0,東京都港区東麻布1-7-9 ザ・ソノビル 102,AKABANEBASHI,523,https://tabelog.com/en/tokyo/A1314/A131401/131...,"JPY 40,000～JPY 49,999",-,45000.0,20k+
3,4,ACA 1°,4.60,"=HYPERLINK(CONCATENATE(""https://www.google.com...",SPAIN,,70000.0,東京都中央区日本橋室町2-1-1 三井2号館,MITSUKOSHIMAE,430,https://tabelog.com/en/tokyo/A1302/A130202/132...,"JPY 60,000～JPY 79,999",-,70000.0,20k+
4,5,MATSUKAWA,4.59,"=HYPERLINK(CONCATENATE(""https://www.google.com...",JAPANESE CUISINE,70000.0,70000.0,東京都港区赤坂1-11-6 赤坂テラスハウス １階,ROPPONGI ITCHOME,472,https://tabelog.com/en/tokyo/A1307/A130701/131...,"JPY 60,000～JPY 79,999","JPY 60,000～JPY 79,999",70000.0,20k+
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,996,SAPPORO RAMEN GENTEN,3.52,"=HYPERLINK(CONCATENATE(""https://www.google.com...","RAMEN, CURRY",999.0,1500.0,北海道江別市野幌松並町26-6,NOPPORO,120,https://tabelog.com/en/hokkaido/A0107/A010703/...,"JPY 1,000～JPY 1,999",～JPY 999,1500.0,1k-2.5k
3996,997,RAMEN NO TOKIN,3.52,"=HYPERLINK(CONCATENATE(""https://www.google.com...",RAMEN,999.0,999.0,北海道札幌市北区新琴似5条7丁目6-16,SHIN KOTONI,119,https://tabelog.com/en/hokkaido/A0101/A010201/...,～JPY 999,～JPY 999,999.0,1k-2.5k
3997,998,KITANO RAMEN,3.52,"=HYPERLINK(CONCATENATE(""https://www.google.com...",RAMEN,999.0,999.0,北海道札幌市中央区北7条西18-4-21,SOEN,117,https://tabelog.com/en/hokkaido/A0101/A010102/...,～JPY 999,～JPY 999,999.0,1k-2.5k
3998,999,RAMENYA,3.52,"=HYPERLINK(CONCATENATE(""https://www.google.com...","RAMEN, TSUKEMEN",999.0,999.0,北海道恵庭市島松仲町1-12-20,SHIMAMATSU,116,https://tabelog.com/en/hokkaido/A0107/A010703/...,～JPY 999,～JPY 999,999.0,1k-2.5k
