In [98]:
import pandas as pd
from collections import Counter
import os

import warnings

warnings.simplefilter("ignore")

In [99]:
df_store = pd.read_csv("./店家名單.csv", encoding="utf-8-sig")
df_store.columns = ["shop_name", "url", "total_score", "food_type"]
df_store.head()

Unnamed: 0,shop_name,url,total_score,food_type
0,早到晚到 (彰化中正店),/restaurant/mq7f/zao-dao-wan-dao-zhang-hua-zho...,5 stars out of 5,台式
1,火車頭小吃 (彰化店),/restaurant/pui4/huo-che-tou-xiao-chi-zhang-hu...,4.8 stars out of 5,台式
2,可不可熟成紅茶 (彰化中正店),/restaurant/tdxz/ke-bu-ke-shou-cheng-hong-cha-...,4.9 stars out of 5,飲料
3,霖生炒鴨肉羹,/restaurant/g58s/lin-sheng-chao-ya-rou-geng,4.5 stars out of 5,台式
4,茶經鮮奶茶專賣店,/restaurant/nz2a/cha-jing-xian-nai-cha-zhuan-m...,4.9 stars out of 5,飲料


In [100]:
df_food = pd.read_csv("./完整餐點.csv", encoding="utf-8-sig")
df_food.head()

Unnamed: 0,pic_id,food_name,food_id,introduce,address,longitude,latitude,label,shop_id,sort,price,span
0,mq7f_96743787.jpg,(F)手感豬肉漢堡,96743787,肉品原產地：台灣豬肉,彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,65.0,2
1,mq7f_96743788.jpg,(F)法式牛奶起司鍋燒麵,96743788,,彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,188.0,4
2,mq7f_96743789.jpg,花生厚片,96743789,香濃滑順的花生醬搭配厚片，絲滑入口，等你帶我走！(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,65.0,2
3,mq7f_96743791.jpg,(F)濃厚乳酪花生法式吐司,96743791,香濃滑順的花生醬搭配吐司，絲滑入口，等你帶我走！(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,80.0,2
4,mq7f_96743792.jpg,薯泥起司蛋餅,96743792,將綿密薯泥、濃郁起司，包裹進蛋餅裡，療癒感十足。(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,75.0,2


In [101]:
def merge_csv_in_folder(folder_path):
    all_dfs = []
    col_name = ["shop_id", "shop_name", "review", "customer_score"]
    for file in os.listdir(folder_path):
        if file.endswith('.csv'):
            file_path = os.path.join(folder_path, file)
            df = pd.read_csv(file_path)
            df.columns = col_name
            all_dfs.append(df)

    # 使用 concat 進行垂直合併
    merged_df = pd.concat(all_dfs, ignore_index=True, axis=0)
    
    return merged_df

# 假設資料夾路徑是 'data/csv_files'
folder_path = './外送平台/GOOGLE評論/'
df_google_comment = merge_csv_in_folder(folder_path)

df_google_comment.head()


Unnamed: 0,shop_id,shop_name,review,customer_score
0,mq7f,早到晚到 (彰化中正店),彰化 新開 餐 飲店 火車 站 與員林客 運之間 一間 主打 晚餐 宵夜 店 彰化市 夜生活...,5 顆星
1,mq7f,早到晚到 (彰化中正店),東西 好吃 好吃 黑輪片 炸 黑 可怕,2 顆星
2,mq7f,早到晚到 (彰化中正店),其實 不用 賣 多品 項光 炒 泡 麵 稱霸 台灣 有點 缺點 炒泡 麵 建議 不要 淋巴肉...,4 顆星
3,mq7f,早到晚到 (彰化中正店),uber 上點 一下子 餐點 之後對 豬排 咖哩 飯 滿意 甜不辣 不錯 吃 法吐煉 乳覺 ...,5 顆星
4,mq7f,早到晚到 (彰化中正店),服務態度 餐點 不錯 可能 剛開幕 人手不足 等候 時間 很長,5 顆星


In [102]:
# 選擇 'shop_id' 和 'shop_name' 列，並移除重複的行
unique_shops = df_google_comment[["shop_id", "shop_name"]].drop_duplicates().reset_index(drop=True)

# unique_shops 現在包含了唯一的商店記錄
unique_shops.head()



Unnamed: 0,shop_id,shop_name
0,mq7f,早到晚到 (彰化中正店)
1,nz2a,茶經鮮奶茶專賣店
2,z9ey,一手私藏世界紅茶 (彰化和平店)
3,mp7q,春陽茶事 (彰化中正店)
4,e6ve,頂好馨臭臭鍋 (彰化店)


In [103]:
df_store = pd.merge(df_store, unique_shops, on="shop_name", how="left")

## 資料清整

In [104]:
df_food.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1090625 entries, 0 to 1090624
Data columns (total 12 columns):
 #   Column     Non-Null Count    Dtype  
---  ------     --------------    -----  
 0   pic_id     1090625 non-null  object 
 1   food_name  1090625 non-null  object 
 2   food_id    1090625 non-null  int64  
 3   introduce  862505 non-null   object 
 4   address    1090625 non-null  object 
 5   longitude  1090625 non-null  float64
 6   latitude   1090625 non-null  float64
 7   label      1090625 non-null  object 
 8   shop_id    1090625 non-null  object 
 9   sort       760937 non-null   object 
 10  price      1090625 non-null  float64
 11  span       1090625 non-null  int64  
dtypes: float64(3), int64(2), object(7)
memory usage: 99.8+ MB


In [105]:
df_food['introduce'] = df_food['introduce'].fillna("no comment")

In [106]:
df_store.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41389 entries, 0 to 41388
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   shop_name    41389 non-null  object
 1   url          41389 non-null  object
 2   total_score  41389 non-null  object
 3   food_type    41389 non-null  object
 4   shop_id      28433 non-null  object
dtypes: object(5)
memory usage: 1.6+ MB


In [107]:
df_store['total_score'] = df_store['total_score'].apply(lambda x: x.split()[0])

In [108]:
df_google_comment['customer_score'] = df_google_comment['customer_score'].apply(lambda x: x.split()[0])

## 特徵工程

In [109]:
df_store

Unnamed: 0,shop_name,url,total_score,food_type,shop_id
0,早到晚到 (彰化中正店),/restaurant/mq7f/zao-dao-wan-dao-zhang-hua-zho...,5,台式,mq7f
1,火車頭小吃 (彰化店),/restaurant/pui4/huo-che-tou-xiao-chi-zhang-hu...,4.8,台式,
2,可不可熟成紅茶 (彰化中正店),/restaurant/tdxz/ke-bu-ke-shou-cheng-hong-cha-...,4.9,飲料,
3,霖生炒鴨肉羹,/restaurant/g58s/lin-sheng-chao-ya-rou-geng,4.5,台式,
4,茶經鮮奶茶專賣店,/restaurant/nz2a/cha-jing-xian-nai-cha-zhuan-m...,4.9,飲料,nz2a
...,...,...,...,...,...
41384,老味道便當小火鍋,/restaurant/hzli/lao-wei-dao-bian-dang-xiao-hu...,5,台式,
41385,家文香雞排,/restaurant/t9bh/jia-wen-xiang-ji-pai,4.8,小吃,t9bh
41386,胖達咖啡&茶專賣店 (麥寮忠孝店),/restaurant/t1yr/pang-da-ka-pei-andcha-zhuan-m...,4.9,歐美,t1yr
41387,安木食堂,/restaurant/e9c9/an-mu-shi-tang,5,台式,e9c9


In [110]:
df_store['common_shop_name'] = df_store['shop_name'].apply(lambda x: x.split()[0])

# 現在 df['common_shop_name'] 包含了不考慮分店的店名
df_store


Unnamed: 0,shop_name,url,total_score,food_type,shop_id,common_shop_name
0,早到晚到 (彰化中正店),/restaurant/mq7f/zao-dao-wan-dao-zhang-hua-zho...,5,台式,mq7f,早到晚到
1,火車頭小吃 (彰化店),/restaurant/pui4/huo-che-tou-xiao-chi-zhang-hu...,4.8,台式,,火車頭小吃
2,可不可熟成紅茶 (彰化中正店),/restaurant/tdxz/ke-bu-ke-shou-cheng-hong-cha-...,4.9,飲料,,可不可熟成紅茶
3,霖生炒鴨肉羹,/restaurant/g58s/lin-sheng-chao-ya-rou-geng,4.5,台式,,霖生炒鴨肉羹
4,茶經鮮奶茶專賣店,/restaurant/nz2a/cha-jing-xian-nai-cha-zhuan-m...,4.9,飲料,nz2a,茶經鮮奶茶專賣店
...,...,...,...,...,...,...
41384,老味道便當小火鍋,/restaurant/hzli/lao-wei-dao-bian-dang-xiao-hu...,5,台式,,老味道便當小火鍋
41385,家文香雞排,/restaurant/t9bh/jia-wen-xiang-ji-pai,4.8,小吃,t9bh,家文香雞排
41386,胖達咖啡&茶專賣店 (麥寮忠孝店),/restaurant/t1yr/pang-da-ka-pei-andcha-zhuan-m...,4.9,歐美,t1yr,胖達咖啡&茶專賣店
41387,安木食堂,/restaurant/e9c9/an-mu-shi-tang,5,台式,e9c9,安木食堂


In [144]:
cities_and_counties = [
    "臺北市", "新北市", "桃園市", "臺中市", "臺南市", "高雄市",
    "新竹縣", "苗栗縣", "彰化縣", "南投縣", "雲林縣", "嘉義縣",
    "屏東縣", "宜蘭縣", "花蓮縣", "臺東縣", "澎湖縣", "金門縣",
    "連江縣", "基隆市", "新竹市", "嘉義市", "彰化市"
]

cities_and_counties_english = [
    "Taipei City", "New Taipei City", "Taoyuan City", "Taichung City", "Tainan City", "Kaohsiung City",
    "Hsinchu County", "Miaoli County", "Changhua County", "Nantou County", "Yunlin County", "Chiayi County",
    "Pingtung County", "Yilan County", "Hualien County", "Taitung County", "Penghu County", "Kinmen County",
    "Lienchiang County", "Keelung City", "Hsinchu City", "Chiayi City", "Changhua City"
]

cities_and_counties_all = cities_and_counties + cities_and_counties_english

# 對DataFrame進行操作來添加新列 'county'
df_food['county'] = df_food['address'].apply(lambda address: next((county for county in cities_and_counties_all if county in address), ''))

mapping_table = {
    'Taipei City': '臺北市',
    'New Taipei City': '新北市',
    'Taoyuan City': '桃園市',
    'Taichung City': '臺中市',
    'Tainan City': '臺南市',
    'Kaohsiung City': '高雄市',
    'Hsinchu County': '新竹縣',
    'Miaoli County': '苗栗縣',
    'Changhua County': '彰化縣',
    'Nantou County': '南投縣',
    'Yunlin County': '雲林縣',
    'Chiayi County': '嘉義縣',
    'Pingtung County': '屏東縣',
    'Yilan County': '宜蘭縣',
    'Hualien County': '花蓮縣',
    'Taitung County': '臺東縣',
    'Penghu County': '澎湖縣',
    'Kinmen County': '金門縣',
    'Lienchiang County': '連江縣',
    'Keelung City': '基隆市',
    'Hsinchu City': '新竹市',
    'Chiayi City': '嘉義市',
    'Changhua City': '彰化市'
}

# Converting the 'City' column, skipping unmapped values
df_food['county'] = df_food['county'].apply(lambda x: mapping_table[x] if x in mapping_table else x)

# 顯示更新後的DataFrame
df_food.head()


Unnamed: 0,pic_id,food_name,food_id,introduce,address,longitude,latitude,label,shop_id,sort,price,span,county
0,mq7f_96743787.jpg,(F)手感豬肉漢堡,96743787,肉品原產地：台灣豬肉,彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,65.0,2,彰化縣
1,mq7f_96743788.jpg,(F)法式牛奶起司鍋燒麵,96743788,no comment,彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,188.0,4,彰化縣
2,mq7f_96743789.jpg,花生厚片,96743789,香濃滑順的花生醬搭配厚片，絲滑入口，等你帶我走！(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,65.0,2,彰化縣
3,mq7f_96743791.jpg,(F)濃厚乳酪花生法式吐司,96743791,香濃滑順的花生醬搭配吐司，絲滑入口，等你帶我走！(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,80.0,2,彰化縣
4,mq7f_96743792.jpg,薯泥起司蛋餅,96743792,將綿密薯泥、濃郁起司，包裹進蛋餅裡，療癒感十足。(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,75.0,2,彰化縣


In [143]:
Counter(df_food['county'])

# df['City'] = df['City'].map(mapping_table)

Counter({'': 376934,
         '新北市': 138565,
         '高雄市': 128179,
         '桃園市': 115531,
         '彰化縣': 52537,
         '屏東縣': 39373,
         '新竹市': 35149,
         '宜蘭縣': 33437,
         '雲林縣': 30563,
         '嘉義市': 29156,
         '苗栗縣': 27805,
         '南投縣': 24069,
         '基隆市': 21594,
         '花蓮縣': 21344,
         '澎湖縣': 4163,
         '金門縣': 3822,
         '臺北市': 3756,
         '臺中市': 2618,
         '臺南市': 1155,
         '彰化市': 457,
         '臺東縣': 331,
         '新竹縣': 80,
         '嘉義縣': 7})

目的:
我想投資餐飲業，哪種餐廳最賺錢?

問題:
影響一間店的營收有哪些?


- 分店規模
- 客單價
- 評價
- 地點
- 餐點型態
- 品類數量

# 餐廳投資分析報告

## 目的
為了幫助投資者了解餐飲業的投資機會，本報告利用現有數據集進行深入分析，以確定哪些因素最影響餐廳的營收。

## 數據集
本報告基於以下數據集進行分析：
- `df_store`: 包含店鋪名稱、網址、總評分、餐點類型和店鋪ID。
- `df_meal`: 包括食品名稱、ID、介紹、地址、經緯度、標籤、店鋪ID、類別、價格和範圍。
- `df_google_comment`: 包含店鋪ID、店鋪名稱、顧客評論和客戶評分。

## 分析面向

### 1. 店鋪評價與營收分析
- 分析店鋪的總體評分與營業額之間的關聯性。
- 進行客戶評論的情感分析，了解其對營收的影響。

### 2. 餐點型態與客單價分析
- 探索不同餐點類型的平均價格與客單價的關係。
- 考慮菜單多樣性與店鋪績效之間的相關性。

### 3. 地理位置分析
- 利用經緯度數據分析地點對營業額的影響。

### 4. 綜合分析
- 結合市場趨勢分析，考慮不同餐廳風格的受歡迎程度。
- 分析評價與顧客流量的關聯。

### 5. 其他考慮因素
- 進行時段分析，了解不同時間的營業額變化。
- 考慮市場競爭對店鋪營收的影響。



### 衡量指標

- 平均單價: SUM(餐點價格 * 品項 ) / 總品項數量   
- 歡迎程度: 評論數量 * 平均星等 / 每一店家
- 粗估獲利能力: 平均單價 * 評論數量 * 分店數量

## 計算平均單價

In [112]:
# 計算平均單價
# 這裡假設 df_meal 中每行是一個品項

customer_price = df_food.groupby('shop_id')['price'].sum() / df_food.groupby('shop_id')['food_id'].count()


## 計算歡迎程度

In [114]:
# 計算歡迎程度
# 轉換星級評分為數字
df_google_comment['numeric_score'] = df_google_comment['customer_score'].apply(lambda x: float(x.split()[0]))
popularity = (df_google_comment.groupby('shop_id')['review'].count() * df_google_comment.groupby('shop_id')['numeric_score'].mean())



## 計算獲利能力

In [132]:
df_store_main = df_store.groupby('common_shop_name')['common_shop_name'].count()

df_store_main = pd.DataFrame(df_store_main)

df_store_main.columns = ["branch_count"]

df_store_main.reset_index(inplace=True)

df_store_main



Unnamed: 0,common_shop_name,branch_count
0,!'M,1
1,&TEA,1
2,(AN)越南美食,1
3,(古根)土雞肉飯,1
4,(和)橋山豆漿大王,1
...,...,...
35363,ＢＬＵＥ營養三明治,1
35364,Ｗeilagirl微辣女孩,1
35365,���熹哞熹,1
35366,𡘙師傅便當專賣店,1


In [133]:
df_store_branch = pd.merge(df_store, df_store_main, on="common_shop_name", how="outer")

df_store_branch


Unnamed: 0,shop_name,url,total_score,food_type,shop_id,common_shop_name,branch_count
0,早到晚到 (彰化中正店),/restaurant/mq7f/zao-dao-wan-dao-zhang-hua-zho...,5,台式,mq7f,早到晚到,5
1,早到晚到 (高雄陽明店),/restaurant/hdqe/zao-dao-wan-dao-gao-xiong-yan...,4.8,台式,hdqe,早到晚到,5
2,早到晚到 (屏東東港店),/restaurant/ljjj/zao-dao-wan-dao-ping-dong-don...,4.8,台式,ljjj,早到晚到,5
3,早到晚到 (台中文心店),/restaurant/bv33/zao-dao-wan-dao-tai-zhong-wen...,5,台式,,早到晚到,5
4,早到晚到 (台南永康店),/restaurant/bx3h/zao-dao-wan-dao-tai-nan-yong-...,4.8,台式,bx3h,早到晚到,5
...,...,...,...,...,...,...,...
41384,老味道便當小火鍋,/restaurant/hzli/lao-wei-dao-bian-dang-xiao-hu...,5,台式,,老味道便當小火鍋,1
41385,家文香雞排,/restaurant/t9bh/jia-wen-xiang-ji-pai,4.8,小吃,t9bh,家文香雞排,1
41386,胖達咖啡&茶專賣店 (麥寮忠孝店),/restaurant/t1yr/pang-da-ka-pei-andcha-zhuan-m...,4.9,歐美,t1yr,胖達咖啡&茶專賣店,1
41387,安木食堂,/restaurant/e9c9/an-mu-shi-tang,5,台式,e9c9,安木食堂,1


In [134]:
# 計算獲利能力
# 假設 df_store 中有分店數量 'branch_count'
profitability = customer_price * df_google_comment.groupby('shop_id')['review'].count() * df_store_branch.groupby('shop_id')['branch_count'].mean()

# 將計算結果整合到一個新的 DataFrame
final_df = pd.DataFrame({
    'Customer Price': customer_price,
    'Popularity': popularity,
    'Profitability': profitability
})

print(final_df)



         Customer Price  Popularity  Profitability
shop_id                                           
a008         129.562500       144.0    4146.000000
a02p         113.833333       424.0  107572.500000
a03t          51.333333       440.0    7802.666667
a06a         188.666667         NaN            NaN
a07o                NaN       170.0            NaN
...                 ...         ...            ...
zzsq          73.937500        99.0    1774.500000
zzuc          76.944444         NaN            NaN
zzvo         211.888889       890.0   41106.444444
zzz1                NaN        54.0            NaN
zzz6          51.363636         NaN            NaN

[39936 rows x 3 columns]


In [137]:
"""
有shop id 沒有shop name 代表沒有實體店，走線上
沒有 shop id 有shop name 代表沒有線上店，走線下

合併 df_平均單價, df_歡迎程度

"""

# 將計算結果整合到一個新的 DataFrame
df_metrics = pd.DataFrame({
    'Customer Price': customer_price,
    'Popularity': popularity,
    'Profitability': profitability
})

df_metrics.reset_index(inplace=True)


None


In [138]:
df_metrics

Unnamed: 0,shop_id,Customer Price,Popularity,Profitability
0,a008,129.562500,144.0,4146.000000
1,a02p,113.833333,424.0,107572.500000
2,a03t,51.333333,440.0,7802.666667
3,a06a,188.666667,,
4,a07o,,170.0,
...,...,...,...,...
39931,zzsq,73.937500,99.0,1774.500000
39932,zzuc,76.944444,,
39933,zzvo,211.888889,890.0,41106.444444
39934,zzz1,,54.0,


## 敘述統計 & EDA

In [156]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 結自目前 總共有以下資料表

# df_food > foodpanda 餐點
# df_google_comment > 店家評論
# df_store_branch > google 店家資訊
# df_metrics > 自製指標 by features engineering



In [148]:
df_food.head()

Unnamed: 0,pic_id,food_name,food_id,introduce,address,longitude,latitude,label,shop_id,sort,price,span,county
0,mq7f_96743787.jpg,(F)手感豬肉漢堡,96743787,肉品原產地：台灣豬肉,彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,65.0,2,彰化縣
1,mq7f_96743788.jpg,(F)法式牛奶起司鍋燒麵,96743788,no comment,彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,188.0,4,彰化縣
2,mq7f_96743789.jpg,花生厚片,96743789,香濃滑順的花生醬搭配厚片，絲滑入口，等你帶我走！(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,65.0,2,彰化縣
3,mq7f_96743791.jpg,(F)濃厚乳酪花生法式吐司,96743791,香濃滑順的花生醬搭配吐司，絲滑入口，等你帶我走！(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,80.0,2,彰化縣
4,mq7f_96743792.jpg,薯泥起司蛋餅,96743792,將綿密薯泥、濃郁起司，包裹進蛋餅裡，療癒感十足。(此描述僅供參考),彰化縣彰化市中正路二段148號,24.078734,120.537428,Brunch,mq7f,服務,75.0,2,彰化縣


In [149]:
df_store_branch.head()

Unnamed: 0,shop_name,url,total_score,food_type,shop_id,common_shop_name,branch_count
0,早到晚到 (彰化中正店),/restaurant/mq7f/zao-dao-wan-dao-zhang-hua-zho...,5.0,台式,mq7f,早到晚到,5
1,早到晚到 (高雄陽明店),/restaurant/hdqe/zao-dao-wan-dao-gao-xiong-yan...,4.8,台式,hdqe,早到晚到,5
2,早到晚到 (屏東東港店),/restaurant/ljjj/zao-dao-wan-dao-ping-dong-don...,4.8,台式,ljjj,早到晚到,5
3,早到晚到 (台中文心店),/restaurant/bv33/zao-dao-wan-dao-tai-zhong-wen...,5.0,台式,,早到晚到,5
4,早到晚到 (台南永康店),/restaurant/bx3h/zao-dao-wan-dao-tai-nan-yong-...,4.8,台式,bx3h,早到晚到,5


In [168]:
df_store_branch.info()


df_store_branch['total_score'] = df_store_branch['total_score'].astype(float)
df_store_branch

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41389 entries, 0 to 41388
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   shop_name         41389 non-null  object
 1   url               41389 non-null  object
 2   total_score       41389 non-null  object
 3   food_type         41389 non-null  object
 4   shop_id           28433 non-null  object
 5   common_shop_name  41389 non-null  object
 6   branch_count      41389 non-null  int64 
dtypes: int64(1), object(6)
memory usage: 2.2+ MB


ValueError: could not convert string to float: '未知'

In [169]:
Counter(df_store_branch['total_score'])

Counter({'4.8': 10271,
         '4.7': 8087,
         '4.9': 6720,
         '4.6': 4354,
         '5': 3243,
         '4.5': 2600,
         '未知': 1376,
         '4.4': 1312,
         '4.3': 807,
         '4.2': 424,
         '4': 404,
         '4.1': 386,
         '3.9': 231,
         '3.8': 145,
         '3': 122,
         '3.6': 112,
         '3.7': 112,
         '3.1': 85,
         '3.5': 74,
         '2.9': 71,
         '2.8': 56,
         '3.4': 54,
         '1': 48,
         '3.2': 44,
         '3.3': 44,
         '2': 31,
         '2.7': 29,
         '2.6': 29,
         '2.2': 27,
         '2.3': 24,
         '2.5': 20,
         '2.4': 15,
         '1.9': 11,
         '2.1': 8,
         '1.6': 6,
         '1.7': 3,
         '1.3': 2,
         '1.8': 2})

In [158]:
# Bar plot of average score by food type
sns.barplot(x='food_type', y='total_score', data=df_store_branch)
plt.title('Average Score by Food Type')
plt.xlabel('Food Type')
plt.ylabel('Average Score')
plt.xticks(rotation=90)  # Rotate x-axis labels if they overlap
plt.show()

TypeError: Neither the `x` nor `y` variable appears to be numeric.

In [154]:
df_store_branch.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41389 entries, 0 to 41388
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   shop_name         41389 non-null  object
 1   url               41389 non-null  object
 2   total_score       41389 non-null  object
 3   food_type         41389 non-null  object
 4   shop_id           28433 non-null  object
 5   common_shop_name  41389 non-null  object
 6   branch_count      41389 non-null  int64 
dtypes: int64(1), object(6)
memory usage: 2.2+ MB


In [150]:
df_google_comment.head()

Unnamed: 0,shop_id,shop_name,review,customer_score,numeric_score
0,mq7f,早到晚到 (彰化中正店),彰化 新開 餐 飲店 火車 站 與員林客 運之間 一間 主打 晚餐 宵夜 店 彰化市 夜生活...,5,5.0
1,mq7f,早到晚到 (彰化中正店),東西 好吃 好吃 黑輪片 炸 黑 可怕,2,2.0
2,mq7f,早到晚到 (彰化中正店),其實 不用 賣 多品 項光 炒 泡 麵 稱霸 台灣 有點 缺點 炒泡 麵 建議 不要 淋巴肉...,4,4.0
3,mq7f,早到晚到 (彰化中正店),uber 上點 一下子 餐點 之後對 豬排 咖哩 飯 滿意 甜不辣 不錯 吃 法吐煉 乳覺 ...,5,5.0
4,mq7f,早到晚到 (彰化中正店),服務態度 餐點 不錯 可能 剛開幕 人手不足 等候 時間 很長,5,5.0


In [151]:
df_metrics.head()

Unnamed: 0,shop_id,Customer Price,Popularity,Profitability
0,a008,129.5625,144.0,4146.0
1,a02p,113.833333,424.0,107572.5
2,a03t,51.333333,440.0,7802.666667
3,a06a,188.666667,,
4,a07o,,170.0,


In [153]:
Counter(df_store_branch["food_type"])

Counter({'台式': 14346,
         '小吃': 6064,
         '飲料': 3547,
         '日式': 3262,
         '甜點': 3123,
         '歐美': 2905,
         '早餐': 2390,
         '中式': 1191,
         '東南亞': 1091,
         '健康餐': 884,
         '素食': 821,
         '韓式': 705,
         '港式': 510,
         '咖啡': 390,
         '異國': 153,
         '未知': 6,
         '披薩': 1})