In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import openpyxl
import re
import os
import itertools
from tqdm.auto import tqdm
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
from plotly.subplots import make_subplots
import plotly.io as pio
from typing import Any, Dict, List, Union

In [3]:
data_2dep_ori = pd.read_csv(r"C:\Users\USER\OneDrive\桌面\C2L2\Amazon_dep2_all.csv", encoding="latin-1")

  data_2dep_ori = pd.read_csv(r"C:\Users\USER\OneDrive\桌面\C2L2\Amazon_dep2_all.csv", encoding="latin-1")


In [4]:
publishers = ["HarperCollins", "Little, Brown and Company", "Tor Books", "Doubleday", "Bloomsbury Publishing"]  # Disney Hyperion Books (Disney Publishing Group)、Oxford World's Classics 沒有書在榜
formats = ["Paperback", "Hardcover", "Kindle"]
features = ["category_rank", "whole_rank", "original_price", "publish_year"]

In [6]:
def formatting_data(data: pd.DataFrame) -> pd.DataFrame:
    data = data.rename(columns={"all_rank_list": "whole_rank", "before_discount": "original_price", "new_price": "selling_price", "department": "category"})

    def clean_price_column(price_series: pd.Series) -> pd.Series:
        price_series_cleaned = price_series.str.extract(r"(\$?[0-9]+(?:\.[0-9]{1,2})?)", expand=False)
        price_series_cleaned = price_series_cleaned.str.replace("$", "", regex=False).astype(float)
        price_series_cleaned = price_series_cleaned.where((price_series_cleaned >= 0) & (price_series_cleaned < 200), np.nan)
        return price_series_cleaned

    data["whole_rank"] = data["whole_rank"].apply(lambda x: int("".join(filter(str.isdigit, str(x)))) if "#" in str(x) else None).astype(float)
    data["whole_rank"] = data["whole_rank"].where(data["whole_rank"] > 0, np.nan)
    data["dep_rank"] = data["dep_rank"].apply(lambda x: int("".join(filter(str.isdigit, str(x)))) if "#" in str(x) else None).astype(float)
    data["dep_rank"] = data["dep_rank"].where(data["dep_rank"] > 0, np.nan)
    data["format"] = data["format"].apply(lambda x: next((f for f in formats if f in str(x)), "Other Format")).astype(str)
    data["original_price"] = clean_price_column(data["original_price"])
    data["selling_price"] = clean_price_column(data["selling_price"])
    data["original_price"] = data["original_price"].fillna(data["selling_price"])
    data["publisher"] = data["other_list"].apply(lambda x: next((p for p in publishers if f"Publisher:{p}".lower() in str(x).lower()), "Other Publishers")).astype(str)
    data["publish_year"] = data["other_list"].str.extract(r"\b(\d{4})\b", expand=False).astype(float)
    data["publish_year"] = data["publish_year"].where((data["publish_year"] >= 1600) & (data["publish_year"] <= 2025), np.nan)
    data["discount_rate"] = data["selling_price"] / data["original_price"]
    data["discount_rate"] = data["discount_rate"].where((data["discount_rate"] >= 0) & (data["discount_rate"] <= 1), np.nan).astype(float)

    data = data.dropna(subset=["title"])

    return data.drop_duplicates()[["whole_rank", "dep_rank", "format", "category", "title", "original_price", "selling_price", "publish_year", "discount_rate"]]


data_2dep = formatting_data(data_2dep_ori)
data_2dep.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 109480 entries, 0 to 109491
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   whole_rank      104948 non-null  float64
 1   dep_rank        109479 non-null  float64
 2   format          109480 non-null  object 
 3   category        109480 non-null  object 
 4   title           109480 non-null  object 
 5   original_price  108143 non-null  float64
 6   selling_price   108134 non-null  float64
 7   publish_year    95458 non-null   float64
 8   discount_rate   94705 non-null   float64
dtypes: float64(6), object(3)
memory usage: 8.4+ MB


In [7]:
def consolidate_books(data: pd.DataFrame) -> pd.DataFrame:
    # 初始化一個空列表，用來存放處理後的書籍數據，每個元素是一個字典，對應一本書的整合數據。
    consolidated_list: List[Dict[str, Any]] = []

    # 對原始數據按照書名(title)進行分組，每一組代表同一本書的不同條目。
    for title, group in data.groupby("title"):
        # 創建一個字典來存放當前書的整合數據。
        # key為書名，value為書名本身。
        # category欄位結合同一書名下所有不同類別，使用分號隔開。
        book_data: Dict[str, Union[str, float]] = {"title": title, "category": ";".join(group["category"].astype(str).unique())}

        # 再次對同一書名的數據按照版本(format)進行分組。
        for format_group in group.groupby("format"):
            # 格式化版本名稱，將空格替換為下劃線並轉為小寫，以便後續作為欄位名使用。
            format_type: str = format_group[0].replace(" ", "_").lower()
            # 獲取具體某一版本的所有條目。
            format_data: pd.DataFrame = format_group[1]
            # 從這些條目中選擇第一條作為代表。
            representative: pd.Series = format_data.iloc[0]

            # 遍歷需要整合的欄位名稱列表。
            for field in ["original_price", "selling_price", "publish_year", "discount_rate", "whole_rank", "dep_rank"]:
                # 為每個欄位名稱加上版本前綴，並從代表條目中提取對應的值，存放到book_data字典中。
                book_data[f"{format_type}-{field}"] = representative[field]

        # 將處理後的書籍數據加入到列表中。
        consolidated_list.append(book_data)

    # 將列表轉換為DataFrame並返回。
    data_con = pd.DataFrame(consolidated_list)
    # data_con = data_con[(data_con["paperback-whole_rank"] <= 500000) & (data_con["hardcover-whole_rank"] <= 500000)]
    return data_con


data_2dep_con = consolidate_books(data_2dep)

In [8]:
data_2dep_con["better_whole_rank"] = data_2dep_con[["paperback-whole_rank", "hardcover-whole_rank"]].min(axis=1)

In [9]:
selected_1dep_categories = pd.read_csv("amazon_selected_1dep_category.csv", encoding="latin-1")["selected_1dep_category"].to_list()
data_2dep_con["category_1dep"] = data_2dep_con["category"].str.split(" \| ", n=1).str[0]
data_2dep_test = data_2dep_con[data_2dep_con["category_1dep"].isin(selected_1dep_categories)]

In [10]:
data_2dep_sorted = data_2dep_test.sort_values(by=["category_1dep", "better_whole_rank"])
data_2dep_top100 = data_2dep_sorted.groupby("category_1dep").head(100).reset_index(drop=True)
data_2dep_top100

Unnamed: 0,title,category,paperback-original_price,paperback-selling_price,paperback-publish_year,paperback-discount_rate,paperback-whole_rank,paperback-dep_rank,kindle-original_price,kindle-selling_price,...,other_format-whole_rank,other_format-dep_rank,hardcover-original_price,hardcover-selling_price,hardcover-publish_year,hardcover-discount_rate,hardcover-whole_rank,hardcover-dep_rank,better_whole_rank,category_1dep
0,Mad Libs: For the Fans: Taylor Swift Edition,Arts & Photography | Music;Humor & Entertainme...,5.99,4.78,2024.0,0.797997,138.0,1.0,,,...,211975.0,1.0,,,,,,,138.0,Arts & Photography
1,How To Draw Everything: 300 Drawings of Cute S...,Arts & Photography | Drawing;Arts & Photograph...,10.90,10.90,2023.0,1.000000,280.0,1.0,,,...,51202.0,1.0,,,,,,,280.0,Arts & Photography
2,The Art of Home: A Designer Guide to Creating ...,Arts & Photography | Architecture;Arts & Photo...,,,,,,,21.99,21.99,...,443815.0,1.0,20.0,20.00,2023.0,1.000000,428.0,1.0,428.0,Arts & Photography
3,Piano Adventures - Lesson Book - Level 1,Arts & Photography | Music,9.99,8.99,1996.0,0.899900,464.0,6.0,9.99,6.99,...,,,,,,,,,464.0,Arts & Photography
4,"ABC Coloring Book: Color 100+ Animals, Birds, ...",Arts & Photography | Drawing;Children's Books ...,3.99,3.99,2022.0,1.000000,493.0,5.0,,,...,,,,,,,,,493.0,Arts & Photography
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2391,Rick Steves Provence & the French Riviera (Tra...,Travel | Europe,22.99,13.99,2022.0,0.608525,12937.0,83.0,16.99,16.99,...,,,,,,,,,12937.0,Travel
2392,Rick Steves Pocket Italy's Cinque Terre,"Travel | Food, Lodging & Transportation",14.99,13.22,2023.0,0.881921,12944.0,76.0,14.99,9.99,...,,,,,,,,,12944.0,Travel
2393,Frommer's Paris 2024 (Frommer's Travel Guides),Travel | Europe,22.95,20.99,2023.0,0.914597,13079.0,64.0,22.95,10.99,...,,,,,,,,,13079.0,Travel
2394,National Geographic Atlas of the National Parks,"Travel | Food, Lodging & Transportation;Travel...",14.99,14.99,2021.0,1.000000,82363.0,44.0,,,...,,,65.0,40.99,2019.0,0.630615,13159.0,44.0,13159.0,Travel


In [11]:
category_counts = data_2dep_top100["category_1dep"].value_counts().reset_index()
category_counts.columns = ["category_1dep", "count"]

# 繪製長條圖
fig = px.bar(category_counts, x="category_1dep", y="count", labels={"category_1dep": "Category 1dep", "count": "Frequency"}, title="Distribution of Category 1dep in data_2dep_top100")

# 顯示圖表
fig.show()

In [12]:
data_2dep_top100.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2396 entries, 0 to 2395
Data columns (total 28 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   title                        2396 non-null   object 
 1   category                     2396 non-null   object 
 2   paperback-original_price     1911 non-null   float64
 3   paperback-selling_price      1911 non-null   float64
 4   paperback-publish_year       1870 non-null   float64
 5   paperback-discount_rate      1911 non-null   float64
 6   paperback-whole_rank         1898 non-null   float64
 7   paperback-dep_rank           1922 non-null   float64
 8   kindle-original_price        1956 non-null   float64
 9   kindle-selling_price         1956 non-null   float64
 10  kindle-publish_year          1929 non-null   float64
 11  kindle-discount_rate         1802 non-null   float64
 12  kindle-whole_rank            1923 non-null   float64
 13  kindle-dep_rank   

In [13]:
import plotly.express as px
import plotly.graph_objects as go

# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["paperback-original_price", "hardcover-original_price"])

# 創建散佈圖
fig = px.scatter(
    filtered_data,
    x="paperback-original_price",
    y="hardcover-original_price",
    labels={"paperback-original_price": "Paperback Original Price", "hardcover-original_price": "Hardcover Original Price"},
    title="Original Price",
)

# 設置軸的範圍和單位長
min_val = min(filtered_data["paperback-original_price"].min(), filtered_data["hardcover-original_price"].min())
max_val = max(filtered_data["paperback-original_price"].max(), filtered_data["hardcover-original_price"].max())
buffer = (max_val - min_val) * 0.05  # 預留邊緣緩衝區
min_val -= buffer
max_val += buffer

fig.update_layout(xaxis=dict(scaleanchor="y", scaleratio=1, range=[min_val, max_val]), yaxis=dict(range=[min_val, max_val]), width=600, height=600)  # 保證圖形是正方形

# 添加45度線
fig.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(dash="dash"), showlegend=False))

fig.show()

In [15]:
import plotly.express as px
import plotly.graph_objects as go

# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["paperback-original_price", "hardcover-original_price"])

# 創建一個副本來調整價格
adjusted_data = filtered_data.copy()
adjusted_data["paperback-original_price"] = adjusted_data["paperback-original_price"].apply(lambda x: min(x, 100))
adjusted_data["hardcover-original_price"] = adjusted_data["hardcover-original_price"].apply(lambda x: min(x, 100))

# 設置軸的範圍和單位長
min_val = 0
max_val = 100
buffer = 5  # 預留邊緣緩衝區
min_val -= buffer
max_val += buffer

# 創建散佈圖
fig = px.scatter(
    adjusted_data,
    x="paperback-original_price",
    y="hardcover-original_price",
    labels={"paperback-original_price": "Paperback Original Price", "hardcover-original_price": "Hardcover Original Price"},
    title="Original Price",
)

fig.update_layout(xaxis=dict(range=[min_val, max_val]), yaxis=dict(range=[min_val, max_val]), width=600, height=600, xaxis_scaleanchor="y")  # 保證圖形是正方形

# 添加45度線
fig.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(dash="dash"), showlegend=False))

fig.show()

In [18]:
import plotly.express as px
import plotly.graph_objects as go

# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["paperback-original_price", "hardcover-original_price"])

# 創建一個副本來調整價格
adjusted_data = filtered_data.copy()
adjusted_data["paperback-adjusted"] = adjusted_data["paperback-original_price"].apply(lambda x: min(x, 100))
adjusted_data["hardcover-adjusted"] = adjusted_data["hardcover-original_price"].apply(lambda x: min(x, 100))

# 添加一個標誌來識別是否被調整過
adjusted_data["adjusted"] = adjusted_data.apply(lambda row: row["paperback-original_price"] > 100 or row["hardcover-original_price"] > 100, axis=1)

# 設置軸的範圍和單位長
min_val = 0
max_val = 100
buffer = 5  # 預留邊緣緩衝區
min_val -= buffer
max_val += buffer

# 創建散佈圖
fig = px.scatter(
    adjusted_data,
    x="paperback-adjusted",
    y="hardcover-adjusted",
    symbol="adjusted",
    labels={"paperback-adjusted": "Paperback Original Price", "hardcover-adjusted": "Hardcover Original Price"},
    title="Original Price",
)

fig.update_layout(xaxis=dict(range=[min_val, max_val]), yaxis=dict(range=[min_val, max_val]), width=600, height=600, xaxis_scaleanchor="y", showlegend=False)  # 保證圖形是正方形  # 不顯示圖例

# 添加45度線
fig.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(color="red", dash="dash"), showlegend=False))

fig.show()

In [66]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["paperback-original_price", "hardcover-original_price"])

# 創建一個副本來調整價格
adjusted_data = filtered_data.copy()
adjusted_data["paperback-adjusted"] = adjusted_data["paperback-original_price"].apply(lambda x: min(x, 100))
adjusted_data["hardcover-adjusted"] = adjusted_data["hardcover-original_price"].apply(lambda x: min(x, 100))

# 設置軸的範圍和單位長
min_val = 0
max_val = 100
buffer = 5  # 預留邊緣緩衝區
min_val -= buffer
max_val += buffer

# 創建第一個子圖（未調整價格）
fig1 = make_subplots(rows=1, cols=2, subplot_titles=("Original Price (Unadjusted)", "Selling Price (Unadjusted)"), x_title="Paperback Price", y_title="Hardcover Price")

# 原價-未調整
scatter1 = px.scatter(filtered_data, x="paperback-original_price", y="hardcover-original_price")
scatter1.update_traces(showlegend=False)
fig1.add_traces(scatter1.data, rows=1, cols=1)

# 售價-未調整
scatter2 = px.scatter(filtered_data, x="paperback-selling_price", y="hardcover-selling_price")
scatter2.update_traces(showlegend=False)
fig1.add_traces(scatter2.data, rows=1, cols=2)

# 更新子圖佈局，設置軸範圍和單位長
fig1.update_layout(
    height=600,
    width=1200,
    title_text="Comparison of Original and Selling Prices (Unadjusted)",
    xaxis1=dict(range=[min_val, max_val]),
    yaxis1=dict(range=[min_val, max_val]),
    xaxis2=dict(range=[min_val, max_val]),
    yaxis2=dict(range=[min_val, max_val]),
)

# 添加45度線
for i in range(1, 3):
    fig1.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(color="red", dash="dash"), showlegend=False), row=1, col=i)

# 創建第二個子圖（調整價格）
fig2 = make_subplots(rows=1, cols=2, subplot_titles=("Original Price (Adjusted)", "Selling Price (Adjusted)"), x_title="Paperback Price", y_title="Hardcover Price")

# 原價-調整
scatter3 = px.scatter(adjusted_data, x="paperback-adjusted", y="hardcover-adjusted")
scatter3.update_traces(showlegend=False)
fig2.add_traces(scatter3.data, rows=1, cols=1)

# 售價-調整
scatter4 = px.scatter(adjusted_data, x="paperback-selling_price", y="hardcover-selling_price")
scatter4.update_traces(showlegend=False)
fig2.add_traces(scatter4.data, rows=1, cols=2)

# 更新子圖佈局，設置軸範圍和單位長
fig2.update_layout(
    height=600,
    width=1200,
    title_text="Comparison of Original and Selling Prices (Adjusted)",
    xaxis1=dict(range=[min_val, max_val]),
    yaxis1=dict(range=[min_val, max_val]),
    xaxis2=dict(range=[min_val, max_val]),
    yaxis2=dict(range=[min_val, max_val]),
)

# 添加45度線
for i in range(1, 3):
    fig2.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(color="red", dash="dash"), showlegend=False), row=1, col=i)

fig1.show()
fig2.show()

In [65]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["paperback-original_price", "hardcover-original_price"])

# 創建一個副本來調整價格
adjusted_data = filtered_data.copy()
adjusted_data["paperback-adjusted"] = adjusted_data["paperback-original_price"].apply(lambda x: min(x, 100))
adjusted_data["hardcover-adjusted"] = adjusted_data["hardcover-original_price"].apply(lambda x: min(x, 100))

# 添加一個標誌來識別是否被調整過
adjusted_data["adjusted"] = adjusted_data.apply(lambda row: row["paperback-original_price"] > 100 or row["hardcover-original_price"] > 100, axis=1)

# 設置軸的範圍和單位長
min_val = 0
max_val = 100
buffer = 5  # 預留邊緣緩衝區
min_val -= buffer
max_val += buffer

# 創建第一個子圖（未調整價格）
fig1 = make_subplots(rows=1, cols=2, subplot_titles=("Original Price (Unadjusted)", "Selling Price (Unadjusted)"), x_title="Paperback Price", y_title="Hardcover Price")

# 原價-未調整
scatter1 = px.scatter(filtered_data, x="paperback-original_price", y="hardcover-original_price")
scatter1.update_traces(showlegend=False)
fig1.add_traces(scatter1.data, rows=1, cols=1)

# 售價-未調整
scatter2 = px.scatter(filtered_data, x="paperback-selling_price", y="hardcover-selling_price")
scatter2.update_traces(showlegend=False)
fig1.add_traces(scatter2.data, rows=1, cols=2)

# 更新子圖佈局，設置軸範圍和單位長
fig1.update_layout(
    height=600,
    width=1200,
    title_text="Comparison of Original and Selling Prices (Unadjusted)",
    xaxis1=dict(range=[min_val, max_val]),
    yaxis1=dict(range=[min_val, max_val]),
    xaxis2=dict(range=[min_val, max_val]),
    yaxis2=dict(range=[min_val, max_val]),
)

# 添加45度線
for i in range(1, 3):
    fig1.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(color="red", dash="dash"), showlegend=False), row=1, col=i)

# 創建第二個子圖（調整價格）
fig2 = make_subplots(rows=1, cols=2, subplot_titles=("Original Price (Adjusted)", "Selling Price (Adjusted)"), x_title="Paperback Price", y_title="Hardcover Price")

# 原價-調整
scatter3 = px.scatter(adjusted_data, x="paperback-adjusted", y="hardcover-adjusted", symbol="adjusted")
scatter3.update_traces(showlegend=False)
fig2.add_traces(scatter3.data, rows=1, cols=1)

# 售價-調整
scatter4 = px.scatter(adjusted_data, x="paperback-selling_price", y="hardcover-selling_price", symbol="adjusted")
scatter4.update_traces(showlegend=False)
fig2.add_traces(scatter4.data, rows=1, cols=2)

# 更新子圖佈局，設置軸範圍和單位長
fig2.update_layout(
    height=600,
    width=1200,
    title_text="Comparison of Original and Selling Prices (Adjusted)",
    xaxis1=dict(range=[min_val, max_val]),
    yaxis1=dict(range=[min_val, max_val]),
    xaxis2=dict(range=[min_val, max_val]),
    yaxis2=dict(range=[min_val, max_val]),
)

# 添加45度線
for i in range(1, 3):
    fig2.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(color="red", dash="dash"), showlegend=False), row=1, col=i)
fig1.write_image("figs_0723(1dep)/hardcover_paperback_price_scatter.svg")
fig1.show()
fig2.write_image("figs_0723(1dep)/hardcover_paperback_price_scatter_initial.svg")
fig2.show()

In [64]:
import plotly.express as px
import plotly.graph_objects as go

# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["paperback-discount_rate", "hardcover-discount_rate"])

# 設置軸的範圍和單位長
min_val = 0
max_val = max(filtered_data["paperback-discount_rate"].max(), filtered_data["hardcover-discount_rate"].max())
buffer = (max_val - min_val) * 0.05  # 預留邊緣緩衝區
max_val += buffer

# 創建散佈圖
fig = px.scatter(
    filtered_data,
    x="paperback-discount_rate",
    y="hardcover-discount_rate",
    labels={"paperback-discount_rate": "Paperback Discount Rate", "hardcover-discount_rate": "Hardcover Discount Rate"},
    title="Discount Rates for Paperback and Hardcover",
)

fig.update_layout(xaxis=dict(scaleanchor="y", scaleratio=1, range=[min_val, max_val]), yaxis=dict(range=[min_val, max_val]), width=600, height=600)  # 保證圖形是正方形

# 添加45度線
fig.add_trace(go.Scatter(x=[min_val, max_val], y=[min_val, max_val], mode="lines", line=dict(color="red", dash="dash"), showlegend=False))
fig.write_image("figs_0723(1dep)/discount_rate_scatter.svg")
fig.show()

In [11]:
data_2dep_top100["hardcover_over_paperback-selling_price"] = data_2dep_top100["hardcover-selling_price"] / data_2dep_top100["paperback-selling_price"]
data_2dep_top100["hardcover_over_paperback-original_price"] = data_2dep_top100["hardcover-original_price"] / data_2dep_top100["paperback-original_price"]

In [40]:
percentiles = [0.2, 0.4, 0.6, 0.8]
quantile_values = filtered_data["hardcover_over_paperback-selling_price"].quantile(percentiles)
print(quantile_values)

0.2    0.973589
0.4    1.333623
0.6    1.656457
0.8    2.230226
Name: hardcover_over_paperback-selling_price, dtype: float64


In [63]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["hardcover_over_paperback-selling_price", "hardcover_over_paperback-original_price"])

# 設置軸的範圍和單位長
min_val = min(filtered_data["hardcover_over_paperback-selling_price"].min(), filtered_data["hardcover_over_paperback-original_price"].min())
max_val = max(filtered_data["hardcover_over_paperback-selling_price"].max(), filtered_data["hardcover_over_paperback-original_price"].max())
buffer = (max_val - min_val) * 0.10  # 預留邊緣緩衝區
min_val -= buffer
max_val += buffer

# 創建子圖
fig = make_subplots(rows=1, cols=2, subplot_titles=("Hardcover Over Paperback Selling Price", "Hardcover Over Paperback Original Price"), x_title="Ratio", y_title="Count")

# 售價直方圖
hist1 = px.histogram(filtered_data, x="hardcover_over_paperback-selling_price", nbins=30, text_auto=True)
hist1.update_traces(showlegend=False)
fig.add_traces(hist1.data, rows=1, cols=1)

# 原價直方圖
hist2 = px.histogram(filtered_data, x="hardcover_over_paperback-original_price", nbins=30, text_auto=True)
hist2.update_traces(showlegend=False)
fig.add_traces(hist2.data, rows=1, cols=2)

# 更新子圖佈局，設置軸範圍和單位長
fig.update_layout(height=600, width=1200, title_text="Histograms of Hardcover Over Paperback Prices")

fig.update_xaxes(range=[min_val, max_val], row=1, col=1)
fig.update_xaxes(range=[min_val, max_val], row=1, col=2)
fig.update_yaxes(range=[0, 675], row=1, col=1)
fig.update_yaxes(range=[0, 675], row=1, col=2)
fig.write_image("figs_0723(1dep)/hardcover_over_paperback_histogram_initial.svg")
fig.show()

In [62]:
# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["hardcover_over_paperback-selling_price", "hardcover_over_paperback-original_price"])

# 定義區間範圍
bins = [0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2, 2.5, 3, 3.5, 4, float("inf")]

# 根據區間範圍計算頻率
filtered_data["selling_price_binned"] = pd.cut(filtered_data["hardcover_over_paperback-selling_price"], bins=bins)
filtered_data["original_price_binned"] = pd.cut(filtered_data["hardcover_over_paperback-original_price"], bins=bins)

selling_price_counts = filtered_data["selling_price_binned"].value_counts().sort_index()
original_price_counts = filtered_data["original_price_binned"].value_counts().sort_index()

# 設置區間標籤
labels = ["0-0.25", "0.25-0.5", "0.5-0.75", "0.75-1.0", "1.0-1.25", "1.25-1.5", "1.5-1.75", "1.75-2.0", "2.0-2.5", "2.5-3.0", "3.0-3.5", "3.5-4.0", ">4"]

# 創建子圖
fig = make_subplots(rows=1, cols=2, subplot_titles=("Hardcover Over Paperback Selling Price", "Hardcover Over Paperback Original Price"), x_title="Ratio", y_title="Count")

# 售價長條圖
fig.add_trace(go.Bar(x=labels, y=selling_price_counts.values, text=selling_price_counts.values, textposition="auto", name="Selling Price"), row=1, col=1)

# 原價長條圖
fig.add_trace(
    go.Bar(x=labels, y=original_price_counts.values, text=original_price_counts.values, textposition="auto", name="Original Price", marker_color="#636EFA"), row=1, col=2  # 設置原價長條圖的顏色為藍色
)

# 設置相同的刻度範圍
max_y = max(selling_price_counts.max(), original_price_counts.max())
buffer = max_y * 0.1  # 預留邊緣緩衝區

fig.update_yaxes(range=[0, max_y + buffer], row=1, col=1)
fig.update_yaxes(range=[0, max_y + buffer], row=1, col=2)

# 更新子圖佈局
fig.update_layout(height=600, width=1200, title_text="Bar Charts of Hardcover Over Paperback Prices", showlegend=False)
fig.write_image("figs_0723(1dep)/hardcover_over_paperback_histogram.svg")
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [12]:
# 篩選掉缺少必要數據的行
filtered_data = data_2dep_top100.dropna(subset=["hardcover_over_paperback-selling_price", "hardcover_over_paperback-original_price"])

# 定義區間範圍
bins = [0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2, 2.5, 3, 3.5, 4, float("inf")]

# 根據區間範圍計算頻率
filtered_data["selling_price_binned"] = pd.cut(filtered_data["hardcover_over_paperback-selling_price"], bins=bins)
filtered_data["original_price_binned"] = pd.cut(filtered_data["hardcover_over_paperback-original_price"], bins=bins)

selling_price_counts = filtered_data["selling_price_binned"].value_counts().sort_index()
original_price_counts = filtered_data["original_price_binned"].value_counts().sort_index()

# 設置區間標籤
labels = ["0-0.25", "0.25-0.5", "0.5-0.75", "0.75-1.0", "1.0-1.25", "1.25-1.5", "1.5-1.75", "1.75-2.0", "2.0-2.5", "2.5-3.0", "3.0-3.5", "3.5-4.0", ">4"]
# 創建子圖
fig = make_subplots(rows=1, cols=2, subplot_titles=("精裝售價 / 平裝售價", "精裝定價 / 平裝定價"), x_title="", y_title="", column_widths=[0.5, 0.5])

# 售價長條圖
fig.add_trace(go.Bar(x=labels, y=selling_price_counts.values, name="Selling Price", marker_color="#1f77b4"), row=1, col=1)

# 原價長條圖
fig.add_trace(go.Bar(x=labels, y=original_price_counts.values, name="Original Price", marker_color="#1f77b4"), row=1, col=2)  # 設置原價長條圖的顏色為藍色

# 設置相同的刻度範圍
max_y = max(selling_price_counts.max(), original_price_counts.max())
buffer = max_y * 0.1  # 預留邊緣緩衝區

fig.update_yaxes(title_font=dict(size=20), tickfont=dict(size=20), range=[0, max_y + buffer], row=1, col=1)
fig.update_yaxes(title_font=dict(size=20), tickfont=dict(size=20), range=[0, max_y + buffer], row=1, col=2)
fig.update_xaxes(tickfont=dict(size=16), row=1, col=1, title_standoff=100)  # 左子圖
fig.update_xaxes(tickfont=dict(size=16), row=1, col=2, title_standoff=100)  # 右子圖
fig.update_annotations(font=dict(size=20))

# 更新子圖佈局
fig.update_layout(
    height=700,
    width=1400,
    title_text="Bar Charts of Hardcover Over Paperback Prices",
    showlegend=False,
    # plot_bgcolor="rgba(200, 200, 200, 0.5)",  # 設置灰色背景
)

fig.update_layout(font=dict(family=r"C:\Users\USER\OneDrive\桌面\Fonts\TaipeiSansTCBeta-Regular.ttf", color="Black"))
fig.update_layout(margin=dict(l=150, r=150, t=100, b=150))

fig.write_image("figs_0723(1dep)/hardcover_over_paperback_histogram.svg")
fig.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data["selling_price_binned"] = pd.cut(filtered_data["hardcover_over_paperback-selling_price"], bins=bins)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data["original_price_binned"] = pd.cut(filtered_data["hardcover_over_paperback-original_price"], bins=bins)


In [13]:
import pandas as pd

# 創建 DataFrame 儲存售價區間頻率
selling_price_data = pd.DataFrame({"Selling Price Binned": selling_price_counts.index, "Selling Price Count": selling_price_counts.values})

# 創建 DataFrame 儲存原價區間頻率
original_price_data = pd.DataFrame({"Original Price Binned": original_price_counts.index, "Original Price Count": original_price_counts.values})

# 匯出為 Excel，分別放在不同的工作表中
with pd.ExcelWriter("1125/hardcover_over_paperback_price_counts.xlsx") as writer:
    selling_price_data.to_excel(writer, sheet_name="Selling Price Data", index=False)
    original_price_data.to_excel(writer, sheet_name="Original Price Data", index=False)

In [59]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


def plot_rank_price_ratio(data_con: pd.DataFrame) -> None:
    # 提取相關欄位
    paperback_rank = data_con["paperback-whole_rank"]
    hardcover_rank = data_con["hardcover-whole_rank"]
    paperback_price = data_con["paperback-selling_price"]
    hardcover_price = data_con["hardcover-selling_price"]
    title = data_con["title"]

    # 計算價格比例
    price_ratio = hardcover_price / paperback_price

    # 剔除缺失值
    valid = ~(paperback_rank.isna() | hardcover_rank.isna() | price_ratio.isna())
    paperback_rank = paperback_rank[valid]
    hardcover_rank = hardcover_rank[valid]
    price_ratio = price_ratio[valid]
    title = title[valid]

    # 計算百分位數
    quantiles = price_ratio.quantile([0, 0.2, 0.4, 0.6, 0.8, 1.0])

    # 分類價格比例
    def get_price_category(ratio):
        if ratio <= quantiles[0.2]:
            return f"0.00~{quantiles[0.2]:.2f}"
        elif quantiles[0.2] < ratio <= quantiles[0.4]:
            return f"{quantiles[0.2]:.2f}~{quantiles[0.4]:.2f}"
        elif quantiles[0.4] < ratio <= quantiles[0.6]:
            return f"{quantiles[0.4]:.2f}~{quantiles[0.6]:.2f}"
        elif quantiles[0.6] < ratio <= quantiles[0.8]:
            return f"{quantiles[0.6]:.2f}~{quantiles[0.8]:.2f}"
        else:
            return f">{quantiles[0.8]:.2f}"

    price_category = price_ratio.apply(get_price_category)

    # 自定義色彩，按範圍大小排序
    color_map = {
        f"0.00~{quantiles[0.2]:.2f}": "purple",
        f"{quantiles[0.2]:.2f}~{quantiles[0.4]:.2f}": "blue",
        f"{quantiles[0.4]:.2f}~{quantiles[0.6]:.2f}": "green",
        f"{quantiles[0.6]:.2f}~{quantiles[0.8]:.2f}": "orange",
        f">{quantiles[0.8]:.2f}": "red",
    }

    # 圖表範圍列表
    ranges = [(0, 4_000_000), (0, 1_000_000), (0, 100_000), (0, 10_000)]

    for min_val, max_val in ranges:
        buffer = max_val * 0.05  # 加5%的buffer
        actual_max_val = max_val + buffer

        # 建立子圖
        fig = make_subplots(rows=1, cols=1)

        # 添加主要散點圖
        for category in sorted(color_map.keys()):
            mask = price_category == category
            fig.add_trace(
                go.Scatter(
                    x=paperback_rank[mask],
                    y=hardcover_rank[mask],
                    mode="markers",
                    marker=dict(color=color_map[category]),
                    name=category,
                    hovertemplate="Paperback Rank: %{x}<br>Hardcover Rank: %{y}<br>Price Ratio: %{customdata[0]:.2f}<br>Title: %{customdata[1]}<extra></extra>",
                    customdata=list(zip(price_ratio[mask], title[mask])),
                )
            )

        # 增加 x=y 的直線
        fig.add_trace(go.Scatter(x=[min_val, actual_max_val], y=[min_val, actual_max_val], mode="lines", line=go.scatter.Line(color="black", dash="dash"), showlegend=False))

        # 標示超出範圍的點
        out_of_range = (paperback_rank > max_val) | (hardcover_rank > max_val)
        fig.add_trace(
            go.Scatter(
                x=paperback_rank[out_of_range].clip(upper=max_val),
                y=hardcover_rank[out_of_range].clip(upper=max_val),
                mode="markers",
                marker=dict(symbol="x", color=price_category[out_of_range].map(color_map)),
                showlegend=False,
                hovertemplate="Paperback Rank: %{x}<br>Hardcover Rank: %{y}<br>Price Ratio: %{customdata[0]:.2f}<br>Title: %{customdata[1]}<extra></extra>",
                customdata=list(zip(price_ratio[out_of_range], title[out_of_range])),
            )
        )

        # 更新圖表布局
        fig.update_layout(
            height=800,
            width=800,
            title=f"Book Rank Comparison (Rank Range: {min_val} to {max_val})",
            xaxis=dict(title="Paperback Whole Rank", range=[-buffer, actual_max_val], scaleanchor="y", scaleratio=1),
            yaxis=dict(title="Hardcover Whole Rank", range=[-buffer, actual_max_val], scaleanchor="x", scaleratio=1),
            legend_title="Price Ratio Category",
            legend=dict(traceorder="normal"),
        )

        # 顯示圖表
        fig.write_image(f"rank_price_ratio_{min_val}_to_{max_val}.svg")
        fig.show()


# 假設 data_2dep_top100 是你的數據
plot_rank_price_ratio(data_2dep_top100)

11/21 更新

In [17]:
test_df = data_2dep_top100["paperback-whole_rank"]
# data_2dep_top100[data_2dep_top100["paperback-whole_rank"] <= 100000]["paperback-whole_rank"]
test_df = test_df[test_df <= 100000]

In [36]:
data_2dep_top100

Unnamed: 0,title,category,paperback-original_price,paperback-selling_price,paperback-publish_year,paperback-discount_rate,paperback-whole_rank,paperback-dep_rank,kindle-original_price,kindle-selling_price,...,other_format-whole_rank,other_format-dep_rank,hardcover-original_price,hardcover-selling_price,hardcover-publish_year,hardcover-discount_rate,hardcover-whole_rank,hardcover-dep_rank,better_whole_rank,category_1dep
0,Mad Libs: For the Fans: Taylor Swift Edition,Arts & Photography | Music;Humor & Entertainme...,5.99,4.78,2024.0,0.797997,138.0,1.0,,,...,211975.0,1.0,,,,,,,138.0,Arts & Photography
1,How To Draw Everything: 300 Drawings of Cute S...,Arts & Photography | Drawing;Arts & Photograph...,10.90,10.90,2023.0,1.000000,280.0,1.0,,,...,51202.0,1.0,,,,,,,280.0,Arts & Photography
2,The Art of Home: A Designer Guide to Creating ...,Arts & Photography | Architecture;Arts & Photo...,,,,,,,21.99,21.99,...,443815.0,1.0,20.0,20.00,2023.0,1.000000,428.0,1.0,428.0,Arts & Photography
3,Piano Adventures - Lesson Book - Level 1,Arts & Photography | Music,9.99,8.99,1996.0,0.899900,464.0,6.0,9.99,6.99,...,,,,,,,,,464.0,Arts & Photography
4,"ABC Coloring Book: Color 100+ Animals, Birds, ...",Arts & Photography | Drawing;Children's Books ...,3.99,3.99,2022.0,1.000000,493.0,5.0,,,...,,,,,,,,,493.0,Arts & Photography
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2391,Rick Steves Provence & the French Riviera (Tra...,Travel | Europe,22.99,13.99,2022.0,0.608525,12937.0,83.0,16.99,16.99,...,,,,,,,,,12937.0,Travel
2392,Rick Steves Pocket Italy's Cinque Terre,"Travel | Food, Lodging & Transportation",14.99,13.22,2023.0,0.881921,12944.0,76.0,14.99,9.99,...,,,,,,,,,12944.0,Travel
2393,Frommer's Paris 2024 (Frommer's Travel Guides),Travel | Europe,22.95,20.99,2023.0,0.914597,13079.0,64.0,22.95,10.99,...,,,,,,,,,13079.0,Travel
2394,National Geographic Atlas of the National Parks,"Travel | Food, Lodging & Transportation;Travel...",14.99,14.99,2021.0,1.000000,82363.0,44.0,,,...,,,65.0,40.99,2019.0,0.630615,13159.0,44.0,13159.0,Travel


In [15]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


def plot_rank_price_ratio(data_con: pd.DataFrame) -> None:
    # 提取相關欄位
    data_con = data_con[data_con["paperback-whole_rank"] <= 100_000]
    data_con = data_con[data_con["hardcover-whole_rank"] <= 100_000]
    paperback_rank = data_con["paperback-whole_rank"]
    hardcover_rank = data_con["hardcover-whole_rank"]
    paperback_price = data_con["paperback-selling_price"]
    hardcover_price = data_con["hardcover-selling_price"]
    title = data_con["title"]

    # 計算價格比例
    price_ratio = hardcover_price / paperback_price

    # 剔除缺失值
    valid = ~(paperback_rank.isna() | hardcover_rank.isna() | price_ratio.isna())
    paperback_rank = paperback_rank[valid]
    hardcover_rank = hardcover_rank[valid]
    price_ratio = price_ratio[valid]
    title = title[valid]

    # 分類價格比例
    def get_price_category(ratio):
        if ratio <= 1.0:
            return f"0.0~{1.0:.1f}"
        elif 1.0 < ratio <= 1.3:
            return f"{1.0:.1f}~{1.3:.1f}"
        elif 1.3 < ratio <= 1.7:
            return f"{1.3:.1f}~{1.7:.1f}"
        elif 1.7 < ratio <= 2.2:
            return f"{1.3:.1f}~{2.2:.1f}"
        else:
            return f">{2.2:.1f}"

    price_category = price_ratio.apply(get_price_category)

    # 自定義色彩，按範圍大小排序
    color_map = {
        f"0.0~{1.0:.1f}": "purple",
        f"{1.0:.1f}~{1.3:.1f}": "blue",
        f"{1.3:.1f}~{1.7:.1f}": "green",
        f"{1.3:.1f}~{2.2:.1f}": "orange",
        f">{2.2:.1f}": "red",
    }

    # 圖表範圍列表
    ranges = [(0, 100_000)]

    for min_val, max_val in ranges:
        buffer = max_val * 0.05
        actual_max_val = max_val + buffer

        # 建立子圖
        fig = make_subplots(rows=1, cols=1)

        # 添加主要散點圖
        for category in sorted(color_map.keys()):
            mask = price_category == category
            fig.add_trace(
                go.Scatter(
                    x=paperback_rank[mask],
                    y=hardcover_rank[mask],
                    mode="markers",
                    marker=dict(color=color_map[category]),
                    name=category,
                    hovertemplate="Paperback Rank: %{x}<br>Hardcover Rank: %{y}<br>Price Ratio: %{customdata[0]:.2f}<br>Title: %{customdata[1]}<extra></extra>",
                    customdata=list(zip(price_ratio[mask], title[mask])),
                )
            )

        # 增加 x=y 的直線
        fig.add_trace(go.Scatter(x=[min_val, actual_max_val], y=[min_val, actual_max_val], mode="lines", line=go.scatter.Line(color="black", dash="dash"), showlegend=False))

        # 更新圖表布局
        fig.update_layout(
            height=700,
            width=830,
            title=f"精裝與平裝總排名散佈圖",
            xaxis=dict(title="平裝總排名", range=[-buffer, actual_max_val], scaleanchor="y", scaleratio=1),
            yaxis=dict(title="精裝總排名", range=[-buffer, actual_max_val], scaleanchor="x", scaleratio=1),
            legend_title="精裝相對平裝價格倍數",
            legend=dict(traceorder="normal"),
        )
        fig.update_layout(font=dict(family=r"C:\Users\USER\OneDrive\桌面\Fonts\TaipeiSansTCBeta-Regular.ttf", size=16, color="Black"))
        fig.update_layout(margin=dict(l=100, r=150, t=100, b=100))
        # 顯示圖表
        fig.write_image(f"精裝與平裝總排名散佈圖 - 以精裝相對平裝價格倍數上色.jpg")
        fig.show()
        # 創建匯出的 DataFrame
        colors = price_category.map(color_map)
        export_data = pd.DataFrame({"Paperback Rank": paperback_rank, "Hardcover Rank": hardcover_rank, "Price Ratio": price_ratio, "Color": colors})

        # 匯出為 Excel
        export_data.to_excel("1125/hardcover_paperback_rank_price_ratio.xlsx", index=False)


# 假設 data_2dep_top100 是你的數據
plot_rank_price_ratio(data_2dep_top100)

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


def plot_rank_price_ratio(data_con: pd.DataFrame) -> None:
    # 提取相關欄位
    data_con = data_con[data_con["paperback-whole_rank"] <= 100_000]
    data_con = data_con[data_con["kindle-whole_rank"] <= 100_000]
    paperback_rank = data_con["paperback-whole_rank"]
    kindle_rank = data_con["kindle-whole_rank"]
    paperback_price = data_con["paperback-selling_price"]
    kindle_price = data_con["kindle-selling_price"]
    title = data_con["title"]

    # 計算價格比例
    price_ratio = kindle_price / paperback_price

    # 剔除缺失值
    valid = ~(paperback_rank.isna() | kindle_rank.isna() | price_ratio.isna())
    paperback_rank = paperback_rank[valid]
    kindle_rank = kindle_rank[valid]
    price_ratio = price_ratio[valid]
    title = title[valid]

    # 分類價格比例
    def get_price_category(ratio):
        if ratio <= 0.3:
            return f"0.0~0.3"
        elif 0.3 < ratio <= 0.5:
            return f"0.3~0.5"
        elif 0.5 < ratio <= 0.7:
            return f"0.5~0.7"
        elif 0.7 < ratio <= 1.0:
            return f"0.7~1.0"
        else:
            return f">1.0"

    price_category = price_ratio.apply(get_price_category)

    # 自定義色彩，按範圍大小排序
    color_map = {
        f"0.0~0.3": "purple",
        f"0.3~0.5": "blue",
        f"0.5~0.7": "green",
        f"0.7~1.0": "orange",
        f">1.0": "red",
    }

    # 圖表範圍列表
    ranges = [(0, 100_000)]

    for min_val, max_val in ranges:
        buffer = max_val * 0.05
        actual_max_val = max_val + buffer

        # 建立子圖
        fig = make_subplots(rows=1, cols=1)

        # 添加主要散點圖
        for category in sorted(color_map.keys()):
            mask = price_category == category
            fig.add_trace(
                go.Scatter(
                    x=paperback_rank[mask],
                    y=kindle_rank[mask],
                    mode="markers",
                    marker=dict(color=color_map[category]),
                    name=category,
                    hovertemplate="Paperback Rank: %{x}<br>Hardcover Rank: %{y}<br>Price Ratio: %{customdata[0]:.2f}<br>Title: %{customdata[1]}<extra></extra>",
                    customdata=list(zip(price_ratio[mask], title[mask])),
                )
            )

        # 增加 x=y 的直線
        fig.add_trace(go.Scatter(x=[min_val, actual_max_val], y=[min_val, actual_max_val], mode="lines", line=go.scatter.Line(color="black", dash="dash"), showlegend=False))

        # 更新圖表布局
        fig.update_layout(
            height=700,
            width=830,
            title=f"電子與平裝總排名散佈圖",
            xaxis=dict(title="平裝總排名", range=[-buffer, actual_max_val], scaleanchor="y", scaleratio=1),
            yaxis=dict(title="電子總排名", range=[-buffer, actual_max_val], scaleanchor="x", scaleratio=1),
            legend_title="電子相對平裝價格倍數",
            legend=dict(traceorder="normal"),
        )
        fig.update_layout(font=dict(family=r"C:\Users\USER\OneDrive\桌面\Fonts\TaipeiSansTCBeta-Regular.ttf", size=16, color="Black"))
        fig.update_layout(margin=dict(l=100, r=150, t=100, b=100))
        # 顯示圖表
        fig.write_image(f"精裝與平裝總排名散佈圖 - 以精裝相對平裝價格倍數上色.jpg")
        fig.show()




# 假設 data_2dep_top100 是你的數據
plot_rank_price_ratio(data_2dep_top100)