In [None]:
import os
import re

import pandas as pd
import numpy as np
from tkinter import StringVar
from tkinter import Tk, Label, Button, filedialog, Listbox, Text, Scrollbar, END, MULTIPLE
from tkinter.messagebox import showinfo
from collections import Counter, defaultdict

# Widen pandas' display limits so large frames are rendered without truncation.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 10000)

# Font size used by the main menu widgets.
menu_font_size = 20
# Font size used by the widgets of every sub-menu screen.
submenu_font_size = 12
# Sorting option most recently chosen via set_sorting_method(); None until one is picked.
last_sorting_method = None

def calc_revenue(df, month=None, store_name=None):
    """Sum the '小計(A)' column of an order table after filtering rows.

    Rows whose status (the first line of the '狀態' cell) is '取消訂單' or
    '逾期未取件' are always excluded.

    Args:
        df: Order table. Newline characters inside column names are ignored
            when looking columns up. The caller's frame is left unmodified.
        month: Optional container of month strings. A row is kept when the
            second '/'-separated field of its '訂購日期' value is ``in`` it.
            A falsy value disables the month filter.
        store_name: Optional container of store names. A row is kept when
            its '賣場名稱' value is ``in`` it. A falsy value disables the
            store filter.

    Returns:
        The integer total of '小計(A)' over the remaining rows (0 when no
        rows remain).
    """
    # rename() builds a new frame, so the caller's column labels are not
    # rewritten as a side effect of computing the revenue.
    df = df.rename(
        columns=lambda col: col.replace('\n', '') if isinstance(col, str) else col
    )

    excluded_statuses = ['取消訂單', '逾期未取件']
    condition = df['狀態'].apply(lambda x: x.split('\n')[0] not in excluded_statuses)
    df = df.loc[condition]

    if month:
        condition = df['訂購日期'].apply(lambda x: x.split('/')[1] in month)
        df = df.loc[condition]

    if store_name:
        condition = df['賣場名稱'].apply(lambda x: x in store_name)
        df = df.loc[condition]

    # Cells may be text with thousands separators ('1,200') or already numeric
    # (Excel number cells); normalise through str before parsing.
    subtotal = df['小計(A)'].astype(str).str.replace(',', '', regex=False)
    revenue = pd.to_numeric(subtotal).astype(int).sum()
    return revenue


def read_data(df_paths, skiprows=None):
    """Read every Excel file in ``df_paths`` and stack the results row-wise.

    ``skiprows`` is forwarded unchanged to ``pd.read_excel`` for each file.
    The original row labels of each file are kept in the combined frame.
    """
    frames = []
    for path in df_paths:
        frames.append(pd.read_excel(path, skiprows=skiprows))
    combined = pd.concat(frames, axis=0)
    return combined


def select_files():
    """Ask the user for Excel files and mirror the choice in the file listbox.

    When the dialog is cancelled nothing changes. Otherwise the module-level
    ``selected_files`` list is replaced in place with the chosen paths and
    ``file_listbox`` shows their base names.
    """
    chosen = filedialog.askopenfilenames(title="選擇檔案", filetypes=[("Excel files", "*.xlsx *.xls")])
    if not chosen:
        return

    file_listbox.delete(0, END)
    # Slice assignment keeps the same list object that other handlers read.
    selected_files[:] = chosen

    for path in chosen:
        file_listbox.insert(END, os.path.basename(path))


def calculate_revenue():
    """Compute the revenue of the selected files and show it in a dialog.

    The months highlighted in ``month_listbox`` are zero-padded to two
    characters and used as the month filter; with nothing highlighted the
    list is empty and no month filter applies. No store filter is used.
    """
    if not selected_files:
        showinfo("提示", "未選擇任何檔案")
        return

    month = [
        str(month_listbox.get(index)).zfill(2)
        for index in month_listbox.curselection()
    ]

    df = read_data(selected_files, skiprows=2)
    revenue = calc_revenue(df=df, month=month, store_name=None)
    showinfo("營收結果", f"總營收: {revenue}")


def load_stroke_data(filepath):
    """Build a character -> stroke-count mapping from a CSV file.

    The first four lines of the file are skipped; the remainder must provide
    'Character' and 'Strokes' columns.
    """
    table = pd.read_csv(filepath, skiprows=4)
    return {
        character: strokes
        for character, strokes in zip(table['Character'], table['Strokes'])
    }

def stroke_sort(input_list, stroke_dict):
    """Return the strings of ``input_list`` ordered by per-character strokes.

    Each string is compared character by character. A character's key is
    ``(stroke count, character)``; characters missing from ``stroke_dict``
    count as 0 strokes, so they sort ahead of the listed ones.
    """
    def sort_key(text):
        # One (strokes, char) pair per character; lists compare element-wise.
        return [
            (stroke_dict[ch], ch) if ch in stroke_dict else (0, ch)
            for ch in text
        ]

    ordered = list(input_list)
    ordered.sort(key=sort_key)
    return ordered


def calculate_buyer_data(df, sort_by=None):
    """Aggregate, per buyer, how many items they bought and the total price.

    Rows whose first column equals '下架日/開售日' or '團名' are ignored. For
    the remaining rows the fourth column is the item price and every column
    from the fifth onwards holds a buyer name (empty cells are skipped).
    Full-width spaces in buyer names are replaced by ordinary spaces.

    Args:
        df: Raw sheet data. It may be a concatenation of several sheets
            with repeated row labels; rows are processed by position.
        sort_by: "筆畫" orders buyers by stroke count (using the Kangxi
            stroke-count CSV), "金額" orders by total price descending;
            "不排序", None or any other value keeps first-seen order.

    Returns:
        DataFrame with the columns '＊規格' ("<buyer> (<count>)"),
        '＊數量' (always 1) and '＊價格' (total price for that buyer).
    """
    first_column = df.iloc[:, 0]
    keep = ~first_column.isin(['下架日/開售日', '團名'])
    # A plain boolean array selects by position, so repeated row labels
    # (from concatenated files) cannot confuse the selection.
    df_filtered = df.loc[keep.to_numpy()]

    prices = df_filtered.iloc[:, 3]
    # Clean a derived frame rather than assigning back into a filtered slice.
    buyers = df_filtered.iloc[:, 4:].replace({'\u3000': ' '}, regex=True)
    buyer_data = defaultdict(lambda: {'count': 0, 'price': 0})

    # Pair each price with its row positionally; a label lookup such as
    # prices[i] returns a Series (not a scalar) when labels repeat.
    for price_cell, (_, row) in zip(prices, buyers.iterrows()):
        price = int(price_cell)
        for buyer in row.dropna():
            buyer_data[buyer]['price'] += price
            buyer_data[buyer]['count'] += 1

    df_val = [[f"{key} ({val['count']})", 1, val['price']] for key, val in buyer_data.items()]
    df_buyer = pd.DataFrame(df_val, columns=['＊規格', '＊數量', '＊價格'])

    # Sort by selected option
    if sort_by == "筆畫":
        stroke_dict = load_stroke_data('kangxi-strokecount/kangxi-strokecount.csv')
        sorted_specifications = stroke_sort(df_buyer['＊規格'].tolist(), stroke_dict)

        # Reindex the DataFrame based on the sorted order
        df_buyer = df_buyer.set_index('＊規格').loc[sorted_specifications].reset_index()
    elif sort_by == "金額":
        df_buyer = df_buyer.sort_values(by="＊價格", ascending=False)
    # "不排序" (and anything else) keeps the first-seen order.
    return df_buyer


def select_files():
    """Let the user pick Excel files and list their base names.

    This definition repeats an earlier ``select_files`` in the same file and,
    being later, is the one bound to the name at run time. A cancelled dialog
    leaves ``selected_files`` and ``file_listbox`` untouched.
    """
    picked_paths = filedialog.askopenfilenames(title="選擇檔案", filetypes=[("Excel files", "*.xlsx *.xls")])
    if picked_paths:
        # Reset both the listbox and the shared path list before refilling.
        file_listbox.delete(0, END)
        del selected_files[:]
        selected_files.extend(picked_paths)

        for base_name in map(os.path.basename, picked_paths):
            file_listbox.insert(END, base_name)


def display_buyer_data(sort_by=None):
    """Show the per-buyer summary of the selected files in ``output_text``.

    ``sort_by`` is passed through to ``calculate_buyer_data``. With no files
    selected an info dialog is shown and nothing else happens.
    """
    if not selected_files:
        showinfo("提示", "未選擇任何檔案")
        return

    # Echo the chosen paths to stdout.
    print(selected_files)
    summary = calculate_buyer_data(read_data(selected_files), sort_by=sort_by)
    rendered = summary.to_string(index=False)

    output_text.delete("1.0", END)
    output_text.insert(END, rendered)

# def save_batch_upload_file():
#     if not selected_files:
#         showinfo("提示", "未選擇任何檔案")
#         return

#     df = read_data(selected_files)
#     df_buyer = calculate_buyer_data(df)
    
#     save_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel files", "*.xlsx *.xls")])
#     if save_path:
#         df_buyer.to_excel(save_path, index=False)
#         showinfo("成功", f"批次上架檔已儲存至 {save_path}")


def save_batch_upload_file():
    """Export the per-buyer summary to an Excel file chosen by the user.

    The summary is rebuilt from ``selected_files`` using the sorting option
    most recently chosen via ``set_sorting_method``. An info dialog is shown
    and nothing is written when no sorting option has been chosen yet, or
    when no files are selected. Cancelling the save dialog writes nothing.
    """
    # Only read here, so no ``global`` declaration is required.
    if last_sorting_method is None:
        showinfo("錯誤", "請先選擇排序方式！")
        return

    # Guard against an empty selection: read_data() would otherwise hand an
    # empty list to pd.concat, which raises ValueError.
    if not selected_files:
        showinfo("提示", "未選擇任何檔案")
        return

    df = read_data(selected_files)
    df_buyer = calculate_buyer_data(df, sort_by=last_sorting_method)

    save_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel files", "*.xlsx *.xls")])
    if save_path:
        df_buyer.to_excel(save_path, index=False)
        showinfo("成功", f"批次上架檔已儲存至 {save_path}")


def set_sorting_method(method):
    """Remember ``method`` as the active sorting option and refresh the view.

    The choice is stored in the module-level ``last_sorting_method`` before
    the buyer summary is redisplayed with it.
    """
    global last_sorting_method

    last_sorting_method = method
    display_buyer_data(method)


def adjust_window_size():
    """Resize the root window to the size requested by its current widgets."""
    # Let Tk process pending geometry work so the requested size is current.
    root.update_idletasks()
    requested_width = root.winfo_reqwidth()
    requested_height = root.winfo_reqheight()
    size_spec = f"{requested_width}x{requested_height}"
    root.geometry(size_spec)


def revenue_menu():
    """Replace the window contents with the revenue-calculation screen.

    Creates the module-level ``month_listbox`` (months 1-12, multi-select)
    and ``file_listbox`` widgets used by the file picker and the revenue
    handler, plus the buttons that drive them.
    """
    global month_listbox, file_listbox

    for child in root.winfo_children():
        child.destroy()

    font = ('Arial', submenu_font_size)

    Label(root, text="請選擇月份:", font=font).pack(pady=10)

    month_listbox = Listbox(root, selectmode='extended', width=20, height=12, exportselection=False, font=font)
    for number in range(1, 13):
        month_listbox.insert(END, str(number))
    month_listbox.pack()

    Button(root, text="選擇檔案", font=font, command=select_files).pack(pady=10)
    Label(root, text="已選擇的檔案:", font=font).pack()

    file_listbox = Listbox(root, width=50, height=8, font=font)
    file_listbox.pack()

    Button(root, text="開始計算", font=font, command=calculate_revenue).pack(pady=20)
    Button(root, text="返回主選單", font=font, command=main_menu).pack(pady=10)
    adjust_window_size()


def process_count_data(input_data):
    """Total the bracketed quantities listed under each title line.

    A non-blank line without a parenthesised part (half-width ``(...)`` or
    full-width ``（...）``) starts a title. Any other line is a data line: the
    first all-digit parenthesised number on it is added to the current
    title. Data lines without such a number, or appearing before any title,
    are ignored.

    :param input_data: str, multi-line text to parse
    :return: dict mapping each title to the sum of its quantities
    """
    totals = {}
    title = None

    for raw_line in input_data.strip().split('\n'):
        entry = raw_line.strip()
        if not entry:
            continue

        # No bracketed part at all -> this line opens (or re-opens) a title.
        if re.search(r'\(.*?\)|（.*?）', entry) is None:
            title = entry
            totals.setdefault(title, 0)
            continue

        found = re.search(r'\((\d+)\)|（(\d+)）', entry)
        if not found or not title:
            continue
        digits = found.group(1) or found.group(2)
        totals[title] += int(digits)

    return totals


def calculate_counts():
    """Parse the text in ``input_text`` and write per-title totals to ``output_text``.

    Shows an info dialog instead when the input box is empty.
    """
    input_data = input_text.get("1.0", END).strip()
    if not input_data:
        showinfo("提示", "請輸入數據！")
        return

    # One "title: total" line per title, in the order titles first appeared.
    totals = process_count_data(input_data)
    report_lines = []
    for title, count in totals.items():
        report_lines.append(f"{title}: {count}")

    output_text.delete("1.0", END)
    output_text.insert(END, "\n".join(report_lines))


def count_menu():
    """Replace the window contents with the quantity-counting screen.

    Creates the module-level ``input_text`` and ``output_text`` widgets that
    ``calculate_counts`` reads from and writes to.
    """
    global input_text, output_text

    for child in root.winfo_children():
        child.destroy()

    font = ("Arial", submenu_font_size)

    Label(root, text="數量統計工具", font=font).pack(pady=10)
    Label(root, text="請輸入資料:", font=font).pack(pady=5)

    input_text = Text(root, width=50, height=15, font=font)
    input_text.pack(pady=5)

    Button(root, text="計算", font=font, command=calculate_counts).pack(pady=10)
    Label(root, text="輸出結果:", font=font).pack(pady=5)

    output_text = Text(root, width=50, height=15, font=font)
    output_text.pack(pady=5)

    Button(root, text="返回主選單", font=font, command=main_menu).pack(pady=10)

    adjust_window_size()


def batch_upload_menu():
    """Replace the window contents with the batch-upload screen.

    Creates the module-level ``file_listbox`` and ``output_text`` widgets and
    one button per sorting option; each button calls ``set_sorting_method``
    with its option.
    """
    global last_sorting_method, file_listbox, output_text

    for child in root.winfo_children():
        child.destroy()

    font = ('Arial', submenu_font_size)

    Label(root, text="批次上架工具", font=font).pack(pady=10)
    Button(root, text="選擇檔案", font=font, command=select_files).pack(pady=5)
    Label(root, text="已選擇的檔案:", font=font).pack()

    file_listbox = Listbox(root, width=50, height=5, font=font)
    file_listbox.pack()

    # (button caption, sorting option handed to set_sorting_method)
    sort_buttons = (
        ("不排序", "不排序"),
        ("按買家筆畫排序", "筆畫"),
        ("按金額排序", "金額"),
    )
    for caption, option in sort_buttons:
        # Bind the option as a default so each button keeps its own value.
        Button(root, text=caption, font=font, command=lambda chosen=option: set_sorting_method(chosen)).pack(pady=5)

    output_text = Text(root, width=60, height=15)
    output_text.pack(pady=10)

    Button(root, text="生成批次上架檔", font=font, command=save_batch_upload_file).pack(pady=10)
    Button(root, text="返回主選單", font=font, command=main_menu).pack(pady=10)
    adjust_window_size()


def buyer_member_menu():
    """Replace the window contents with the member-table screen.

    The user pastes text in which ``---`` separates table rows and each
    non-blank line inside a section becomes one cell. The converted table is
    shown in ``output_text`` and kept on ``buyer_member_menu.processed_df``
    so that it can be exported to CSV afterwards.
    """
    global input_text, output_text

    def convert_input():
        """Turn the pasted text into a DataFrame and display it."""
        raw_text = input_text.get("1.0", "end").strip()

        processed_data = []
        for section in raw_text.split("---"):
            section = section.strip()
            if not section:
                continue
            cells = [cell.strip() for cell in section.split("\n") if cell.strip()]
            processed_data.append(cells)

        # Pad shorter rows with empty strings so every row has equal length.
        max_columns = max((len(cells) for cells in processed_data), default=0)
        processed_data = [
            cells + [""] * (max_columns - len(cells)) for cells in processed_data
        ]

        df = pd.DataFrame(processed_data)
        output_text.delete("1.0", "end")
        output_text.insert("1.0", df.to_string(index=False, header=False))

        # Keep the table on the function object for the CSV export step.
        buyer_member_menu.processed_df = df

    def save_to_csv():
        """Write the most recently converted table to a CSV file."""
        table = getattr(buyer_member_menu, 'processed_df', None)
        if table is None or table.empty:
            showinfo("錯誤", "尚未轉換資料，請先輸入並點擊轉換按鈕。")
            return

        file_path = filedialog.asksaveasfilename(
            defaultextension=".csv",
            filetypes=[("CSV files", "*.csv")],
            title="保存檔案",
        )
        if not file_path:
            return
        buyer_member_menu.processed_df.to_csv(file_path, index=False, header=False, encoding='utf-8-sig')
        showinfo("成功", f"已成功保存為 {file_path}")

    for child in root.winfo_children():
        child.destroy()

    font = ("Arial", submenu_font_size)

    Label(root, text="團員統計工具", font=font).pack(pady=10)
    Label(root, text="請輸入資料(以 --- 表示換行):", font=font).pack(pady=5)

    input_text = Text(root, width=50, height=15, font=font)
    input_text.pack(pady=5)

    Button(root, text="轉換", font=font, command=convert_input).pack(pady=10)
    Label(root, text="輸出結果:", font=font).pack(pady=5)

    output_text = Text(root, width=60, height=15)
    output_text.pack(pady=10)

    Button(root, text="生成團員統計表", font=font, command=save_to_csv).pack(pady=10)
    Button(root, text="返回主選單", font=font, command=main_menu).pack(pady=10)
    adjust_window_size()



def main_menu():
    """Replace the window contents with the top-level feature menu."""
    for child in root.winfo_children():
        child.destroy()

    font = ("Arial", menu_font_size)
    Label(root, text="請選擇功能", font=font).pack(pady=20)

    # (button caption, screen builder invoked on click)
    entries = (
        ("計算營收", revenue_menu),
        ("數量統計", count_menu),
        ("批次上架", batch_upload_menu),
        ("團員統計", buyer_member_menu),
    )
    for caption, open_screen in entries:
        Button(root, text=caption, font=font, command=open_screen).pack(pady=20)

    
# Application start-up: create the single Tk root window that every screen
# builder in this file populates.
root = Tk()
root.title("營收與買家統計工具")
# Shared list of file paths chosen via select_files(); mutated in place.
selected_files = []

# Show the main menu as the first screen.
main_menu()

# Let Tk compute the requested size first, so the minimum window size matches
# what the main menu needs.
root.update_idletasks()
root.minsize(root.winfo_reqwidth(), root.winfo_reqheight())
# Initial size and screen position (width x height + x offset + y offset).
root.geometry('300x200+100+100')
# Hand control to the Tk event loop; this call blocks until the loop ends.
root.mainloop()


In [13]:
import pandas as pd
import Levenshtein
from pprint import pprint
# Read the stroke-count data (skipping the first four lines of the CSV) and
# turn it into a character -> stroke-count dictionary.
stroke_data = pd.read_csv('kangxi-strokecount/kangxi-strokecount.csv', skiprows=4)
stroke_dict = dict(zip(stroke_data['Character'], stroke_data['Strokes']))

def get_stroke_count(char):
    """Return the stroke count of ``char`` from the module-level ``stroke_dict``.

    Characters that are not in the dictionary count as 0 strokes.
    """
    if char in stroke_dict:
        return stroke_dict[char]
    return 0

def calculate_string_stroke_count(s):
    """Return the sum of the stroke counts of every character in ``s``."""
    total = 0
    for character in s:
        total += get_stroke_count(character)
    return total

def find_similar_pairs(strings, threshold_distance, threshold_strokes=0):
    """Return the pairs of strings that look like variants of each other.

    A pair ``(strings[i], strings[j])`` with ``i < j`` is reported when the
    total stroke counts of the two strings differ by at most
    ``threshold_strokes`` and their Levenshtein distance is at most
    ``threshold_distance``. Pairs are returned in index order.
    """
    # Total strokes per string, computed once up front.
    strokes_of = {}
    for text in strings:
        strokes_of[text] = calculate_string_stroke_count(text)

    matches = []
    count = len(strings)
    for left_index in range(count):
        left = strings[left_index]
        for right_index in range(left_index + 1, count):
            right = strings[right_index]

            # Stroke filter first: the edit distance is only computed for
            # pairs that are close enough in stroke count.
            close_in_strokes = abs(strokes_of[left] - strokes_of[right]) <= threshold_strokes
            if close_in_strokes and Levenshtein.distance(left, right) <= threshold_distance:
                matches.append((left, right))

    return matches

# Load one sheet and reduce it to the list of distinct buyer names, ordered by
# stroke count.
df = pd.read_excel('../data/1108(14團)_1_P.xlsx')
# Each entry is formatted as "<buyer> (<count>)". Strip only that trailing
# count by splitting from the right, so a buyer name that itself contains
# " (" is not truncated.
buyer_data = calculate_buyer_data(df, sort_by="筆畫").iloc[:, 0].apply(lambda x: x.rsplit(' (', 1)[0]).to_list()
print('規則一、距離2以內 筆畫相同(抓符號或空格錯誤)')
# Rule 1: edit distance <= 2 with identical total stroke count.
rule1 = find_similar_pairs(buyer_data, threshold_distance=2, threshold_strokes=0)
#pprint(rule1)
print('規則二、距離1以內 不限筆畫差(抓中文錯字，假設只會錯一個字，抓不到錯兩個字以上)')
# Rule 2: edit distance <= 1; the stroke threshold of 999 effectively disables
# the stroke filter.
rule2 = find_similar_pairs(buyer_data, threshold_distance=1, threshold_strokes=999)
#pprint(rule2)
pd.set_option('display.max_rows', None)
# Show both rule results side by side (last expression of the cell).
pd.concat([pd.DataFrame({'規則一': rule1}), pd.DataFrame({'規則二': rule2})], axis=1)

規則一、距離2以內 筆畫相同(抓符號或空格錯誤)
規則二、距離1以內 不限筆畫差(抓中文錯字，假設只會錯一個字，抓不到錯兩個字以上)


Unnamed: 0,規則一,規則二
0,"(A, さと)","(Ciao Yu, Qiao Yu)"
1,"(Ciao Yu, Qiao Yu)","(王孟潔, 王筱潔)"
2,"(Dola, Moly)","(白玖, 白凌)"
3,"(Lin Yu, Lin Yuan)","(白玖, 白飯)"
4,"(Lin Yu, Tin Yui)","(白玖, 余玖)"
5,"(Mu Ku, Yu Yu)","(白凌, 白飯)"
6,"(Xiao Xiao, Xiao Xie)","(伊絲, 伊凜)"
7,"(Yi Ning, Yi Sin)","(安南, 安琪)"
8,"(ア草, 周月)","(吳宣, 吳雙)"
9,"(予夏, 神父)","(李珮綺, 江珮綺)"
