In [None]:
import pandas as pd
import numpy as np
import os
from tkinter import Tk, Label, Button, filedialog, Listbox, Text, Scrollbar, END, MULTIPLE
from tkinter.messagebox import showinfo
from collections import Counter, defaultdict

# Raise pandas' display limits so wide/long frames print without truncation.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 10000)

# Font sizes used by the main menu and by the sub-menu widgets respectively.
menu_font_size = 20
submenu_font_size = 12
# Sorting option most recently picked in the batch-upload menu; None until
# set_sorting_method() has been called.
last_sorting_method = None

def calc_revenue(df, month=None, store_name=None):
    """Sum the order subtotals, skipping cancelled and uncollected orders.

    Args:
        df: Order table containing the columns '狀態', '訂購日期', '賣場名稱'
            and '小計(A)'. Line breaks inside column names are stripped on a
            private copy, so the caller's frame is left untouched.
        month: Optional collection of month strings as they appear between
            the slashes of '訂購日期' (e.g. ['01', '12']). Falsy means every
            month. Rows whose date has no second '/'-separated field are
            excluded when a month filter is given.
        store_name: Optional collection of store names to keep. Falsy means
            every store.

    Returns:
        The total of '小計(A)' over the remaining rows as a Python int.
        Thousands separators are removed and numeric cells are accepted as
        well as strings; blank cells count as 0.
    """
    # rename() returns a new frame; assigning to df.columns would mutate the
    # caller's DataFrame as a hidden side effect.
    df = df.rename(columns=lambda col: str(col).replace('\n', ''))

    # Only the first line of the status cell carries the order state.
    status = df['狀態'].astype(str).str.split('\n').str[0]
    keep = ~status.isin(['取消訂單', '逾期未取件']).to_numpy(dtype=bool)

    if month:
        order_month = df['訂購日期'].astype(str).str.split('/').str.get(1)
        in_month = order_month.map(lambda m: isinstance(m, str) and m in month)
        keep = keep & in_month.to_numpy(dtype=bool)

    if store_name:
        in_store = df['賣場名稱'].map(lambda x: x in store_name)
        keep = keep & in_store.to_numpy(dtype=bool)

    # Cells may be strings such as '1,200' or already numeric, so normalise
    # through str before parsing.
    subtotal = df.loc[keep, '小計(A)'].astype(str).str.replace(',', '', regex=False)
    return int(pd.to_numeric(subtotal).sum())


def read_data(df_paths, skiprows=None):
    """Load several Excel workbooks and stack them into one DataFrame.

    Args:
        df_paths: Iterable of workbook paths; must not be empty, because
            pandas.concat rejects an empty list.
        skiprows: Passed through to pandas.read_excel for every file.

    Returns:
        The rows of all workbooks, in the order given, with a fresh
        0..n-1 index.
    """
    frames = [pd.read_excel(path, skiprows=skiprows) for path in df_paths]
    # Every file arrives with its own 0..n-1 labels; without ignore_index the
    # combined frame would carry duplicate labels and label-based lookups on
    # it would return several rows instead of one.
    return pd.concat(frames, axis=0, ignore_index=True)


def select_files():
    """Ask for Excel files and make them the current selection.

    When the dialog returns no paths, the existing selection and the
    listbox are left as they were.
    """
    chosen = filedialog.askopenfilenames(title="選擇檔案", filetypes=[("Excel files", "*.xlsx *.xls")])
    if not chosen:
        return

    file_listbox.delete(0, END)
    # Slice assignment replaces the contents of the shared list in place.
    selected_files[:] = chosen
    for path in chosen:
        file_listbox.insert(END, os.path.basename(path))


def calculate_revenue():
    """Compute revenue for the selected files and months and show it in a dialog.

    Shows a hint and returns when no files are selected. With no month
    highlighted in the listbox the month list is empty, which calc_revenue
    treats as "no month filter".
    """
    if not selected_files:
        showinfo("提示", "未選擇任何檔案")
        return

    # Listbox entries are '1'..'12'; pad them to two digits before passing
    # them on as the month filter.
    picked = month_listbox.curselection()
    month = [str(month_listbox.get(idx)).zfill(2) for idx in picked]

    orders = read_data(selected_files, skiprows=2)
    # No store filter is offered by this menu, so None is always passed.
    revenue = calc_revenue(df=orders, month=month, store_name=None)
    showinfo("營收結果", f"總營收: {revenue}")


def load_stroke_data(filepath):
    """Read a stroke-count CSV into a {character: stroke count} dict.

    The first four lines of the file are skipped; the next line must be a
    header containing the columns 'Character' and 'Strokes'.
    """
    table = pd.read_csv(filepath, skiprows=4)
    characters = table['Character']
    strokes = table['Strokes']
    return {char: count for char, count in zip(characters, strokes)}

def stroke_sort(input_list, stroke_dict):
    """Return the strings of *input_list* sorted by per-character stroke count.

    Each string is compared character by character on (stroke count,
    character). Characters missing from *stroke_dict* count as 0 strokes, so
    they sort before any character with a positive count. The input is not
    modified.
    """
    def string_key(text):
        # One (strokes, char) pair per character; lists compare element-wise.
        return [(stroke_dict.get(ch, 0), ch) for ch in text]

    return sorted(input_list, key=string_key)


def calculate_buyer_data(df, sort_by=None):
    """Aggregate purchase count and total spend per buyer.

    The sheet is read by position: column 0 is a row label, column 3 the
    item price and columns 4 onward hold buyer names, one per cell. Rows
    whose first cell equals '下架日/開售日' are ignored, and full-width spaces
    in buyer names are replaced with ordinary spaces.

    Args:
        df: The sheet as a DataFrame. It is not modified, and its index
            labels are not required to be unique.
        sort_by: "筆畫" orders the result by stroke count of the label,
            "金額" by total price (descending); "不排序", None or any other
            value keeps first-seen order.

    Returns:
        DataFrame with the columns '＊規格' ("<buyer> (<count>)"), '＊數量'
        (always 1) and '＊價格' (total spend of that buyer).
    """
    keep = df.iloc[:, 0].map(lambda x: x != '下架日/開售日').to_numpy(dtype=bool)
    df_filtered = df.loc[keep]
    prices = df_filtered.iloc[:, 3]
    # Build the cleaned buyer columns as a new frame instead of assigning
    # back into the filtered slice (chained assignment).
    buyers = df_filtered.iloc[:, 4:].replace({'\u3000': ' '}, regex=True)
    buyer_data = defaultdict(lambda: {'count': 0, 'price': 0})

    # Pair prices and rows by position: a label lookup such as prices[i]
    # returns several values when files were concatenated and labels repeat.
    for price, (_, row) in zip(prices, buyers.iterrows()):
        price = int(price)
        for buyer in row.dropna():
            buyer_data[buyer]['price'] += price
            buyer_data[buyer]['count'] += 1
    df_val = [[f"{key} ({val['count']})", 1, val['price']] for key, val in buyer_data.items()]
    df_buyer = pd.DataFrame(df_val, columns=['＊規格', '＊數量', '＊價格'])

    # Sort by selected option
    if sort_by == "筆畫":
        stroke_dict = load_stroke_data('kangxi-strokecount/kangxi-strokecount.csv')
        sorted_specifications = stroke_sort(df_buyer['＊規格'].tolist(), stroke_dict)

        # Reindex the DataFrame based on the sorted order
        df_buyer = df_buyer.set_index('＊規格').loc[sorted_specifications].reset_index()
    elif sort_by == "金額":
        df_buyer = df_buyer.sort_values(by="＊價格", ascending=False)
    elif sort_by == "不排序":
        pass
    return df_buyer


def select_files():
    """Open the file dialog and replace the current selection with the result.

    Nothing changes when the dialog returns an empty result.
    """
    paths = filedialog.askopenfilenames(title="選擇檔案", filetypes=[("Excel files", "*.xlsx *.xls")])
    if paths:
        file_listbox.delete(0, END)
        selected_files.clear()
        selected_files.extend(paths)

        # The listbox shows file names only; selected_files keeps full paths.
        names = [os.path.basename(path) for path in paths]
        for name in names:
            file_listbox.insert(END, name)


def display_buyer_data(sort_by=None):
    """Show the per-buyer summary of the selected files in the output box.

    Shows a hint and returns when no files are selected. *sort_by* is
    forwarded unchanged to calculate_buyer_data.
    """
    if not selected_files:
        showinfo("提示", "未選擇任何檔案")
        return

    print(selected_files)
    summary = calculate_buyer_data(read_data(selected_files), sort_by=sort_by)

    # Replace whatever the text box currently shows.
    output_text.delete("1.0", END)
    output_text.insert(END, summary.to_string(index=False))

# def save_batch_upload_file():
#     if not selected_files:
#         showinfo("提示", "未選擇任何檔案")
#         return

#     df = read_data(selected_files)
#     df_buyer = calculate_buyer_data(df)
    
#     save_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel files", "*.xlsx *.xls")])
#     if save_path:
#         df_buyer.to_excel(save_path, index=False)
#         showinfo("成功", f"批次上架檔已儲存至 {save_path}")


def save_batch_upload_file():
    """Write the per-buyer summary to an Excel file chosen by the user.

    The summary is rebuilt from the selected files using the sorting method
    most recently chosen via set_sorting_method(). A message is shown and
    nothing is written when no sorting method has been chosen yet or when no
    files are selected. If the save dialog returns no path, no file is
    written.
    """
    if last_sorting_method is None:
        showinfo("錯誤", "請先選擇排序方式！")
        return

    # A sorting button can be pressed before any file is chosen, which still
    # records the method; without this guard read_data() would then fail on
    # an empty file list.
    if not selected_files:
        showinfo("提示", "未選擇任何檔案")
        return

    df = read_data(selected_files)
    df_buyer = calculate_buyer_data(df, sort_by=last_sorting_method)
    save_path = filedialog.asksaveasfilename(defaultextension=".xlsx", filetypes=[("Excel files", "*.xlsx *.xls")])
    if save_path:
        df_buyer.to_excel(save_path, index=False)
        showinfo("成功", f"批次上架檔已儲存至 {save_path}")


def set_sorting_method(method):
    """Remember *method* as the active sorting option and refresh the preview.

    The choice is stored in the module-level last_sorting_method, which
    save_batch_upload_file() reads later.
    """
    global last_sorting_method
    last_sorting_method = method
    display_buyer_data(sort_by=method)


def main_menu():
    """Clear the root window and draw the top-level menu."""
    for child in root.winfo_children():
        child.destroy()

    big_font = ("Arial", menu_font_size)
    Label(root, text="請選擇功能", font=big_font).pack(pady=20)
    # One button per feature, each opening its own sub-menu.
    for caption, handler in (("計算營收", revenue_menu), ("批次上架", batch_upload_menu)):
        Button(root, text=caption, font=big_font, command=handler).pack(pady=20)


def adjust_window_size():
    """Resize the root window to the width and height its content requests."""
    # Process pending idle tasks first so the requested size reflects the
    # widgets that were just packed.
    root.update_idletasks()
    size = (root.winfo_reqwidth(), root.winfo_reqheight())
    root.geometry("{}x{}".format(*size))


def revenue_menu():
    """Clear the root window and draw the revenue-calculation screen.

    Creates the module-level month_listbox and file_listbox widgets that
    select_files() and calculate_revenue() operate on.
    """
    global month_listbox, file_listbox

    for widget in root.winfo_children():
        widget.destroy()

    Label(root, text="請選擇月份:", font=('Arial', submenu_font_size)).pack(pady=10)

    month_listbox = Listbox(root, selectmode='extended', width=20, height=12, exportselection=False, font=('Arial', submenu_font_size))
    for month in range(1, 13):
        month_listbox.insert(END, str(month))
    month_listbox.pack()

    Button(root, text="選擇檔案", font=('Arial', submenu_font_size), command=select_files).pack(pady=10)
    Label(root, text="已選擇的檔案:", font=('Arial', submenu_font_size)).pack()
    file_listbox = Listbox(root, width=50, height=8, font=('Arial', submenu_font_size))
    file_listbox.pack()
    # selected_files outlives the widgets destroyed above, so list it again;
    # otherwise the screen would look empty while "開始計算" still ran on the
    # earlier selection.
    for path in selected_files:
        file_listbox.insert(END, os.path.basename(path))

    Button(root, text="開始計算", font=('Arial', submenu_font_size), command=calculate_revenue).pack(pady=20)
    Button(root, text="返回主選單", font=('Arial', submenu_font_size), command=main_menu).pack(pady=10)
    adjust_window_size()

def batch_upload_menu():
    """Clear the root window and draw the batch-upload screen.

    Creates the module-level file_listbox and output_text widgets that
    select_files() and display_buyer_data() operate on.
    """
    global file_listbox, output_text

    for widget in root.winfo_children():
        widget.destroy()

    Label(root, text="批次上架工具", font=('Arial', submenu_font_size)).pack(pady=10)
    Button(root, text="選擇檔案", font=('Arial', submenu_font_size), command=select_files).pack(pady=5)
    Label(root, text="已選擇的檔案:", font=('Arial', submenu_font_size)).pack()

    file_listbox = Listbox(root, width=50, height=5, font=('Arial', submenu_font_size))
    file_listbox.pack()
    # selected_files outlives the widgets destroyed above, so list it again;
    # otherwise the screen would look empty while the sort and export
    # buttons still operated on the earlier selection.
    for path in selected_files:
        file_listbox.insert(END, os.path.basename(path))

    # Each sort button records its method and refreshes the preview.
    Button(root, text="不排序", font=('Arial', submenu_font_size), command=lambda: set_sorting_method("不排序")).pack(pady=5)
    Button(root, text="按買家筆畫排序", font=('Arial', submenu_font_size), command=lambda: set_sorting_method("筆畫")).pack(pady=5)
    Button(root, text="按金額排序", font=('Arial', submenu_font_size), command=lambda: set_sorting_method("金額")).pack(pady=5)

    output_text = Text(root, width=60, height=15)
    output_text.pack(pady=10)

    Button(root, text="生成批次上架檔", font=('Arial', submenu_font_size), command=save_batch_upload_file).pack(pady=10)
    Button(root, text="返回主選單", font=('Arial', submenu_font_size), command=main_menu).pack(pady=10)
    adjust_window_size()

# Create the single application window; every menu function draws into it.
root = Tk()
root.title("營收與買家統計工具")
# Full paths picked in the file dialog; shared by the menu callbacks.
selected_files = []

main_menu()

# Use the main menu's requested size as the minimum window size, then set the
# initial size and screen position before entering the event loop.
root.update_idletasks()
root.minsize(root.winfo_reqwidth(), root.winfo_reqheight())
root.geometry('300x200+100+100')
root.mainloop()


In [3]:
haha_dict = {'林芯語': 870, '葉璃': 300, 'さと': 535, '予夏': 225, 'Jyun Ely': 150, '莊筑婷': 2330, 'Chen Pei-Ting': 570, '江曉楓': 510, 'みはる': 450, '依西比': 560, '繾綣': 2615, '熙 んぃ': 150, '公司組の 專屬': 150, '昭燦米': 150, '鍾姍珊': 1945, '李曉隆': 1415, '曾于庭': 225, '錢米拉': 5595, '吳宣': 2685, '周誼芃': 905, 'Čÿ Åñnė': 690, 'Jessica Tung': 1735, 'Ye Yating': 485, '魚兒水中游': 1650, 'リボン': 150, 'Yu Yu': 2225, '米瞇': 300, '夢野丹': 75, 'Tin Yui': 260, 'Amber Sarion': 700, '伊悠奈': 615, '林咲': 300, '陳虹羽': 65, 'Li Ev': 390, '對氯二苯': 325, '好想睡.': 325, '沈音': 130, '聞舟渡舫': 390, '陳婉茜': 1230, '饒佩宜': 1580, '吳亭儀': 130, '白月曦': 820, '涼圓': 660, '黎恩妤': 780, 'Tzyi Peng': 520, '黃可均': 130, '白維洛': 680, '芮昕': 590, '邵一辰': 660, '尤膩': 4070, 'Iris Lai': 1100, '沈瀅棋': 1845, '王孟潔': 1420, '夏蔚凜': 520, '韋宥曈': 825, '陳冠伶': 2425, '梅仁曖': 130, '徐里子': 245, 'Laura Chen': 195, '林芸瑄': 3750, 'Ina Hazel': 850, '張簡甯芳': 450, '蔡嘉良': 1125, '陳俞安': 455, '渡暝': 140, '私の未名': 525, 'Kelly Zhou': 1480, 'Gayview Mahat': 660, 'Liu Lai Wan': 445, '賴雨彤': 55, 'Li Yun Xuan': 910, '邱梅子': 1815, '洪子媛': 1220, 'Moly': 765, '許千薇': 165, 'Louise Liu': 365, 'こまつまる': 590, '吳姵萱': 2345, 'Jen': 550, 'Fang Lin': 435, 'Lin Yu': 685, 'Apple Hsu': 1515, 'Shine Yun': 1035, '顧瑾': 500, '林燕慈': 1970, '月姬リナコ': 325, '白玖': 1065, '伊凜': 380, '牧諾': 195, '林玥': 970, 'ダレモナイ': 2550, '凌洛': 200, '麻吉霜': 65, '林淙歆': 905, '江珮綺': 210, '蕭映雪': 475, '寧寧尹': 320, '電子 洋芋片': 70, 'Mu Ku': 605, '菓冰茶': 1220, '藍因': 740, '林容而': 385, 'Qi Gong': 1020, '秦時宇': 405, '凌雨': 70, 'KB Chen': 70, '施霈妤': 655, '周月': 140, '李素裳': 140, '陳妍綺': 390, 'You Xuan': 140, '林司信': 140, '嵐言': 265, '白凌': 70, '喻澪夏': 350, '黎卓': 530, '施羽柔': 575, 'Xiao Xie': 165, '楊千千': 55, 'マシュマロ チョコクッキー': 55, 'Dola': 55, '烏伊曉': 110, 'Yi Ning': 370, 'くじら': 450, '葉詩星': 2890, '郭家慧': 190, '陳穎禎': 190, '葉圻圻': 640, '桐柚': 120, 'Xiao Chi': 390, 'Zi Yan': 620, '慕容染': 185, 'Feng Alberich': 395, 'ア草': 185, '潘嘉琪': 120, '陳若麒': 630, '每天都想睡': 390, '潤羽 瓶中の露': 260, 'ユージオ': 540, '墨羽緋': 130, 'Karen Huang': 260, '楊米恩': 195, '白飯': 130, '韓千楓': 1560, '楊筑君': 360, '葉稻森冥': 130, '海螺不可生吃': 260, 
'Connie Huang': 230, 'Yunro Hu': 130, 'Qiao Yu': 130, '改變': 130, '林意晴': 230, '王筱潔': 3355, '列印白芷': 70, 'ルテ ィシア': 550, '臨韶': 220, 'Xiao Xiao': 600, '洪佳音': 355, '심옥여': 170, 'Ou Uo': 205, '林容廷': 170, 'Tamias Lin': 740, '南ゆきね': 485, '邱筠婷': 130, '莊倢': 130, '林千尋': 130, '蕭伊涵': 270, '張雅婷': 260, '被預購霸凌的大冤種': 390, '撒欸偷': 195, 'Tzu-chun Chen': 380, 'Godof Daidai': 285, '羽葉 はねば': 245, 'YaoYao Fox': 220, 'Leng Jing': 760, '奚嚴奧': 780, 'Shih Yu Zhen': 575, '夏月幻': 170, '詹旻': 270, '醬ソース': 525, 'Yi Sin': 470, '流螢の秘密女友': 1575, 'Bobo Lee': 200, '施晴欣': 610, '蔡昕桐': 175, '艾煥祥': 2500, '賈抄': 1825, '許廷瑋': 1225, '吳濬語': 290, 'Wang Jing Wen': 575, '牧野まさき': 130, 'Jessica Chen': 1785, '咖哩飯要拌': 390, '藜蘿': 420, '咘咘': 260, '葉星染': 180, '陳宇芝': 385, '吳雙': 310, '林姮孜': 450, 'Ou Ling': 785, '葉子': 300, '林梓茵': 180, '玄米律師': 150, '巡林犬': 75, '任瑀瑄': 75, 'Leo Fu': 90, '櫻井神樂': 390, '芯ダンジェリン': 80, '悠雪': 150, '陳奕儒': 290, '安琪': 380, '張芯慈': 740, '鄞丞翊': 150, '林翊全': 150, '紅葉逐荒波': 240, '蕭如晴': 460, '한이슬': 600, '呂沛蓉': 410, '許芝瑜': 360, '陳彥桃': 540, '白噗啾': 1000, 'Chi Syuan Liang': 120, '神父': 720, '落櫻遇海': 240, '陳雅欣': 120, '諾諾': 120, '葉沛凌': 170, '何沛恩': 120, '映月雪': 120, '伊絲': 240, '郁熙': 630, '陳奕均': 120, 'Hao Kun': 520, '蔡幸慈': 235, '沄音': 480, '璃澄 りず': 240, 'KU Wert': 875, '廖卉姍': 305, '張珈語': 390, '黎諾': 370, '戴辰': 220, '火然': 240, '米小婷': 220, '林家誼': 775, 'Xu Xin Yu': 280, '胡柔恩': 180, '林季頤': 50, '簡子瑤': 115, 'Zhu Ting': 600, '夏和': 300, '曾曾': 200, '海星星': 100, '陳瑜君': 250, '楊毓涵': 100, 'Lin Yuan': 100, '雲靜風吟': 50, "A'": 150, '李珮綺': 100, '吳家妤': 945, 'Wei Yu': 780, 'Snow Blind': 975, 'Sheena Chang': 650, 'Lizes Wang': 690, '豆皮 烏龍麵': 195, '余玖': 260, '晴陽暖照冬日霜': 130, 'Hank Yu': 130, '藍映汝': 65, 'East Melon': 325, '許姿穎': 195, '新台幣戰士': 130, '游昶紳': 325, '三葉りな': 260, '莊沛潔': 325, '黃品融': 130, '莫塵': 130, 'La Xi': 130, '梨花白': 195, '我欲乘風去': 130, '呂晴雅': 455, '米修斯': 325, 'Yu-Ting Huang': 390, 'Chun Ouo': 260, '數羊毛辮睡覺': 65, 'Lan Ying': 130, 'Rio Ren': 195, '黃培瑄': 390, 'Kuo Pudding': 130, '范存昀': 130, '安南': 195, '龍龘龘': 130, '路川': 130, 'Ben Crisis': 195, 
'唐慧潔': 130, '朔の猜': 130, '名字好難取': 65, '張晏溱': 130, 'Lu Yin': 195, 'Betty Cheng': 130, '林婕琳': 130, 'Weita Huang': 130, 'Hermit Reverse': 65, 'Ciao Yu': 65, '一諾千金': 130, '葉采蓁': 65, '梁郡恩': 130, 'MX FI': 65}
# Total of all values in haha_dict. The result is named `total` rather than
# `sum` so the builtin sum() is not shadowed for the rest of the session.
total = sum(haha_dict.values())
total

162945

In [14]:
# check 


from collections import defaultdict
import pandas as pd

# Load the workbook to check (presumably in the column layout that
# calculate_buyer_data expects — confirm against the file).
df = pd.read_excel('../data/1108(14團)_P.xlsx')

def load_stroke_data(filepath):
    """Build a character -> stroke-count lookup from a CSV file.

    Four leading lines are skipped before the header row, which must
    provide the columns 'Character' and 'Strokes'.
    """
    stroke_table = pd.read_csv(filepath, skiprows=4)
    lookup = {}
    for character, strokes in zip(stroke_table['Character'], stroke_table['Strokes']):
        lookup[character] = strokes
    return lookup

def stroke_sort(input_list, stroke_dict):
    """Sort strings by the stroke counts of their characters.

    Strings are compared character by character on (stroke count,
    character); a character absent from *stroke_dict* is given 0 strokes.
    A new list is returned and *input_list* is left unchanged.
    """
    def sort_key(text):
        key = []
        for ch in text:
            strokes = stroke_dict[ch] if ch in stroke_dict else 0
            key.append((strokes, ch))
        return key

    ordered = list(input_list)
    ordered.sort(key=sort_key)
    return ordered

# Example usage:

def calculate_buyer_data(df, sort_by=None):
    """Aggregate purchase count and total spend per buyer.

    The sheet is read by position: column 0 is a row label, column 3 the
    item price and columns 4 onward hold buyer names, one per cell. Rows
    whose first cell equals '下架日/開售日' are ignored, and full-width spaces
    in buyer names are replaced with ordinary spaces.

    Args:
        df: The sheet as a DataFrame. It is not modified, and its index
            labels are not required to be unique.
        sort_by: "筆畫" orders the result by stroke count of the label,
            "金額" by total price (descending); any other value keeps
            first-seen order.

    Returns:
        DataFrame with the columns '＊規格' ("<buyer> (<count>)"), '＊數量'
        (always 1) and '＊價格' (total spend of that buyer).
    """
    keep = df.iloc[:, 0].map(lambda x: x != '下架日/開售日').to_numpy(dtype=bool)
    df_filtered = df.loc[keep]
    prices = df_filtered.iloc[:, 3]
    # Build the cleaned buyer columns as a new frame instead of assigning
    # back into the filtered slice (chained assignment).
    buyers = df_filtered.iloc[:, 4:].replace({'\u3000': ' '}, regex=True)
    buyer_data = defaultdict(lambda: {'count': 0, 'price': 0})

    # Pair prices and rows by position: a label lookup such as prices[i]
    # returns several values when index labels repeat.
    for price, (_, row) in zip(prices, buyers.iterrows()):
        price = int(price)
        for buyer in row.dropna():
            buyer_data[buyer]['price'] += price
            buyer_data[buyer]['count'] += 1
    df_val = [[f"{key} ({val['count']})", 1, val['price']] for key, val in buyer_data.items()]
    df_buyer = pd.DataFrame(df_val, columns=['＊規格', '＊數量', '＊價格'])

    # Sort by selected option
    if sort_by == "筆畫":
        stroke_dict = load_stroke_data('kangxi-strokecount/kangxi-strokecount.csv')
        sorted_specifications = stroke_sort(df_buyer['＊規格'].tolist(), stroke_dict)

        # Reindex the DataFrame based on the sorted order
        df_buyer = df_buyer.set_index('＊規格').loc[sorted_specifications].reset_index()
    elif sort_by == "金額":
        df_buyer = df_buyer.sort_values(by="＊價格", ascending=False)

    return df_buyer


# if __name__ == '__main__':
#     buyer_data = calculate_buyer_data(df, sort_by="筆畫").iloc[:, 0].apply(lambda x: x.split(' (')[0]).to_list()
#     find_similar_pairs(buyer_data, 2)


In [13]:
import pandas as pd
import Levenshtein
from pprint import pprint
# Read the stroke-count data and convert it into a dict
# (character -> stroke count).
stroke_data = pd.read_csv('kangxi-strokecount/kangxi-strokecount.csv', skiprows=4)
stroke_dict = dict(zip(stroke_data['Character'], stroke_data['Strokes']))

def get_stroke_count(char):
    """Return the stroke count of *char* from the module-level stroke_dict."""
    # Characters that are not in the dictionary count as 0 strokes.
    return stroke_dict.get(char, 0)

def calculate_string_stroke_count(s):
    """Return the combined stroke count of all characters in *s* (0 if empty)."""
    total = 0
    for char in s:
        total += get_stroke_count(char)
    return total

def find_similar_pairs(strings, threshold_distance, threshold_strokes=0):
    """Return the pairs of strings that look like variants of each other.

    Every unordered pair (earlier item first) is kept when both hold:
      * the total stroke counts differ by at most *threshold_strokes*, and
      * the Levenshtein distance is at most *threshold_distance*.

    Args:
        strings: Indexable sequence of strings to compare pairwise.
        threshold_distance: Largest accepted edit distance.
        threshold_strokes: Largest accepted difference in total strokes.

    Returns:
        List of (first, second) tuples in comparison order.
    """
    stroke_counts = {}
    for text in strings:
        stroke_counts[text] = calculate_string_stroke_count(text)

    matches = []
    count = len(strings)
    for i, first in enumerate(strings):
        for j in range(i + 1, count):
            second = strings[j]

            # Stroke filter first: pairs whose totals differ too much are
            # skipped before the edit distance is computed.
            if abs(stroke_counts[first] - stroke_counts[second]) > threshold_strokes:
                continue

            if Levenshtein.distance(first, second) <= threshold_distance:
                matches.append((first, second))

    return matches

# Load the sheet and strip the " (<count>)" suffix from each label so only
# the buyer names remain, in stroke order.
df = pd.read_excel('../data/1108(14團)_1_P.xlsx')
buyer_data = calculate_buyer_data(df, sort_by="筆畫").iloc[:, 0].apply(lambda x: x.split(' (')[0]).to_list()
# Rule 1: edit distance of at most 2 and identical total stroke count.
print('規則一、距離2以內 筆畫相同(抓符號或空格錯誤)')
rule1 = find_similar_pairs(buyer_data, threshold_distance=2, threshold_strokes=0)
#pprint(rule1)
# Rule 2: edit distance of at most 1, stroke difference allowed up to 999.
print('規則二、距離1以內 不限筆畫差(抓中文錯字，假設只會錯一個字，抓不到錯兩個字以上)')
rule2 = find_similar_pairs(buyer_data, threshold_distance=1, threshold_strokes=999)
#pprint(rule2)
# Lift the row limit and show both rule results side by side.
pd.set_option('display.max_rows', None)
pd.concat([pd.DataFrame({'規則一': rule1}), pd.DataFrame({'規則二': rule2})], axis=1)

規則一、距離2以內 筆畫相同(抓符號或空格錯誤)
規則二、距離1以內 不限筆畫差(抓中文錯字，假設只會錯一個字，抓不到錯兩個字以上)


Unnamed: 0,規則一,規則二
0,"(A, さと)","(Ciao Yu, Qiao Yu)"
1,"(Ciao Yu, Qiao Yu)","(王孟潔, 王筱潔)"
2,"(Dola, Moly)","(白玖, 白凌)"
3,"(Lin Yu, Lin Yuan)","(白玖, 白飯)"
4,"(Lin Yu, Tin Yui)","(白玖, 余玖)"
5,"(Mu Ku, Yu Yu)","(白凌, 白飯)"
6,"(Xiao Xiao, Xiao Xie)","(伊絲, 伊凜)"
7,"(Yi Ning, Yi Sin)","(安南, 安琪)"
8,"(ア草, 周月)","(吳宣, 吳雙)"
9,"(予夏, 神父)","(李珮綺, 江珮綺)"


In [9]:
# Number of pairs matched by rule 1.
len(rule1)

96