In [1]:
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("mode.chained_assignment", None)
import tkinter as tk
from tkinter import filedialog
from datetime import datetime

In [2]:
def open_file():
    """Ask the user to choose one or more files through a native file dialog.

    A hidden Tk root window hosts the dialog. It is destroyed before returning
    so that repeated calls do not leave extra Tk roots alive in the kernel.

    Returns:
        The value produced by ``filedialog.askopenfilenames`` (the chosen
        path(s); an empty value when the dialog is cancelled).
    """
    root = tk.Tk()
    try:
        # Hide the blank root window; only the dialog itself should be visible.
        root.withdraw()
        file_path = filedialog.askopenfilenames(parent=root)
    finally:
        # Always release the Tk root, even if the dialog raises.
        root.destroy()
    return file_path

In [3]:
def transform(arg):
    """Read one or more CSV files and return them as a single DataFrame.

    Args:
        arg: A sequence of file paths (for example the tuple returned by a file
            dialog) or a single path given as ``str`` / ``os.PathLike``.

    Returns:
        pandas.DataFrame: The only file's contents when one path is given,
        otherwise the row-wise concatenation of all files. Each file's own row
        index is kept, exactly as ``pd.concat`` does by default.

    Raises:
        ValueError: If no path was supplied (e.g. the dialog was cancelled).
    """
    import os

    # A bare path must not be iterated character by character.
    if isinstance(arg, (str, os.PathLike)):
        paths = [arg] if arg else []
    else:
        paths = list(arg)

    if not paths:
        raise ValueError("No files selected: transform() needs at least one CSV path.")

    frames = [pd.read_csv(csv_path) for csv_path in paths]
    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames)

In [4]:
# Prompt for the file(s) to load; `path` holds whatever open_file() returns.
path = open_file()

In [5]:
# Read the selected file(s) into one DataFrame via transform().
df = transform(path)

In [6]:
# Parse both date columns with the same "<full month name> <year>" format.
df = df.assign(
    **{
        date_col: pd.to_datetime(df[date_col], format="%B %Y")
        for date_col in ("Period", "FirstActivity")
    }
)

In [7]:
# Fold the "LSR2" label into "LSR"; every other value is left untouched.
df["BRIDGESTONE TYPE"] = df["BRIDGESTONE TYPE"].replace("LSR2", "LSR")

## 開啟上月的匯總數據

In [9]:
# Second file prompt; the markdown heading above describes this as opening last
# month's aggregated data.
historical = open_file()

In [10]:
# Rebinds `historical` from the selected path(s) to the loaded DataFrame, so
# re-running this cell on its own would hand a DataFrame to transform().
historical = transform(historical)

In [12]:
# Same label clean-up as for the new data: "LSR2" is reported as "LSR".
historical["BRIDGESTONE TYPE"] = historical["BRIDGESTONE TYPE"].replace("LSR2", "LSR")

In [13]:
# Keep only the report columns, in this fixed order.
historical = historical.loc[
    :,
    [
        "Period",
        "Item",
        "BRAND",
        "REGION",
        "BRIDGESTONE TYPE",
        "DIMENSION (Truck Tyres)",
        "RIMDIAMET.'/MM",
        "WIDTH IN INCH",
        "WIDTH IN MM",
        "ASPECT RATIO",
        "TUBE / TUBLESS",
        "LOADINDEX",
        "FirstActivity",
        "SALES UNITS",
        "SALES <LC>",
        "PRICE TWD/UN.",
    ],
]

In [14]:
# Convert both date columns to datetimes; no explicit format is given here, so
# pandas infers it from the stored values.
historical = historical.assign(
    **{
        date_col: pd.to_datetime(historical[date_col])
        for date_col in ("Period", "FirstActivity")
    }
)

In [16]:
# Cast the three measure columns to Python int in a single astype call.
df = df.astype(
    dict.fromkeys(("SALES UNITS", "SALES <LC>", "PRICE TWD/UN."), int)
)

In [17]:
# Restrict the new data to the report columns, in this fixed order.
df = df.loc[
    :,
    [
        "Period",
        "Item",
        "BRAND",
        "REGION",
        "BRIDGESTONE TYPE",
        "DIMENSION (Truck Tyres)",
        "RIMDIAMET.'/MM",
        "WIDTH IN INCH",
        "WIDTH IN MM",
        "ASPECT RATIO",
        "TUBE / TUBLESS",
        "LOADINDEX",
        "FirstActivity",
        "SALES UNITS",
        "SALES <LC>",
        "PRICE TWD/UN.",
    ],
]

In [19]:
# Keep only rows whose Period is on or after the date of the newest Period,
# i.e. drop everything older than the most recent period in the file.
df = df.loc[
    df["Period"] >= df["Period"].max().date().strftime("%Y-%m-%d")
]

In [20]:
# Stack the historical rows on top of the new rows; each frame keeps its own
# row labels (no re-indexing).
result = pd.concat(
    [historical, df],
    axis=0,
    ignore_index=False,
)

In [22]:
# Anchor the year-to-date window on the newest Period in the data, not on the
# wall clock. If the window start came from today's date, processing December
# data in January would put the start after the end, and the between() filter
# that uses these bounds would match no rows.
latest_period = df["Period"].max().date()

# 1 January of the year the newest period belongs to, as "YYYY-MM-DD".
first_day_cur_year = latest_period.replace(month=1, day=1).strftime("%Y-%m-%d")
# Date of the newest period itself, as "YYYY-MM-DD" (upper bound of the window).
last_day_cur_mon = latest_period.strftime("%Y-%m-%d")

In [23]:
def _top_brands_by_units(sales, top_n=9):
    """Return the ``top_n`` BRAND values with the largest total "SALES UNITS".

    Only the "SALES UNITS" column is summed. Summing the whole frame would also
    try to add up the datetime columns, which raises on pandas >= 2.0.

    Args:
        sales: DataFrame with at least "BRAND" and "SALES UNITS" columns.
        top_n: How many brands to keep.

    Returns:
        list: Brand names ordered from highest to lowest total units.
    """
    return (
        sales.groupby("BRAND")["SALES UNITS"]
        .sum()
        .sort_values(ascending=False)
        .head(top_n)
        .index.tolist()
    )


# Rows inside the year-to-date window (both bounds inclusive).
_ytd_sales = result[result["Period"].between(first_day_cur_year, last_day_cur_mon)]

# NOTE: the variable names say "top5", but nine brands are kept, as before.
tbr_top5_brands = _top_brands_by_units(_ytd_sales[_ytd_sales["BRIDGESTONE TYPE"] == "TBR"])
lsr_top5_brands = _top_brands_by_units(_ytd_sales[_ytd_sales["BRIDGESTONE TYPE"] == "LSR"])
ttl_top5_brands = _top_brands_by_units(_ytd_sales)

In [24]:
# tbr_top5_brands = result[(result["BRIDGESTONE TYPE"] == "TBR") & (result["Period"].between("2022-01-01", last_day_cur_mon))].groupby(["BRAND"]).sum().sort_values(by="SALES UNITS", ascending=False).head(9).index.tolist()
# lsr_top5_brands = result[(result["BRIDGESTONE TYPE"] == "LSR") & (result["Period"].between("2022-01-01", last_day_cur_mon))].groupby(["BRAND"]).sum().sort_values(by="SALES UNITS", ascending=False).head(9).index.tolist()
# ttl_top5_brands = result[(result["Period"].between("2022-01-01", last_day_cur_mon))].groupby(["BRAND"]).sum().sort_values(by="SALES UNITS", ascending=False).head(9).index.tolist()

In [25]:
# For each ranking, keep the brand name when it is in that ranking's list and
# collapse every other brand into "OTHERS".
result["TBR BRAND"] = result["BRAND"].where(result["BRAND"].isin(tbr_top5_brands), "OTHERS")
result["LSR2 BRAND"] = result["BRAND"].where(result["BRAND"].isin(lsr_top5_brands), "OTHERS")
result["TTL BRAND"] = result["BRAND"].where(result["BRAND"].isin(ttl_top5_brands), "OTHERS")

In [26]:
# Show the distinct brand names in order of first appearance.
pd.unique(result["BRAND"])

array(['MAXXIS', 'LINGLONG', 'BRIDGESTONE', 'MICHELIN', 'GOLDSHIELD',
       'CARGOPOWER', 'UNBRANDED', 'WESTLAKE', 'GITI', 'ADVANCE',
       'YUELONG', 'GOODYEAR', 'DERUIBO', 'ROADWING', 'GOODRIDE',
       'WELLPLUS', 'LANVIGATOR', 'CONSTANCY', 'ALLROUND', 'COCREA',
       'BEFRIEND', 'CST', 'DURATURN', 'JIANXIN', 'DURO', 'AUFINE',
       'GT RADIAL', 'NANKANG', 'FESITE', 'KAPSEN', 'JINYUTIRES',
       'YOKOHAMA', 'PRESA', 'FALKEN', 'SAILUN', 'SAKURA', 'SIERRA',
       'CHAOYANG', 'ROADONE', 'JOYALL', 'DUNLOP', 'CHENGSHAN', 'FRONWAY',
       'ROADSHINE', 'WINDFORCE', 'SAMSON', 'GOODTYRE', 'COMPASAL',
       'EUROKING TIRE', 'AUSTONE', 'TOPRUNNER', 'INNING TYRE', 'GREFORCE',
       'LIONSTONE', 'HANKSUGI', 'ROADLUX', 'OPALS', 'PRIMEWELL', 'TONOVA',
       'LIAOLUN', 'THREE-A', 'TEEREX', 'VGLORY', 'JOYROAD', 'LANDY',
       'GRENLANDER', 'DOUBLE COIN', 'SUPERMEALLIR', 'PIRELLI',
       'GREEN DRAGON', 'URATURN', 'DOUBLEHAPPINESS', 'FIRESTONE',
       'SUNFULL', 'RUNKING', 'EUDEMON', "O'

In [27]:
# Only these three brands are reported by name; all others become "OTHERS".
result["For Report"] = result["BRAND"].where(
    result["BRAND"].isin(["BRIDGESTONE", "MICHELIN", "MAXXIS"]),
    "OTHERS",
)

In [28]:
# Remove rows that are exact duplicates across every column.
result = result.drop_duplicates()

In [34]:
# Write the combined data to the rolling raw-data CSV, without the row index.
result.to_csv(
    path_or_buf=r"D:\kc.hsu\OneDrive - Bridgestone\GFK data\gfk_rolling_raw_data.csv",
    index=False,
)