In [None]:
import pandas as pd
import atoti as tt
import calendar
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [None]:
# Path of the raw CSV export, relative to the notebook's working directory.
GFK_REPORT_CSV = "GFK_CUSTOMREPORT_TRUCKTIRE_TW_BRIDGESTONE_Feb22 2.csv"
df = pd.read_csv(GFK_REPORT_CSV)

In [None]:
# Remove every "/" from the column headers. regex=False pins a literal match so
# the result does not depend on the pandas-version-specific default of `regex`.
df.columns = df.columns.str.replace("/", "", regex=False)

In [None]:
# Remove every "'" from the column headers. regex=False pins a literal match so
# the result does not depend on the pandas-version-specific default of `regex`.
df.columns = df.columns.str.replace("'", "", regex=False)

In [None]:
# Columns kept for the rest of the notebook, in the order they should appear.
# The names are the headers as they read after "/" and "'" have been stripped.
selected_columns = [
    'Period',
    'Item',
    'BRAND',
    'REGION',
    'BRIDGESTONE TYPE',
    'DIMENSION (Truck Tyres)',
    'RIMDIAMET.MM',
    'WIDTH IN INCH',
    'WIDTH IN MM',
    'ASPECT RATIO',
    'TUBE  TUBLESS',
    'LOADINDEX',
    'SPEEDINDEX',
    'FirstActivity',
    'SALES UNITS',
    'SALES <LC>',
    'PRICE TWDUN.',
]
df = df[selected_columns]

In [None]:
# Lookup tables used to decode the raw "Period" labels.
# Upper-case three-letter month abbreviations, January first.
month = [name.upper()[:3] for name in calendar.month_name[1:13]]
# Matching zero-padded month numbers, "01" through "12".
mon_num = [str(number).zfill(2) for number in range(1, 13)]
# Month abbreviation -> zero-padded month number.
mon_dict = {abbr: number for abbr, number in zip(month, mon_num)}
# Two-digit year suffix -> four-digit year.
year_dict = {str(full_year)[-2:]: str(full_year) for full_year in (2022, 2021, 2020)}

In [None]:
# Upper-case English month abbreviations mapped to zero-padded month numbers.
# Written out literally so that parsing never depends on the process locale.
_MONTH_NUMBER_BY_ABBR = {
    "JAN": "01", "FEB": "02", "MAR": "03", "APR": "04",
    "MAY": "05", "JUN": "06", "JUL": "07", "AUG": "08",
    "SEP": "09", "OCT": "10", "NOV": "11", "DEC": "12",
}


def normalize_date(x):
    """Turn a period label such as ``"FEB22"`` into a ``"YYYYMMDD"`` string.

    The first three characters of ``x`` are read as an upper-case English
    month abbreviation and the last two characters as a two-digit year.
    Whatever sits between them (for example a space) is ignored. The day is
    always set to the 15th, so every label maps to a mid-month date.

    Any two-digit year is accepted and placed in the 2000s, so labels from
    years other than 2020-2022 parse as well.

    Args:
        x: Period label string, e.g. ``"FEB22"``.

    Returns:
        The date as an eight-character string, e.g. ``"20220215"``.

    Raises:
        KeyError: If the month abbreviation is not recognised, or the last
            two characters are not two ASCII digits.
    """
    month_abbr = x[:3]
    year_suffix = x[-2:]
    # Month is resolved first so an unknown month is reported before a bad year.
    mon = _MONTH_NUMBER_BY_ABBR[month_abbr]
    if len(year_suffix) != 2 or any(ch not in "0123456789" for ch in year_suffix):
        # KeyError keeps the exception type the dictionary-based lookup used to raise.
        raise KeyError(year_suffix)
    # NOTE(review): two-digit years are assumed to be 20xx - confirm for older data.
    return "20" + year_suffix + mon + "15"

In [None]:
# Rewrite each raw period label as the string produced by normalize_date.
df["Period"] = df["Period"].apply(normalize_date)

In [None]:
# Parse the "YYYYMMDD" strings into timestamps. The format is stated explicitly
# so parsing does not rely on pandas guessing the layout of eight-digit strings.
df["Period"] = pd.to_datetime(df["Period"], format="%Y%m%d")

In [None]:
# Brands ranked by total "SALES UNITS" in the 2022-02-15 period; keep the first five.
# NOTE(review): this runs before the integer cast of "SALES UNITS" in a later
# cell - confirm the column is already numeric at this point.
top_5 = (
    df.loc[df["Period"] == "2022-02-15"]
    .groupby(["BRAND"])["SALES UNITS"]
    .sum()
    .reset_index()
    .sort_values(by="SALES UNITS", ascending=False)["BRAND"]
    .head(5)
    .tolist()
)

In [None]:
# Brands in top_5 keep their own name; every other brand is grouped under "THE OTHERS".
df["Category"] = df["BRAND"].where(df["BRAND"].isin(top_5), "THE OTHERS")

In [None]:
# Derive the calendar parts of "Period" as separate columns.
df = df.assign(
    Year=df["Period"].dt.year,
    Month=df["Period"].dt.month_name(),
    Date=df["Period"].dt.date,
)

In [None]:
# Numeric measures become integers; Year and Month become strings.
column_dtypes = {
    **dict.fromkeys(["SALES UNITS", "SALES <LC>", "PRICE TWDUN."], int),
    **dict.fromkeys(["Year", "Month"], str),
}
df = df.astype(column_dtypes)

## Create the session for Atoti

In [None]:
# Columns passed as the table keys when the DataFrame is loaded into the session.
keys = [
    "Period",
    "Item",
    "BRAND",
    "DIMENSION (Truck Tyres)",
    "REGION",
]

In [None]:
# Start an atoti session; no arguments are passed, so the library defaults apply.
session = tt.create_session()

In [None]:
# Load the prepared DataFrame into the session as table "gfk", keyed by `keys`,
# with the three numeric columns declared as atoti integers.
data = session.read_pandas(
    df,
    keys=keys,
    table_name="gfk",
    types={
        column: tt.type.INT
        for column in ("SALES UNITS", "SALES <LC>", "PRICE TWDUN.")
    },
)

In [None]:
# Create a cube from the `data` table; "GFK" is presumably the cube's name - confirm against the atoti docs.
cube = session.create_cube(data, "GFK")

In [None]:
# Short aliases for the cube's hierarchies, levels and measures collections.
h = cube.hierarchies
l = cube.levels
m = cube.measures

In [None]:
# NOTE(review): presumably renders a link to the session's web application as the cell output - confirm against the atoti docs.
session.link()

In [None]:
# Define the "Period" hierarchy from the Year, Month and Date columns, in that order.
h["Period"] = [data[column] for column in ("Year", "Month", "Date")]

In [None]:
# The numbers 1 through 12 as strings.
# NOTE(review): not referenced again in the visible cells - confirm it is still needed.
orders = list(map(str, range(1, 13)))

In [None]:
# Full month names in calendar order (index 0 of calendar.month_name is skipped).
month_names = calendar.month_name[1:13]

In [None]:
# Display the list of month names for a quick visual check.
month_names

In [None]:
# For string-valued members, a custom ordering can be set with comparator.first_members.
# When the same level name occurs more than once, the level has to be addressed
# with the full three-part l["", "", ""] key before .comparator is set.

month_level = l["gfk", "Period", "Month"]
month_level.comparator = tt.comparator.first_members(
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
)

In [None]:
# Display the cube's measures collection (m is cube.measures).
m

In [None]:
# Exploration aid: prints the built-in help for tt.parent_value. It does not affect the analysis.
help(tt.parent_value)

In [None]:
# Ratio of the summed sales units to the value of the same measure at the parent
# in the "Period" hierarchy (degree 1). Filters are applied, and at the top the
# measure itself is used as the total value.
sales_units = m["SALES UNITS.SUM"]
period_parent_units = tt.parent_value(
    sales_units,
    degrees={h["Period"]: 1},
    apply_filters=True,
    total_value=sales_units,
)
m["SALES UNIT in % of Parent"] = sales_units / period_parent_units

In [None]:
# Set the display format; "DOUBLE[0.0%]" presumably renders the ratio as a percentage with one decimal - confirm against the atoti docs.
m["SALES UNIT in % of Parent"].formatter = "DOUBLE[0.0%]"

In [None]:
# Ratio of the summed sales units to the value of the same measure at the parent
# in the "Category" hierarchy (degree 1). Filters are applied, and at the top the
# measure itself is used as the total value.
category_units = m["SALES UNITS.SUM"]
category_parent_units = tt.parent_value(
    category_units,
    degrees={h["Category"]: 1},
    apply_filters=True,
    total_value=category_units,
)
m["SALES UNIT in % of Parent for categories"] = category_units / category_parent_units

In [None]:
# Set the display format; "DOUBLE[0.0%]" presumably renders the ratio as a percentage with one decimal - confirm against the atoti docs.
m["SALES UNIT in % of Parent for categories"].formatter = "DOUBLE[0.0%]"