In [38]:
import re
import numpy as np
import pandas as pd
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from rich import print
from rich.table import Table
from rich.console import Console

# Lift both pandas display limits so printed frames are never truncated.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

# Column count of the current terminal; used below to size rich consoles.
terminal_width = shutil.get_terminal_size().columns

# Source data for the tables built below.
df = pd.read_csv("labeled_customer_data.csv")

In [None]:
# Tabulating data
# 1. Gender tabulation table: row count and percentage share per gender,
#    followed by a "Total" summary row.
gender_crosstabe = pd.crosstab(index=df["Gender"], columns="count")
gender_crosstabe["Percentage"] = (
    gender_crosstabe["count"] / gender_crosstabe["count"].sum() * 100
).round(2)

# The keys of the summary row must match the existing column names exactly,
# otherwise concat would create extra, mostly-empty columns.
total_row = pd.DataFrame(
    {
        "count": [gender_crosstabe["count"].sum()],
        # Re-round: adding already-rounded floats can leave binary noise
        # such as 99.99999999999999.
        "Percentage": [gender_crosstabe["Percentage"].sum().round(2)],
    },
    index=["Total"],
)
gender_crosstabe = pd.concat([gender_crosstabe, total_row])

# Display using a rich Table
gender_table = Table(
    title="Tabulasi Gender",
    title_style="bold yellow",
    show_lines=True,
)
# Every row starts with its index label, so that label needs its own header;
# without it the data headers would sit one cell too far to the left.
gender_table.add_column("Gender", style="white bold", justify="center")
for col in gender_crosstabe.columns:
    gender_table.add_column(col, style="white bold", justify="center")
# itertuples keeps each column's dtype, so counts render as integers
# (iterrows would upcast them to float because of the Percentage column).
for idx, *values in gender_crosstabe.itertuples(name=None):
    gender_table.add_row(str(idx), *[str(item) for item in values])
console = Console(width=terminal_width)
console.print(gender_table)

# 2. Age-Group tabulation table (TODO: this section has no code yet)
# 3. Product Type tabulation table: one summary row per product type.
summary_df = df.groupby("Product Type").agg(
    Total_Revenue=("Total Price", "sum"),
    Average_Rating=("Rating", "mean"),
    Average_Quantity=("Quantity", "mean"),
    Buyer_Age=("Age", "mean"),
    Average_Addon_Total=("Add-on Total", "mean"),
    Transaction_Count=("Product Type", "count"),
)
summary_df = summary_df.reset_index()

# Formatting: money columns become display strings, averages are rounded.
summary_df["Total_Revenue"] = summary_df["Total_Revenue"].map(
    lambda x: f"${x/1000:,.0f}k"
)
for rounded_column, digits in (
    ("Average_Rating", 2),
    ("Average_Quantity", 2),
    ("Buyer_Age", 1),
):
    summary_df[rounded_column] = summary_df[rounded_column].round(digits)
summary_df["Average_Addon_Total"] = summary_df["Average_Addon_Total"].map(
    lambda x: f"${x:.2f}"
)

# Render the summary as a rich table: one header per column, one row per
# product type, every cell converted to text.
table = Table(
    show_lines=True,
    title_style="bold yellow",
    title="Tabulasi Utama Berdasarkan Product Type",
)
for column in summary_df.columns:
    table.add_column(column, no_wrap=True, justify="center", style="white bold")
for _, row in summary_df.iterrows():
    table.add_row(*map(str, row))
console = Console()
console.print(table)

# Filter by gender
# Each subset keeps the rows of one gender that have an Age_Group value.
# Age_Group presumably holds group labels rather than booleans, so it cannot
# be AND-ed into the mask directly; notna() expresses "has an age group".
# NOTE(review): confirm that excluding rows with a missing Age_Group is the
# intended filter.
has_age_group = df["Age_Group"].notna()
filtered_female_df = df[(df["Gender"] == "Female") & has_age_group]
filtered_male_df = df[(df["Gender"] == "Male") & has_age_group]

# 2. Frequency distribution of Shipping Type across age groups, split by
#    gender (women and men).
crosstab_df = pd.crosstab(df["Age_Group"], [df["Shipping Type"], df["Gender"]])
# Flatten the two-level (shipping type, gender) header into single labels.
crosstab_df.columns = [
    f"{shipping} ({gender})" for shipping, gender in crosstab_df.columns
]
# Turn the Age_Group index into a regular column so it is rendered as a cell.
crosstab_df = crosstab_df.reset_index()

# Fixed-width console for this table.
console = Console(width=200)
table = Table(
    expand=True,
    show_lines=True,
    title_style="bold yellow",
    title="Distribusi Frekuensi Shipping Type",
)
for column in crosstab_df.columns:
    table.add_column(column, no_wrap=True, justify="center", style="white bold")
for _, row in crosstab_df.iterrows():
    table.add_row(*map(str, row))
console.print(table)

# 3. Frequency distribution of Payment Method across age groups, for women
#    and for men.
# A horizontal rule carrying the section title separates this output.
Console(width=terminal_width).rule(
    "[bold yellow]Distribusi Frekuensi Payment Method terhadap Kelompok Usia Perempuan dan Laki-laki"
)

# Women: age group (rows) x payment method (columns); show the first 5 rows.
female_payment = pd.crosstab(
    filtered_female_df["Age_Group"],
    filtered_female_df["Payment Method"],
)
print(female_payment.head())

# Men: same layout, again limited to the first 5 rows.
male_payment = pd.crosstab(
    filtered_male_df["Age_Group"],
    filtered_male_df["Payment Method"],
)
print(male_payment.head())

# 4. Frequency distribution of Product Type across age groups, for women
#    and for men.
# A horizontal rule carrying the section title separates this output.
Console(width=terminal_width).rule(
    "[bold yellow]Distribusi Frekuensi Product Type terhadap Kelompok Usia Perempuan dan Laki-laki"
)
# Age group (rows) x product type (columns), one table per gender.
female_product = pd.crosstab(
    index=filtered_female_df["Age_Group"], columns=filtered_female_df["Product Type"]
)
# Backward-compatible alias: this table used to be called female_shipping
# even though it tabulates Product Type, not Shipping Type.
female_shipping = female_product
male_product = pd.crosstab(
    index=filtered_male_df["Age_Group"], columns=filtered_male_df["Product Type"]
)
print("Distribusi Frekuensi Product Type untuk Kelompok Usia Perempuan:")
print(female_product)
print("Distribusi Frekuensi Product Type untuk Kelompok Usia Laki-laki:")
print(male_product)