In [24]:
import re
import numpy as np
import pandas as pd
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from rich import print
from rich.table import Table
from rich.console import Console

# Lift pandas' display limits so printed DataFrames show every column and row.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

# Width of the current terminal, in character columns.
terminal_width = shutil.get_terminal_size().columns

# Load the dataset and preview its first ten rows.
df = pd.read_csv("labeled_customer_data.csv")
print(df.head(10))

In [None]:
# Tabulating data
# 1. Summary table per Product Type
# Named aggregations: output column -> (source column, aggregation function).
product_type_aggregations = {
    "Total_Revenue": ("Total Price", "sum"),
    "Average_Rating": ("Rating", "mean"),
    "Average_Quantity": ("Quantity", "mean"),
    "Buyer_Age": ("Age", "mean"),
    "Average_Addon_Total": ("Add-on Total", "mean"),
    "Transaction_Count": ("Product Type", "count"),
}
summary_df = (
    df.groupby("Product Type").agg(**product_type_aggregations).reset_index()
)

# Formatting
# Means are rounded to a fixed number of decimals per column.
for mean_column, decimals in (
    ("Average_Rating", 2),
    ("Average_Quantity", 2),
    ("Buyer_Age", 1),
):
    summary_df[mean_column] = summary_df[mean_column].round(decimals)

# Revenue is rendered in thousands with a thousands separator and a "k"
# suffix; the add-on mean is rendered with two decimals. Both become strings.
summary_df["Total_Revenue"] = summary_df["Total_Revenue"].apply(
    lambda revenue: "${:,.0f}k".format(revenue / 1000)
)
summary_df["Average_Addon_Total"] = summary_df["Average_Addon_Total"].apply(
    "${:.2f}".format
)

# Render summary_df as a rich table: one centered, non-wrapping column per
# DataFrame column and one table row per DataFrame row, with every cell
# converted to text.
table = Table(
    show_lines=True,
    title="Tabulasi Utama Berdasarkan Product Type",
    title_style="bold yellow",
)
for header in summary_df.columns:
    table.add_column(header, justify="center", style="white bold", no_wrap=True)
for record in summary_df.itertuples(index=False, name=None):
    table.add_row(*map(str, record))
console = Console()
console.print(table)

# 2. Frequency distribution of Product Type across Age Group
# Rows are product types, columns are age groups, cells are counts.
age_product_table = pd.crosstab(
    index=df["Product Type"],
    columns=df["Age_Group"],
)
print("\nDistribusi Frekuensi Product Type terhadap Age Group")
print(age_product_table)

# 3. Frequency distribution of Age Group across Payment Method
# The table has one row per Age_Group and one column per Payment Method, so
# the printed heading names exactly those two variables (it previously
# announced "Gender" although Gender is not part of this table).
# NOTE(review): the variable name still says "shipping_product"; it is kept
# unchanged because code outside this section may reference it.
shipping_product_table = pd.crosstab(df["Age_Group"], df["Payment Method"])
print("\nDistribusi Frekuensi Age Group terhadap Payment Method")
print(shipping_product_table)

# 4. Frequency distribution of Product Type per Age Group, split into female
#    and male customers
# Keep only rows that actually carry an age group. The mask is built
# explicitly with notna(): AND-ing a boolean mask with the raw Age_Group
# column relies on implicit object-to-bool coercion and can raise for
# string or categorical dtypes.
has_age_group = df["Age_Group"].notna()
filtered_female_df = df[(df["Gender"] == "Female") & has_age_group]
filtered_male_df = df[(df["Gender"] == "Male") & has_age_group]

# Rows are age groups, columns are product types, cells are counts.
# NOTE(review): "female_shipping" holds product-type counts, not shipping
# data; the name is kept because code outside this section may reference it.
female_shipping = pd.crosstab(
    index=filtered_female_df["Age_Group"], columns=filtered_female_df["Product Type"]
)
male_product = pd.crosstab(
    index=filtered_male_df["Age_Group"], columns=filtered_male_df["Product Type"]
)
print("Distribusi Frekuensi Product Type untuk Kelompok Usia Perempuan:")
print(female_shipping)
print("Distribusi Frekuensi Product Type untuk Kelompok Usia Laki-laki:")
print(male_product)