In [None]:
import re
import numpy as np
import pandas as pd
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from rich import print
from rich.table import Table
from rich.console import Console

# Lift the pandas display limits so frames are shown without truncation.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

# Width passed to the rich consoles that draw horizontal rules in later cells.
terminal_width = shutil.get_terminal_size().columns

# Load the dataset analysed by the rest of the notebook.
df = pd.read_csv("labeled_customer_data.csv")

# Column groups by dtype. "Customer ID" is numeric but is left out of the
# numeric analysis list (presumably because it is an identifier — confirm).
numeric_columns = df.select_dtypes(include=[np.number]).columns
numeric_noID = [name for name in numeric_columns if name != "Customer ID"]
categorical_columns = df.select_dtypes(include=["string", "object"]).columns

# **Statistika Deskriptif**

Menunjukkan analisis kuantitatif populasi pada:

- Usia
- Gender
- Total Price
- Unit Price
- Add-On Total
- Rating


In [None]:
# Summary statistics (pandas describe()) for every numeric column except the ID.
descriptive_stats = df[numeric_noID].describe()

console = Console()
descriptive_table = Table(
    show_lines=True,
    title_style="bold yellow",
    title="Tabel Statistik Deskriptif Data Numerik",
)

# The first column carries the statistic label; one centered column follows
# for each numeric field in the summary.
descriptive_table.add_column("Statistics", justify="left", style="yellow bold")
for column_name in descriptive_stats.columns:
    descriptive_table.add_column(column_name, justify="center", style="white bold")

# Every row of the summary becomes one table row, values fixed to two decimals.
for stat_label, stat_values in descriptive_stats.iterrows():
    formatted_values = (f"{value:.2f}" for value in stat_values)
    descriptive_table.add_row(stat_label, *formatted_values)

console.print(descriptive_table)

# **Analisis Deskriptif Data Kuantitatif**


In [None]:
# Narrate every numeric column: compare mean against median as a hint about
# skew, then classify the spread from the sample standard deviation.

# Absolute cut-off between a "large" and a "small" standard deviation.
# NOTE(review): an absolute threshold is scale-dependent, so columns measured
# in large units will always read as "large" and small-scale columns as
# "small". Consider a relative measure (e.g. std / |mean|) — confirm intent.
HIGH_STD_THRESHOLD = 100

# One console is enough for all rules; no need to rebuild it per column.
rule_console = Console(width=terminal_width)
rule_console.rule(
    "[bold yellow]Deskripsi Data Kuantitatif berdasarkan Mean, Median, Standar Deviasi"
)

for col in numeric_noID:
    rule_console.rule(f"[bold yellow]Analisis Data {col}")
    data_mean = df[col].mean()
    data_median = df[col].median()
    data_std = df[col].std()

    # A column without valid values yields NaN. Every comparison with NaN is
    # False, so without this guard the column would be reported as symmetric.
    if pd.isna(data_mean) or pd.isna(data_median):
        print("Tidak ada data numerik yang valid; analisis dilewati.")
        continue

    # Describe the shape of the distribution. The messages show two decimals,
    # so "almost equal" is decided at that same precision; an exact float
    # comparison would practically never be true and could print a message
    # such as "5.00 is greater than 5.00".
    if round(float(data_mean), 2) == round(float(data_median), 2):
        print(f"Rata-rata dan median hampir sama ({data_mean:.2f}).")
        print("Distribusi data cenderung simetris.")
    elif data_mean > data_median:
        print(
            f"Rata-rata ({data_mean:.2f}) lebih besar dari median ({data_median:.2f})."
        )
        print(
            "Distribusi data mungkin miring ke kanan (right-skewed), seringkali karena adanya outlier / nilai besar."
        )
    else:
        print(
            f"Rata-rata ({data_mean:.2f}) lebih kecil dari median ({data_median:.2f})."
        )
        print("Distribusi data mungkin miring ke kiri (left-skewed).")

    # The sample standard deviation is NaN when fewer than two values exist;
    # report that explicitly instead of labelling NaN as "small".
    if pd.isna(data_std):
        print("Standar deviasi tidak dapat dihitung (data kurang dari dua nilai).")
    elif data_std > HIGH_STD_THRESHOLD:
        print(
            f"Standar deviasi besar ({data_std:.2f}) → variasi data tinggi, sebaran luas."
        )
    else:
        print(
            f"Standar deviasi kecil ({data_std:.2f}) → data cenderung berkelompok di sekitar rata-rata."
        )

In [28]:
# Frequency analysis for every categorical column, skipping the columns
# listed in exclude_cols.
exclude_cols = ["SKU", "Purchase Date", "Customer ID", "Add-ons Purchased"]
categorical_analyze = [col for col in categorical_columns if col not in exclude_cols]

# One console is enough for all rules; no need to rebuild it per column.
category_console = Console(width=terminal_width)

for col in categorical_analyze:
    category_console.rule(f"[bold yellow]Analisis Data {col}")

    # value_counts() ignores missing values and sorts by descending frequency,
    # so the first entry is the most common category.
    value_counts = df[col].value_counts()

    # A column with no non-null values has nothing to rank; indexing the first
    # entry below would raise IndexError.
    if value_counts.empty:
        print("Tidak ada data untuk dianalisis pada kolom ini.")
        continue

    # Derive the shares from the counts already computed instead of counting
    # the column a second time.
    percentages = (value_counts / value_counts.sum() * 100).round(2)
    for category, count in value_counts.items():
        print(f"{category}: {count} data ({percentages.loc[category]}%)")

    # Short description based on the share of the most frequent category.
    top_category = value_counts.index[0]
    top_percentage = percentages.iloc[0]
    if top_percentage > 70:
        print(f"Kategori '{top_category}' sangat dominan ({top_percentage}%).")
    elif top_percentage > 40:
        print(f"Kategori '{top_category}' cukup dominan ({top_percentage}%).")
    else:
        print("Distribusi kategori relatif merata, tidak ada yang terlalu dominan.")