In [None]:
import pandas as pd
import numpy as np   
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import math

In [None]:
# Rumus distribusi frekuensi

def distribusi_frekuensi(x, k=None):
    """Build a grouped frequency-distribution table for numeric data.

    The data range is divided into ``k`` equal-width classes that start at the
    smallest observation. Every class is half-open ``[left, right)`` except the
    last one, which is closed ``[left, right]``. The class width is
    ``ceil(range / k)``, or ``1`` when all observations are equal.

    Parameters
    ----------
    x : iterable of numbers
        The observations. ``None`` and NaN entries are treated as missing and
        ignored, so they can neither poison ``min``/``max`` nor be counted in
        ``n`` without belonging to a class.
    k : int, optional
        Number of classes. When omitted, Sturges' rule
        ``ceil(1 + 3.322 * log10(n))`` is used.

    Returns
    -------
    tuple
        ``(rows, intervals, mid, f, fr, fk, frk)``:

        * ``rows``      - one dict per class with the keys ``interval``,
          ``midpoint``, ``f``, ``fr``, ``fk`` and ``frk``;
        * ``intervals`` - the ``k + 1`` class edges;
        * ``mid``       - the ``k`` class midpoints;
        * ``f``         - absolute frequencies;
        * ``fr``        - relative frequencies (``f / n``);
        * ``fk``        - cumulative frequencies;
        * ``frk``       - cumulative relative frequencies (``fk / n``).

        Seven empty lists are returned when there are no usable observations
        or when ``k`` is not positive.
    """
    # NaN is the only value that is not equal to itself. Dropping missing
    # entries here keeps n, the extremes and the class counts consistent.
    x = [val for val in x if val is not None and val == val]
    n = len(x)
    if n == 0 or (k is not None and k <= 0):
        return [], [], [], [], [], [], []

    # If k is not given, fall back to Sturges' rule.
    if k is None:
        k = int(np.ceil(1 + 3.322 * np.log10(n)))

    x_min = min(x)
    x_max = max(x)
    R = x_max - x_min
    # Integer class width; identical observations still get a width of 1.
    width = math.ceil(R / k) if R > 0 else 1

    intervals = [x_min + q * width for q in range(k + 1)]
    mid = [left + 0.5 * width for left in intervals[:-1]]

    f = [0] * k
    for val in x:
        # Every value is >= x_min, so the first upper edge that exceeds it
        # identifies its class.
        for q in range(k - 1):
            if val < intervals[q + 1]:
                f[q] += 1
                break
        else:
            # The last class is closed on the right. It also absorbs a value
            # that lands at or beyond the top edge, e.g. when floating-point
            # rounding leaves that edge marginally below the maximum.
            f[-1] += 1

    fr = [freq / n for freq in f]
    fk = []
    cum = 0
    for freq in f:
        cum += freq
        fk.append(cum)
    frk = [c / n for c in fk]

    rows = []
    for q in range(k):
        closing = "]" if q == k - 1 else ")"
        rows.append({
            "interval": f"[{intervals[q]}, {intervals[q + 1]}{closing}",
            "midpoint": mid[q],
            "f": f[q],
            "fr": fr[q],
            "fk": fk[q],
            "frk": frk[q]
        })

    return rows, intervals, mid, f, fr, fk, frk

In [None]:
# Read the CSV data obtained from Kaggle.
# The path is a raw string so the Windows backslashes stay literal (in a normal
# string "\U" starts a Unicode escape and the cell fails to parse), and it no
# longer carries embedded double quotes, which are not part of the file name.
CSV_PATH = r"C:\Users\LENOVO\Downloads\top2022.csv"
data = pd.read_csv(CSV_PATH)

drop_cols = ["gap", "npsn", "kab.kota", "ranking"]  # columns that are not needed
data = data.drop(columns=drop_cols, errors="ignore")  # tolerate columns that are absent
values = data["Nilai.Total"].dropna().tolist()

data = data[['Nilai.Total', 'Provinsi', 'Jenis']]
data.head()

In [None]:
# Trial run using a body-weight dataset.
# NOTE: this rebinds `data`, which the CSV cell above uses for the DataFrame.
data = [128,63,97,134,133,136,125,110,118,94,
        76,84,132,105,80,87,100,77,120,109,
        90,72,103,78,94,118,117,80,140,94]

n = len(data)
# Sturges' rule picks the number of classes; one call yields counts and edges.
counts, edges = np.histogram(data, bins="sturges")
mid = (edges[:-1] + edges[1:]) / 2.0
fk = np.cumsum(counts)
fr = counts / n
frk = fk / n

last = len(counts) - 1
rows = []
for q, (left, right, m, c, r, cum, rcum) in enumerate(
    zip(edges[:-1], edges[1:], mid, counts, fr, fk, frk)
):
    # np.histogram closes only the last bin on the right, so the label of the
    # final class must end with "]" to match what was actually counted.
    closing = "]" if q == last else ")"
    label = f"[{left:g}, {right:g}{closing}"
    rows.append({"interval": label, "midpoint": m, "f": int(c), "fr": float(r), "fk": int(cum), "frk": float(rcum)})

# Show the table neatly (pandas is already imported in the first cell).
df = pd.DataFrame(rows)
print(df.to_string(index=False))