In [35]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler

In [36]:
# Load the wine dataset through scikit-learn's load_wine(). The returned object
# is read by the following cells via its .data, .feature_names and .target
# attributes.
wine_data = load_wine()

In [37]:
# Assemble one tidy frame: a column per feature (named after
# wine_data.feature_names) plus a trailing "target" column with the labels.
wine_df = pd.DataFrame(
    data=wine_data.data,
    columns=wine_data.feature_names,
).assign(target=wine_data.target)

In [38]:
# Separate the feature matrix (X) from the label vector (y). Both are taken as
# copies so later changes to X or y do not write back into wine_df.
X = wine_df.loc[:, wine_data.feature_names].copy()
y = wine_df.loc[:, "target"].copy()

In [39]:
# Standardize the features with StandardScaler.
# StandardScaler rescales every feature column to mean = 0 and
# standard deviation = 1.
# 1) scaler.fit(X)       -> learns the mean and std of every feature column
# 2) scaler.transform(X) -> rescales the data as (x - mean) / std
#
# X is passed as the same DataFrame to both fit and transform (not X.values),
# so transform receives exactly the kind of input the scaler was fitted on.
# Note: StandardScaler uses the population standard deviation (ddof=0), so
# pandas' default .std() (ddof=1) on the result is slightly above 1.

                # ┌────────────────────────────┐
                # │      Feature data X        │
                # │ (DataFrame: alcohol, etc.) │
                # └──────────────┬─────────────┘
                #                │
                #                ▼
                # ┌────────────────────────────┐
                # │ scaler = StandardScaler()  │
                # └──────────────┬─────────────┘
                #                │
                #                ▼
                # ┌────────────────────────────┐
                # │       scaler.fit(X)        │
                # │  - mean of each column     │
                # │  - std of each column      │
                # └──────────────┬─────────────┘
                #                │
                #                ▼
                # ┌────────────────────────────┐
                # │    scaler.transform(X)     │
                # │  - each value becomes      │
                # │    (x - mean) / std        │
                # └──────────────┬─────────────┘
                #                │
                #                ▼
                # ┌────────────────────────────┐
                # │   X_scaled_array (array)   │
                # │  standardized features     │
                # │   mean ≈ 0, std ≈ 1        │
                # └────────────────────────────┘

# Scaling with StandardScaler
scaler = StandardScaler()

# 1. Fit = learn the mean and std of every feature column
scaler.fit(X)

# 2. Transform = rescale X using the mean and std learned by fit
X_scaled_array = scaler.transform(X)

# 3. Wrap the scaled array in a DataFrame so it is easy to read. Reuse X's own
#    column labels and index so X_scaled stays row-aligned with X and y even
#    when X does not carry a default 0..n-1 index (e.g. after filtering).
X_scaled = pd.DataFrame(X_scaled_array, columns=X.columns, index=X.index)

In [40]:
# Show the first rows before and after scaling so the two can be compared.
print("=== Data Original ===", X.head(), sep="\n")
print("\n=== Data Setelah StandardScaler ===", X_scaled.head(), sep="\n")

=== Data Original ===
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline