In [3]:
import numpy as np
import pandas as pd

# Custom Ordinal Encoder
class CustomOrdinalEncoder:
    """Encode DataFrame columns as integer codes in sorted category order.

    For every encoded column the distinct non-missing values seen in
    ``fit`` are sorted and numbered ``0..k-1``; ``transform`` replaces each
    value by its number.

    Parameters
    ----------
    cols : list of column labels, single str, or None
        Columns to encode. ``None`` encodes every column of the frame
        passed to ``fit``.
    unknown_value : scalar or None
        Code assigned to values that were not seen during ``fit``. With
        the default ``None`` such values become NaN.

    Missing values in the input are never given a code; they stay missing
    in the transformed output regardless of ``unknown_value``.
    """

    def __init__(self, cols=None, unknown_value=None):
        self.cols = cols  # columns to encode (None = all columns seen in fit)
        self.unknown_value = unknown_value  # code for unseen categories
        self.mappings = {}  # column label -> {category: integer code}
        self._fitted_cols = None  # columns resolved by the last fit() call

    @staticmethod
    def _as_list(cols):
        """Return *cols* as a list, treating a lone string as one label."""
        if isinstance(cols, str):
            # Iterating a bare string would yield its characters.
            return [cols]
        return list(cols)

    def fit(self, X):
        """Learn the category -> code mapping of every encoded column.

        Returns ``self`` so calls can be chained.
        """
        cols = list(X.columns) if self.cols is None else self._as_list(self.cols)
        # Build into a fresh dict so a refit never keeps stale columns.
        learned = {}
        for col in cols:
            # Drop missing values first: sorting NaN/None together with
            # strings raises TypeError, and missing is not a category.
            categories = sorted(X[col].dropna().unique())
            learned[col] = {value: code for code, value in enumerate(categories)}
        self.mappings = learned
        self._fitted_cols = cols
        return self

    def transform(self, X):
        """Return a copy of *X* with the encoded columns replaced by codes.

        Raises
        ------
        ValueError
            If ``cols`` is None and ``fit`` has not been called yet.
        KeyError
            If no mapping has been learned for one of the columns.
        """
        if self.cols is None:
            if self._fitted_cols is None:
                raise ValueError(
                    "CustomOrdinalEncoder is not fitted; call fit() before transform()"
                )
            cols = self._fitted_cols
        else:
            cols = self._as_list(self.cols)

        X_transformed = X.copy()
        for col in cols:
            if col not in self.mappings:
                raise KeyError(
                    f"no mapping learned for column {col!r}; call fit() first"
                )
            source = X[col]
            encoded = source.map(self.mappings[col])
            if self.unknown_value is not None:
                # Only values that were present but unmapped are "unknown";
                # values that were already missing stay missing.
                unseen = encoded.isna() & source.notna()
                encoded = encoded.mask(unseen, self.unknown_value)
            X_transformed[col] = encoded
        return X_transformed

    def fit_transform(self, X):
        """Fit on *X* and return its encoded copy."""
        return self.fit(X).transform(X)


# Fungsi train_test_split
def train_test_split(X, y, random_seed=42, test_size=0.2):
    """Randomly split features and target into train and test parts.

    Parameters
    ----------
    X, y : pandas objects with the same number of rows
        Rows are selected positionally through ``.iloc``.
    random_seed : int or None
        Seed of the shuffle; the same seed always yields the same split.
    test_size : float
        Fraction of rows, between 0 and 1, placed in the test part. The
        row count is ``int(n * test_size)``, i.e. rounded down.

    Returns
    -------
    X_train, X_test, y_train, y_test

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``test_size`` is outside
        the range [0, 1].
    """
    n = X.shape[0]
    if len(y) != n:
        raise ValueError(
            f"X and y must have the same number of rows, got {n} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    # A private RandomState yields the same permutation as seeding the
    # global generator, without reseeding it for the rest of the program.
    rng = np.random.RandomState(random_seed)
    shuffled_indices = rng.permutation(np.arange(n))

    n_test = int(n * test_size)
    test_indices = shuffled_indices[:n_test]
    train_indices = shuffled_indices[n_test:]

    X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
    y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]

    return X_train, X_test, y_train, y_test


# Small sample dataset of mixed categorical attributes
data = {
    'buying': ['vhigh', 'high', 'med', 'low', 'high'],
    'maint': ['vhigh', 'vhigh', 'low', 'med', 'med'],
    'doors': ['3', '4', '5more', '3', '4'],
    'persons': ['more', '4', '2', '2', 'more'],
    'lug_boot': ['med', 'small', 'big', 'big', 'med'],
    'safety': ['low', 'low', 'high', 'med', 'high'],
}

# Build the frame, then separate the target column from the features
df = pd.DataFrame(data)
feature_cols = [name for name in df.columns if name != 'safety']
y = df['safety']
X = df[feature_cols]

# Hold out part of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Learn the category codes on the training rows only, then apply the
# same codes to both parts
encoder = CustomOrdinalEncoder(cols=feature_cols)
encoder.fit(X_train)
X_train_encoded = encoder.transform(X_train)
X_test_encoded = encoder.transform(X_test)

# Show the training features before and after encoding
print("Sebelum encoding:\n", X_train)
print("\nSetelah encoding:\n", X_train_encoded)


Sebelum encoding:
   buying  maint  doors persons lug_boot
4   high    med      4    more      med
2    med    low  5more       2      big
0  vhigh  vhigh      3    more      med
3    low    med      3       2      big

Setelah encoding:
    buying  maint  doors  persons  lug_boot
4       0      1      1        1         1
2       2      0      2        0         0
0       3      2      0        1         1
3       1      1      0        0         0
