In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,classification_report
from sklearn.preprocessing import StandardScaler

In [2]:
# Contoh memuat dataset dan melakukan one-hot encoding
df = pd.read_csv("./data/fitness_tracker_dataset.csv").drop(columns=["user_id","date"])

# Mengubah nilai boolean menjadi 0 dan 1
x = pd.get_dummies(df[df.columns[:-1]], drop_first=True)
# Mengonversi semua nilai boolean di dalam DataFrame x menjadi 0 dan 1
x = x.astype(int)
y = df["mood"]

In [3]:
# Label encoding untuk target
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Membagi data menjadi data latih dan uji
x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)

In [4]:
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

In [None]:
class LVQ:
    def __init__(self, n_prototypes_per_class=1, learning_rate=0.1, max_iter=100, clip_value=1e6):
        self.n_prototypes_per_class = n_prototypes_per_class
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.prototypes = []  # Menyimpan prototipe (setiap prototipe berisi [vektor fitur, kelas])
        self.clip_value = clip_value  # Batas untuk pembaruan prototipe
    
    def fit(self, X_train, y_train):
        # Jika y_train adalah numpy.ndarray, konversi menjadi pandas Series
        if isinstance(y_train, np.ndarray):
            y_train = pd.Series(y_train)
        
        # Normalisasi data
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        
        # Reset indeks untuk memastikan konsistensi
        X_train = pd.DataFrame(X_train)  # Pastikan X_train dalam bentuk DataFrame
        X_train = X_train.reset_index(drop=True)
        y_train = y_train.reset_index(drop=True)
        
        classes = np.unique(y_train)
        for c in classes:
            # Pilih secara acak data untuk setiap kelas sebagai prototipe awal
            class_data = X_train[y_train == c]
            for i in range(self.n_prototypes_per_class):
                prototype = class_data.iloc[np.random.choice(class_data.shape[0])]
                self.prototypes.append([prototype.values.astype(np.float64), c])  # Mengonversi prototipe ke float64
        
        # Training LVQ
        for _ in range(self.max_iter):
            print(f"epoch : {_}")
            for i in range(X_train.shape[0]):
                x = X_train.iloc[i].values.astype(np.float64)  # Ubah x menjadi float64
                y_true = y_train.iloc[i]  # Kelas sebenarnya
                
                # Cari prototipe terdekat
                distances = [euclidean_distance(x, prototype[0]) for prototype in self.prototypes]
                closest_prototype_idx = np.argmin(distances)
                closest_prototype, closest_class = self.prototypes[closest_prototype_idx]
                
                # Update prototipe berdasarkan apakah kelas sama atau tidak
                if closest_class == y_true:
                    # Pindahkan prototipe mendekati data
                    self.prototypes[closest_prototype_idx][0] += self.learning_rate * (x - closest_prototype)
                else:
                    # Pindahkan prototipe menjauh dari data
                    self.prototypes[closest_prototype_idx][0] -= self.learning_rate * (x - closest_prototype)

                # Pembatasan pada pembaruan prototipe
                self.prototypes[closest_prototype_idx][0] = np.clip(self.prototypes[closest_prototype_idx][0], -self.clip_value, self.clip_value)
    
    def predict(self, X_test):
        # Normalisasi data uji
        scaler = StandardScaler()
        X_test = scaler.fit_transform(X_test)
        
        X_test = pd.DataFrame(X_test)  # convart x_test ke dataframe
        X_test = X_test.reset_index(drop=True)
        predictions = []
        for i in range(X_test.shape[0]):
            x = X_test.iloc[i].values.astype(np.float64)  # convert x menjadi float64
            
            # Cari prototipe terdekat
            distances = [euclidean_distance(x, prototype[0]) for prototype in self.prototypes]
            closest_prototype_idx = np.argmin(distances)
            closest_class = self.prototypes[closest_prototype_idx][1]
            
            predictions.append(closest_class)
        
        return np.array(predictions)


In [6]:
lvq = LVQ(n_prototypes_per_class=1, learning_rate=0.1, max_iter=10)
# Melatih model dengan data latih
lvq.fit(x_train, y_train)

# Menguji model dengan data uji
y_pred = lvq.predict(x_test)

# Menghitung akurasi
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

epoch : 0
epoch : 1
epoch : 2
epoch : 3
epoch : 4
epoch : 5
epoch : 6
epoch : 7
epoch : 8
epoch : 9
Accuracy: 25.00%


In [None]:
print(classification_report(y_test,y_pred))