In [1]:
import numpy as np

class LinearRegressionNE:
    """Ordinary least-squares linear regression with an intercept term.

    After ``fit`` the parameters live in ``theta``: ``theta[0]`` is the
    intercept (bias) and ``theta[1:]`` are the per-feature coefficients.
    """

    def __init__(self):
        self.theta = None  # Regression parameters (bias first); None until fit() runs

    @staticmethod
    def _add_bias(X):
        """Return X as a 2D float array with a leading column of ones (the bias)."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            # A flat vector is treated as a single feature column
            X = X.reshape(-1, 1)
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        """
        Fit the model by solving the least-squares problem behind the
        Normal Equation.

        X: array-like 2D (n_samples, n_features) -> independent variables
        y: array-like 1D (n_samples,) -> dependent variable
        Raises: ValueError if X is empty or X and y disagree on n_samples
        """
        # Prepend the bias (intercept) column of ones
        X_b = self._add_bias(X)
        y = np.asarray(y, dtype=float)

        if X_b.shape[0] == 0:
            raise ValueError("Data latih kosong.")
        if y.ndim == 0 or y.shape[0] != X_b.shape[0]:
            raise ValueError("Jumlah sampel X dan y tidak sama.")

        # lstsq minimises ||X_b @ theta - y||, i.e. it solves the same problem
        # as inv(X_b.T @ X_b) @ X_b.T @ y, but without explicitly inverting the
        # Gram matrix: that inversion fails (or is wildly inaccurate) when
        # features are collinear. For rank-deficient input lstsq returns the
        # minimum-norm solution instead.
        self.theta, *_ = np.linalg.lstsq(X_b, y, rcond=None)

    def predict(self, X):
        """
        Predict y for the given X.

        X: array-like 2D (n_samples, n_features)
        Return: array of predicted y values
        Raises: ValueError if the model is not fitted or the number of
                features does not match the fitted parameters
        """
        if self.theta is None:
            raise ValueError("Model belum dilatih. Jalankan fit() terlebih dahulu.")

        # Prepend the bias (intercept) column of ones
        X_b = self._add_bias(X)
        if X_b.shape[1] != len(self.theta):
            raise ValueError("Jumlah fitur X tidak sesuai dengan model yang dilatih.")

        return X_b.dot(self.theta)

    def get_equation(self):
        """
        Return the fitted regression equation as a string, or a
        "not trained" message when fit() has not been called yet.
        """
        if self.theta is None:
            return "Model belum dilatih."

        intercept = self.theta[0]
        parts = [f"y = {intercept:.3f}"]

        for i, coef in enumerate(self.theta[1:], start=1):
            # Fold the sign into the operator so a negative coefficient reads
            # "- 0.500 * X1" rather than "+ -0.500 * X1"
            sign = "-" if coef < 0 else "+"
            parts.append(f"{sign} {abs(coef):.3f} * X{i}")

        return " ".join(parts)


In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd

# 1. Load data
# NOTE(review): the path is relative to the notebook's working directory and
# the file is parsed as semicolon-delimited — confirm both match the data file.
file_path = "../data iris.csv"
df = pd.read_csv(file_path, delimiter=";")

# 2. Select variables: one predictor (double brackets keep it 2D) and the target
X = df[['sepal_length']].values
y = df['petal_length'].values

# 3. Split data (80% train, 20% test); random_state is fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Initialise the model
model = LinearRegressionNE()

# 5. Train the model on the training data
model.fit(X_train, y_train)

# 6. Predict on the test data
y_pred_test = model.predict(X_test)

# 7. Evaluate with R-squared (coefficient of determination)
def r2_score(y_true, y_pred):
    """Return the coefficient of determination R² = 1 - SS_res / SS_tot.

    y_true: array-like of observed values
    y_pred: array-like of predicted values (same number of elements)
    Raises: ValueError if the inputs are empty or differ in length

    When y_true is constant SS_tot is zero and the ratio is undefined; in
    that case 1.0 is returned for a perfect prediction and 0.0 otherwise.
    """
    # Flatten both so a (n,) target against a (n, 1) prediction cannot
    # silently broadcast into an (n, n) matrix of residuals
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_true.size == 0:
        raise ValueError("y_true kosong.")
    if y_true.shape != y_pred.shape:
        raise ValueError("Jumlah elemen y_true dan y_pred tidak sama.")

    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)

    if ss_tot == 0:
        # Constant target: avoid dividing by zero
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)

# Score the held-out predictions and report the result
r2 = r2_score(y_test, y_pred_test)
print(f"R-squared (R²) pada data test: {r2:.3f}")

# 8. Show the fitted regression equation
print("Persamaan Regresi:", model.get_equation())

# 9. Visualise the regression: raw test points plus the model's predictions
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color='blue', label='Data Test')  # observed test data
ax.plot(X_test, y_pred_test, color='red', label='Regresi Linear')  # regression line
ax.set_xlabel("Sepal Length")
ax.set_ylabel("Petal Length")
ax.set_title("Linear Regression dengan Normal Equation")
ax.legend()
plt.show()


NameError: name 'pd' is not defined