In [None]:
# Ucitaj numpy i pandas
import numpy as np
import pandas as pd

In [None]:
# Load the training data
data_train = pd.read_csv('500train_BoW.csv')

# Decrease TARGET by 1 using vectorized Series arithmetic rather than a
# per-element apply (presumably to make the labels zero-based -- confirm
# against the CSV)
data_train['TARGET'] = data_train['TARGET'] - 1

# Show the first rows
data_train.head()

In [None]:
# Convert the pandas DataFrame to numpy arrays and split them into train/test parts

# Fraction of rows, taken in file order (no shuffling), that goes to training
TRAIN_FRACTION = 0.7
split_idx = int(data_train.shape[0] * TRAIN_FRACTION)

# Inputs X are every column except TARGET, so drop it
X_all = data_train.drop(columns=['TARGET']).to_numpy()

# Outputs y are the TARGET column only, reshaped into a column vector
y_all = data_train['TARGET'].to_numpy().reshape(-1, 1)

# Leading rows form the training set, the remaining rows the test set
X_train, X_test = X_all[:split_idx], X_all[split_idx:]
y_train, y_test = y_all[:split_idx], y_all[split_idx:]

# Print the shapes of the resulting arrays
print(f'X_train shape: {X_train.shape}, y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}, y_test shape: {y_test.shape}')

In [None]:
# Logistic regression hypothesis (prediction) function
def lr_h(x, w):
    """Return the sigmoid of the affine score for every row of ``x``.

    A column of ones is prepended to ``x`` so that the first entry of ``w``
    acts as the bias term.

    Parameters
    ----------
    x : 2-D array with one sample per row.
    w : weights with ``x.shape[1] + 1`` rows; row 0 multiplies the column
        of ones.

    Returns
    -------
    Array with the shape of ``x_aug @ w`` holding values in [0, 1].
    """
    x_aug = np.hstack((np.ones((x.shape[0], 1)), x))
    z = x_aug @ w
    # sigmoid(z) = exp(-log(1 + exp(-z))). logaddexp evaluates the log term
    # without ever forming exp(-z), so strongly negative scores no longer
    # overflow and trigger a RuntimeWarning.
    return np.exp(-np.logaddexp(0.0, -z))

# Cross-entropy error of logistic regression
def cross_entropy_error(X, y, w):
    """Return the mean binary cross-entropy of ``lr_h(X, w)`` against ``y``.

    Parameters
    ----------
    X : samples, passed unchanged to ``lr_h``.
    y : labels; reshaped to a column vector before use.
    w : weights, passed unchanged to ``lr_h``.

    Returns
    -------
    The mean of ``-y*log(p) - (1-y)*log(1-p)`` over all elements.
    """
    targets = y.reshape(-1, 1)
    # Run the model once and keep the probabilities inside [1e-7, 1 - 1e-7]
    # so that both logarithms below stay finite.
    probs = np.clip(lr_h(X, w), 1e-7, 1 - 1e-7)
    return np.mean(-targets * np.log(probs) - (1 - targets) * np.log(1 - probs))

# Train a logistic regression model with batch gradient descent
def lr_train(X, y, eta=0.01, max_iter=2000, alpha=0, epsilon=0.0001, trace=False):
    """Fit logistic regression weights by batch gradient descent.

    Parameters
    ----------
    X : 2-D array with one sample per row.
    y : labels, one per sample; reshaped to a column vector internally.
    eta : step size of each update.
    max_iter : maximum number of weight updates to perform.
    alpha : regularization factor; every update shrinks the non-bias
        weights by ``(1 - eta * alpha)``. The bias ``w[0]`` is not shrunk.
    epsilon : training stops once the cross-entropy error changes by less
        than this between two consecutive iterations.
    trace : when true, also return the history of weight vectors.

    Returns
    -------
    ``w`` of shape ``(X.shape[1] + 1, 1)``, or ``(w, weight_matrix)`` when
    ``trace`` is true. ``weight_matrix`` stacks the initial weights and the
    weights after each update, so its last entry equals the returned ``w``.
    """
    targets = y.reshape(-1, 1)
    w = np.zeros((X.shape[1] + 1, 1))

    # The history is only recorded when the caller asked for it.
    weight_history = [w.copy()] if trace else None

    # There is no previous error before the first iteration, so start from
    # infinity instead of comparing against an arbitrary 0.
    last_error = np.inf

    # Exactly max_iter updates at most; the convergence test runs before
    # each update so the returned weights are the ones that were evaluated.
    for _ in range(max_iter):
        cur_error = cross_entropy_error(X, y, w)
        if np.abs(cur_error - last_error) < epsilon:
            break
        last_error = cur_error

        # Summed descent direction over all samples in one matrix product:
        # the bias gets sum(y - h), the other weights get X^T (y - h).
        residual = targets - lr_h(X, w)

        w[0] = w[0] + eta * residual.sum()
        w[1:] = w[1:] * (1 - eta * alpha) + eta * (X.T @ residual)

        if trace:
            weight_history.append(w.copy())

    if trace:
        return w, np.array(weight_history)
    return w

In [None]:
# Fit the model on the training split and keep the learned weights
weights = lr_train(X_train, y_train, alpha=0.1, max_iter=2000)

# Report cross-entropy error and accuracy on the training split
train_error = cross_entropy_error(X_train, y_train, weights)
train_accuracy = (lr_h(X_train, weights).round() == y_train).mean()
print(f'Error: {train_error}')
print(f'Accuracy: {train_accuracy}')

In [None]:
# Report cross-entropy error and accuracy on the test split
test_error = cross_entropy_error(X_test, y_test, weights)
test_accuracy = (lr_h(X_test, weights).round() == y_test).mean()
print(f'Error: {test_error}')
print(f'Accuracy: {test_accuracy}')

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline: fit scikit-learn's LogisticRegression on the same training split
# (fit returns the estimator itself, so construction and fitting can be chained)
model = LogisticRegression().fit(X_train, y_train.ravel())

# Accuracy of the baseline on the test split
predict = model.predict(X_test)
print((predict.reshape(-1, 1) == y_test).mean())