In [90]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.bias = None
        self.weights = None
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        num_samples, num_features = X.shape
        self.weights = np.zeros(num_features)
        self.bias = 0

        # Gradient descent
        for _ in range(self.num_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(linear_model)

            # Compute gradients
            dw = (1 / num_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / num_samples) * np.sum(y_pred - y)

            # Update parameters
            self.weights = self.weights - self.learning_rate * dw
            self.bias = self.bias - self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_model)
        y_pred_class = [1 if i > 0.5 else 0 for i in y_pred]
        return y_pred_class

In [91]:
df_cars = pd.read_csv('../dados/car-evaluation-uci/cars.csv', sep=',', header=None)
df_cars.columns = ["buying", "maintenance", "doors", "persons", "lug_boot", "safety", "class"]

for col in df_cars.columns[:-1]:
    df_cars[col] = df_cars[col].astype('category')
    df_cars[col] = df_cars[col].cat.codes
    
# unacc = 0
# others = 1
df_cars[df_cars["class"] != 'unacc'] = 0
df_cars["class"] = df_cars["class"].replace('unacc', 1)

df_cars

Unnamed: 0,buying,maintenance,doors,persons,lug_boot,safety,class
0,3,3,0,0,2,1,1
1,3,3,0,0,2,2,1
2,3,3,0,0,2,0,1
3,3,3,0,0,1,1,1
4,3,3,0,0,1,2,1
...,...,...,...,...,...,...,...
1723,0,0,0,0,0,0,0
1724,0,0,0,0,0,0,0
1725,1,1,3,2,0,1,1
1726,0,0,0,0,0,0,0


In [92]:
X_train, X_test, y_train, y_test = train_test_split(df_cars[df_cars.columns[:-1]], df_cars[df_cars.columns[-1]], test_size=0.2)

clf = LogisticRegression()
clf.fit(X_train, y_train)
print(confusion_matrix(y_test, clf.predict(X_test)))
print(accuracy_score(y_test, clf.predict(X_test)))

[[110   0]
 [  2 234]]
0.9942196531791907
