In [75]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [76]:
class MyLogisticRegression:
  def __init__(self, learning_rate=0.01, max_iter=1000, threshold=0.5):
    self.classifiers = {}
    self.learning_rate = learning_rate
    self.max_iter = max_iter
    self.threshold = threshold

  def loss(self, y, hypothesis):
    m = len(y)
    epsilon = 1e-10
    j_theta = (-1 / m) * np.sum((y * np.log(hypothesis + epsilon)) + ((1 - y) * np.log(1 - hypothesis + epsilon)))
    return j_theta
  
  def sigmoid(self, z):
    return 1 / (1 + np.exp(-z))

  def hypothesis(self, X, weights, bias):
    return self.sigmoid(np.dot(X, weights) + bias)
  
  def fit(self, X, y):
    classes = np.unique(y)
    for c in classes:
        binary_y = (y == c).astype(int)
        self.classifiers[c] = self.train_binary_classifier(X, binary_y)

  def train_binary_classifier(self, X, y):
    m, n = X.shape
    weights = np.zeros(n)
    bias = 0

    for _ in range(self.max_iter):
      y_hat = self.hypothesis(X, weights, bias)

      # avg_loss = self.loss(y, y_hat)

      derivative_theta = (1 / m) * np.dot(y_hat - y, X)
      derivative_bias = (1 / m) * np.sum(y_hat - y)
      weights -= self.learning_rate * derivative_theta
      bias -= self.learning_rate * derivative_bias

    return weights, bias

  def predict(self, X):
    predicted_classes = []
    for c in self.classifiers:
        weights, bias = self.classifiers[c]
        probabilities = self.hypothesis(X, weights, bias)
        class_pred = (probabilities >= self.threshold).astype(int)
        predicted_classes.append(class_pred)

    # I will have 4 arrays of 1s and 0s, each array consists of a particular class
    # The class with the highest probability is the predicted class
    return np.argmax(predicted_classes, axis=0)

    


### Iris dataset 

In [77]:
# from sklearn.datasets import load_iris

# df = load_iris()
# X = df.data
# y = df.target

# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, train_size=0.7)

# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

### Hogwarts Dataset

In [78]:
df = pd.read_csv('../datasets/dataset_train.csv')

label_encoder = LabelEncoder()
df['target'] = label_encoder.fit_transform(df['Hogwarts House'])

numeric_features = df.select_dtypes(include=[np.number])
imp = SimpleImputer(strategy='mean')
numeric_features = pd.DataFrame(imp.fit_transform(numeric_features), columns=numeric_features.columns)

X = numeric_features.drop(columns=['target'])
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [79]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(accuracy)

0.9875


In [85]:
MyLR = MyLogisticRegression(learning_rate=0.1, max_iter=100, threshold=0.5)

MyLR.fit(X_train, y_train)
y_pred = MyLR.predict(X_test)

In [86]:
accuracy_score(y_test, y_pred)

0.9875

In [82]:
y_pred = label_encoder.inverse_transform(y_pred)
y_pred

array(['Gryffindor', 'Hufflepuff', 'Slytherin', 'Ravenclaw', 'Hufflepuff',
       'Hufflepuff', 'Hufflepuff', 'Ravenclaw', 'Gryffindor', 'Slytherin',
       'Hufflepuff', 'Ravenclaw', 'Gryffindor', 'Ravenclaw', 'Ravenclaw',
       'Hufflepuff', 'Hufflepuff', 'Slytherin', 'Hufflepuff',
       'Gryffindor', 'Slytherin', 'Hufflepuff', 'Slytherin', 'Hufflepuff',
       'Hufflepuff', 'Slytherin', 'Hufflepuff', 'Hufflepuff',
       'Hufflepuff', 'Gryffindor', 'Ravenclaw', 'Gryffindor',
       'Gryffindor', 'Hufflepuff', 'Slytherin', 'Gryffindor',
       'Hufflepuff', 'Ravenclaw', 'Hufflepuff', 'Ravenclaw', 'Ravenclaw',
       'Hufflepuff', 'Ravenclaw', 'Hufflepuff', 'Ravenclaw', 'Ravenclaw',
       'Hufflepuff', 'Hufflepuff', 'Ravenclaw', 'Gryffindor', 'Ravenclaw',
       'Ravenclaw', 'Hufflepuff', 'Ravenclaw', 'Hufflepuff', 'Slytherin',
       'Gryffindor', 'Hufflepuff', 'Ravenclaw', 'Ravenclaw', 'Gryffindor',
       'Hufflepuff', 'Hufflepuff', 'Ravenclaw', 'Ravenclaw', 'Gryffindor',
      