In [1]:
import numpy as np
import csv
from sklearn.preprocessing import StandardScaler
import math
class MyLogisticReggresion:
    """Binary logistic regression trained with full-batch gradient descent.

    The model is ``p(y = 1 | x) = sigmoid(w . x + free)``. Every call to
    ``fit`` starts from zero weights and a zero intercept and performs
    ``epochs`` updates using the gradient averaged over all training samples.

    Attributes:
        learning_rate: Step size applied to every gradient update.
        epochs: Number of gradient-descent iterations run by ``fit``.
        w: Learned feature weights, one per input column (``None`` until
            ``fit`` has been called).
        free: Learned intercept (bias) term.
    """

    def __init__(self, learning_rate=0.05, epochs=5000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.w = None
        self.free = 0

    # σ(z) = 1 / (1 + e^(-z))

    @staticmethod
    def sigmoid(z):
        """Return the logistic function of ``z`` without overflowing.

        Args:
            z: A scalar or array-like of real numbers.

        Returns:
            A NumPy scalar for scalar input, otherwise an array with the same
            shape as ``z``; every value lies in ``[0, 1]``.
        """
        z = np.asarray(z, dtype=float)
        # exp(-|z|) is always <= 1, so it cannot overflow; the two branches
        # below are algebraically the same function, each used on the side
        # where it is numerically safe.
        decay = np.exp(-np.abs(z))
        out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Indexing with () turns a 0-d array back into a scalar and leaves
        # higher-dimensional arrays unchanged.
        return out[()]

    @staticmethod
    def transpose(matrix):
        """Return the transpose of a rectangular, non-empty 2-D sequence.

        Args:
            matrix: Sequence of equally long rows, indexable as
                ``matrix[i][j]``.

        Returns:
            A list of lists where element ``[j][i]`` equals ``matrix[i][j]``.
        """
        return [[matrix[i][j] for i in range(len(matrix))] for j in range(len(matrix[0]))]

    @staticmethod
    def mat_vec_dot(transposed_matrix, vector):
        """Multiply a matrix (given as a sequence of rows) by a vector.

        Args:
            transposed_matrix: Sequence of rows; each row must have at least
                ``len(vector)`` elements.
            vector: Sequence of numbers.

        Returns:
            A list with one dot product per row.
        """
        result = []
        for row in transposed_matrix:
            s = 0
            for i in range(len(vector)):
                s += row[i] * vector[i]
            result.append(s)
        return result

    def fit(self, x, y):
        """Learn ``w`` and ``free`` from training data.

        Args:
            x: 2-D array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` labels (0 or 1).

        Raises:
            ValueError: If ``x`` is not 2-D, contains no samples, or ``y``
                does not have exactly one label per row of ``x``.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_features = x.shape
        if n_samples == 0:
            raise ValueError("fit() needs at least one training sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"x has {n_samples} samples but y has {y.shape[0]} labels"
            )

        # Reset BOTH parameters so that fitting the same instance twice gives
        # the same result as fitting a fresh instance.
        self.w = np.zeros(n_features)
        self.free = 0.0

        for _ in range(self.epochs):
            # Prediction error for every sample: sigmoid(x.w + free) - y.
            errors = self.sigmoid(x @ self.w + self.free) - y

            # Average gradient over all samples for weights and intercept.
            w_gradient = (x.T @ errors) / n_samples
            free_gradient = errors.sum() / n_samples

            self.w = self.w - self.learning_rate * w_gradient
            self.free -= self.learning_rate * free_gradient

    def predict(self, x):
        """Predict class labels for the rows of ``x``.

        Args:
            x: 2-D array-like of shape ``(n_samples, n_features)``.

        Returns:
            Integer array of 0/1 labels; a sample is labelled 1 when its
            predicted probability is at least 0.5.
        """
        x = np.array(x)
        linear_model = np.dot(x, self.w) + self.free
        predictions = self.sigmoid(linear_model)
        return (predictions >= 0.5).astype(int)

def load_data(file_name):
    """Load two numeric features and a binary label from a comma-separated file.

    Each non-empty row contributes one sample: columns 2 and 3 (0-based) are
    parsed as floats (the radius and texture features), and column 1 is the
    class label, mapped to 1 when it equals ``'M'`` and to 0 otherwise.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a list of ``[radius, texture]``
        pairs and ``y`` is the list of matching 0/1 labels.

    Raises:
        IndexError: If a non-empty row has fewer than four columns.
        ValueError: If a feature column cannot be parsed as a float.
    """
    x = []
    y = []
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for files passed to csv.reader.
    with open(file_name, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if not row:
                # Blank line (typically a trailing newline at end of file).
                continue
            radius = float(row[2])
            texture = float(row[3])
            x.append([radius, texture])
            y.append(1 if row[1] == 'M' else 0)
    return x, y

# --- Load the data set -------------------------------------------------------
file = '../data/wdbc.data'
inputs, outputs = load_data(file)
feature1 = [ex[0] for ex in inputs]
feature2 = [ex[1] for ex in inputs]

# --- Reproducible 80 % / 20 % train/test split -------------------------------
np.random.seed(5)
indexes = [i for i in range(len(inputs))]
trainSample = np.random.choice(indexes, int(0.8 * len(inputs)), replace=False)
# A set gives O(1) membership tests; scanning the NumPy array for every index
# made this step quadratic in the number of samples.
trainIndexSet = set(trainSample.tolist())
testSample = [i for i in indexes if i not in trainIndexSet]

trainInputs = [inputs[i] for i in trainSample]
trainOutputs = [outputs[i] for i in trainSample]
testInputs = [inputs[i] for i in testSample]
testOutputs = [outputs[i] for i in testSample]

# --- Standardise features ----------------------------------------------------
# The scaler is fitted on the training data only and then applied to the test
# data, so no test-set statistics leak into training.
scaler = StandardScaler()

trainInputs = scaler.fit_transform(trainInputs)
testInputs = scaler.transform(testInputs)

feature1train = [ex[0] for ex in trainInputs]
feature2train = [ex[1] for ex in trainInputs]
feature1test = [ex[0] for ex in testInputs]
feature2test = [ex[1] for ex in testInputs]

# --- Train the model ---------------------------------------------------------
regressor = MyLogisticReggresion()
regressor.fit(trainInputs, trainOutputs)

w0, w1, w2 = regressor.free, regressor.w[0], regressor.w[1]
print(f'the learnt model: f(x) = {w0} + {w1} * x1 + {w2} * x2')

# --- Predict -----------------------------------------------------------------
computedTestOutputs = regressor.predict(testInputs)
# Classify one new sample (first feature = 18, second feature = 10); it must
# go through the same scaler as the training data before prediction.
new_datail = scaler.transform([[18, 10]])
rez = regressor.predict(new_datail)
print("Noua leziune este:", "Maligna" if rez[0] == 1 else "Benigna")


the learnt model: f(x) = -0.9477163231738559 + 4.294968291614496 * x1 + 1.0172143426534384 * x2
Noua leziune este: Maligna
