In [35]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [84]:
from math import exp

def sigmoid(x):
    return 1/(1 + exp(-x))

class MyLinnearRegressor:

    def __init__(self) -> None:
        self.W = []
        self.dict = {}
        
    def train(self, input_set, output_set, learning_rate=0.001, noEpochs=100):
        count = 0
    
        for value in set(output_set):
            self.dict[value] = count
            count += 1
        
        X = [[1] + line for line in input_set]
        m = len(X[0])
        for i in range(m):
            self.W.append(np.random.random())

        for _ in range(noEpochs):
            errors = []
            for i in range(m):
                errors.append(0.0)

            for input_line, output_line in zip(X, output_set):

                prediction = sum(input_line[j] * self.W[j] for j in range(m))
                error = self.dict[output_line] - prediction

                for j in range(m):
                    errors[j] += error * input_line[j]

            for j in range(m):
                self.W[j] += learning_rate * errors[j] / len(input_set)

    def find_key_by_value(self,value):
        for key in self.dict.keys():
            if self.dict[key] == value:
                return key
        return None

    def predict(self, input_set):
        X = [[1] + line for line in input_set]
        output_set = []
        for line in X:
            value = sum([w*v for w,v in zip(self.W,line)])
            print(value)
            if value > 0.5:
                output_set.append(self.find_key_by_value(1))
            else:
                output_set.append(self.find_key_by_value(0))
                
        return output_set

In [85]:
def read_datas():
    breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)   
    X = breast_cancer_wisconsin_diagnostic.data.features 
    y = breast_cancer_wisconsin_diagnostic.data.targets 

    new_data = {
        'Radius': X['radius1'],
        'Texture': X['texture1'],  
        'Diagnosis': y['Diagnosis']  
    }

    df = pd.DataFrame(new_data)

    return df
    

In [86]:
df = read_datas()
df

Unnamed: 0,Radius,Texture,Diagnosis
0,17.99,10.38,M
1,20.57,17.77,M
2,19.69,21.25,M
3,11.42,20.38,M
4,20.29,14.34,M
...,...,...,...
564,21.56,22.39,M
565,20.13,28.25,M
566,16.60,28.08,M
567,20.60,29.33,M


In [87]:
def getTrainingAndValidationData():
    df = read_datas()

    dataSize = df.shape[0]
    trainingIndexSet = np.random.choice(range(dataSize),size=int(0.8 * dataSize),replace=False)
    validationIndexSet = [i for i in range(dataSize) if i not in trainingIndexSet] 

    trainingInputSet = [[df['Radius'].iloc[i],df['Texture'].iloc[i]] for i in trainingIndexSet]
    trainingOutputSet = [df['Diagnosis'].iloc[i] for i in trainingIndexSet]

    validationInputSet = [[df['Radius'].iloc[i],df['Texture'].iloc[i]] for i in validationIndexSet]
    validationOutputSet = [df['Diagnosis'].iloc[i] for i in validationIndexSet]

    return trainingInputSet, trainingOutputSet, validationInputSet, validationOutputSet    

def getRegressorTool():
    trainingInputSet, trainingOutputSet, _, _ = getTrainingAndValidationData()
    xx = [el for el in trainingInputSet]

    regressor = LogisticRegression()
    regressor.fit(xx, trainingOutputSet)

    return regressor


def getMyRegressor():
    trainingInputSet, trainingOutputSet, _, _ = getTrainingAndValidationData()
    xx = [el for el in trainingInputSet]

    regressor = MyLinnearRegressor()
    regressor.train(xx, trainingOutputSet,noEpochs=10000)

    return regressor

In [88]:
# consideram M - positive
def get_TP_TN_FP_FN(computedValues,groundTruth):
    TP,TN,FP,FN = 0,0,0,0
    for cv,gtv in zip(computedValues, groundTruth):
        if cv == 'M':
            if gtv == 'M':
                TP += 1
            else:
                FP += 1
        else:
            if gtv == 'M':
                FN += 1
            else:
                TN += 1
    return TP,TN,FP,FN

def getAccuracy(TP,TN,FP,FN):
    return (TP + TN) / (TP+TN+FP+FN)

def getPrecision(TP,TN,FP,FN):
    return TP/(TP + FP)

def getRecall(TP,TN,FP,FN):
    return TP/(TP+FN)

In [89]:
regressor = getRegressorTool()

_,_,validationInputSet,validationOutputSet = getTrainingAndValidationData()

computedOutputSet = regressor.predict(validationInputSet)
TP,TN,FP,FN = get_TP_TN_FP_FN(computedOutputSet, validationOutputSet)
acc = getAccuracy(TP,TN,FP,FN)
precision = getPrecision(TP,TN,FP,FN)
recall = getRecall(TP,TN,FP,FN)
print("TP: {}; TN: {}; FP: {}; FN: {}\nAccuracy: {}\nPrecision: {}\nRecall: {}".format(TP,TN,FP,FN,acc,precision,recall))

TP: 37; TN: 67; FP: 5; FN: 5
Accuracy: 0.9122807017543859
Precision: 0.8809523809523809
Recall: 0.8809523809523809


In [90]:
myregressor = getMyRegressor()
computedOutputSet = myregressor.predict(validationInputSet)

TP,TN,FP,FN = get_TP_TN_FP_FN(computedOutputSet, validationOutputSet)
acc = getAccuracy(TP,TN,FP,FN)
precision = getPrecision(TP,TN,FP,FN)
recall = getRecall(TP,TN,FP,FN)
print("TP: {}; TN: {}; FP: {}; FN: {}\nAccuracy: {}\nPrecision: {}\nRecall: {}".format(TP,TN,FP,FN,acc,precision,recall))

0.6829362404424318
0.4588386621541706
0.3514794781117806
0.5089346740047929
0.6735205044189971
0.4376135481868253
0.6444984162756158
0.38642838795019907
0.36865855530462854
0.16572752958808173
0.34446061521740845
0.3348337040678743
0.27513935111079124
0.3863205461783373
0.543613919371704
0.44844137934918304
0.6667643632441379
0.4684110138132549
0.3541705118000191
0.43639297889921214
0.16212794071634448
0.7013842023077199
0.6878916893164965
0.32986972085426314
0.3300965913834532
0.4476485856893146
0.8541540722771355
0.5087550578020619
0.351653428771555
0.5358697689981196
0.6629549364971917
0.34286543249975715
0.20594531170579328
0.6350793289598897
0.3269125901374788
0.3559649599951908
0.49676606365542036
0.28588036305207876
0.302642176973018
0.6289206458223364
0.14710163917457908
0.37056465504291447
0.7038496186590653
0.44093695012803014
0.23399764263249523
0.7938556547651274
0.6595136389072498
0.4270035947351505
0.4865827287061579
0.5627409492704926
0.7212184860152246
0.547753694102524