In [2]:
import math
import random
import codecs
import numpy as np
import plotly.graph_objects as go
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [12]:
training = []
test = []
trainingLabels = []
testLabels = []

iterations = 100
featureNumber = 5

print("Loading training samples")
with codecs.open("cancerTraining.txt", "r", "utf-8") as f:
    for line in f:
        elements=(line.rstrip('\n')).split(",")
        feat = [float(x) for x in elements[:-1]]
        if elements[-1] == "malignant":
            label = 1
        else:
            label = 0
        training.append(feat)
        trainingLabels.append(label)

print("Load test samples")

with codecs.open("cancerTest.txt","r","UTF-8") as f:
    for line in f:
        elements=(line.rstrip('\n')).split(",")
        feat = [float(x) for x in elements[:-1]]
        if elements[-1] == "malignant":
            label = 1
        else:
            label = 0
        test.append(feat)
        testLabels.append(label)

Loading training samples
Load test samples


In [6]:
def sigmoid(z):
    return 1 / (1 + math.exp(-z))

In [7]:
def gradient(sampleList, weights):
    sumElements = 0.0
    for x, y in zip(sampleList, weights):
        sumElements += x * y
    return sigmoid(sumElements)

In [8]:
def stochasticGradientAscent(trainingLists, trainingLabels, featureNumber, iterations = 150):
    sampleNumber = len(trainingLists)
    weights = [1.0] * featureNumber
    for x in range(iterations):
        sampleIndex = list(range(sampleNumber))
        for y in range(sampleNumber):
            alpha = 4/(1.0 + x + y) + 0.01
            randIndex = int(random.uniform(0, len(sampleIndex)))
            sampleGradient = gradient(trainingLists[randIndex], weights)
            error = trainingLabels[randIndex] - sampleGradient
            temp = []
            for index in range(featureNumber):
                temp.append(alpha * (error * trainingLists[randIndex][index]))
            for z in range(featureNumber):
                weights[z] += temp[z]
            del(sampleIndex[randIndex])
        return weights

In [9]:
def classifyList(testList, weights):
    sumElements = 0.0
    for x, y in zip(testList, weights):
        sumElements += x * y
        probability = sigmoid(sumElements)
        if probability > 0.5:
            return 1
        else:
            return 0

In [13]:
print("Apply the stochastic gradient ascent over training samples")
optimalWeights = stochasticGradientAscent(training, trainingLabels, featureNumber, iterations)

Apply the stochastic gradient ascent over training samples


In [16]:
print("Use the obtained weights over test samples for clasifying")

correctPredictions = 0
totalPredictions = 0

for x, y in zip(test, testLabels):
    totalPredictions += 1
    predicted = classifyList(x, optimalWeights)
    if predicted == y:
        correctPredictions += 1
    print("Predicted:", predicted, "Actual:", y)

print("Model accuracy:", round((correctPredictions / totalPredictions) * 100, 2), "%")

Use the obtained weights over test samples for clasifying
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 1
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 1
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0

In [18]:
logistic = LogisticRegression()

logistic.fit(training, trainingLabels)

predictions = logistic.predict(test)

print("Predictions vs. True Labels:")
for i in range(len(predictions)):
    print("Predicted:", predictions[i], "Actual:", testLabels[i])

accuracy = accuracy_score(testLabels, predictions)
print("Model accuracy: ", round(accuracy * 100, 2), "%")

Predictions vs. True Labels:
Predicted: 1 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 1 Actual: 1
Predicted: 1 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 1 Actual: 0
Predicted: 1 Actual: 1
Predicted: 1 Actual: 1
Predicted: 0 Actual: 0
Predicted: 1 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 1 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 1 Actual: 1
Predicted: 1 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 1 Actual: 1
Predicted: 0 Actual: 0
Predicted: 1 Actual: 0
Predicted: 1 Actual: 1
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predicted: 0 Actual: 0
Predi