<h1 style="text-align:center">Logistic Regression From Scratch on the Skin Segmentation Dataset</h1>

## Import the numpy library and load the data set

In [12]:
import numpy as np
# Locations of the training and test files, relative to the notebook.
filename_train = "Skin_NonSkin.txt"
filename_test = "Skin_NonSkin_Test.txt"

# Parse the training file first, then the test file, into float64 arrays.
data, data1 = (
    np.genfromtxt(path, dtype=np.float64)
    for path in (filename_train, filename_test)
)

# Column 3 is used as the label column. These slices are views, so in-place
# edits to Y / Y_test also change column 3 of data / data1.
Y, Y_test = data[:, 3], data1[:, 3]

## Simple function to binarize the labels (1 → 1.0, every other value → 0.0):

In [13]:
def encode():
    """Binarize the module-level label arrays ``Y`` and ``Y_test`` in place.

    Every entry equal to 1 is stored as 1.0 and every other entry as 0.0.
    The function takes no arguments and returns nothing; it works purely by
    mutating ``Y`` first and then ``Y_test``.
    """

    def _binarize(labels):
        # Rewrite each label in place, one element at a time.
        for idx, value in enumerate(labels):
            labels[idx] = 1.0 if value == 1 else 0.0

    _binarize(Y)
    _binarize(Y_test)

## Function to normalize the data

In [14]:
def normalize(arr=None):
    """Min-max scale the first three columns of a 2-D float array in place.

    Each of columns 0, 1 and 2 is rescaled to ``(x - min) / (max - min)``
    using that column's own minimum and maximum. A column whose values are
    all equal has a zero range; it is set to 0.0 instead of being divided by
    zero (which would fill it with NaN).

    Parameters
    ----------
    arr : numpy.ndarray, optional
        Array to rescale in place. It should hold floats, because the scaled
        values are written back into it. When omitted, the module-level
        ``data`` array is used, which is the original behaviour.

    Returns
    -------
    None
    """
    if arr is None:
        arr = data
    for col in range(3):
        column = arr[:, col]
        lowest = column.min()
        span = column.max() - lowest
        if span == 0:
            # Constant feature: avoid 0/0 and map every value to 0.0.
            arr[:, col] = 0.0
        else:
            arr[:, col] = (column - lowest) / span

## Our Hypothesis Function

In [15]:
def hOFx(theta, x):
    """Evaluate the logistic hypothesis for one sample.

    Forms the weighted sum ``x[0]*theta[0] + x[1]*theta[1] + ...`` over every
    entry of ``theta`` and passes it through ``sig``.

    Parameters
    ----------
    theta : sequence of numbers
        Model weights; its length decides how many entries of ``x`` are read.
    x : indexable of numbers
        Feature values of a single sample, with at least ``len(theta)`` items.

    Returns
    -------
    The value of ``sig`` applied to the weighted sum.
    """
    weighted_sum = 0
    for idx, weight in enumerate(theta):
        weighted_sum += x[idx] * weight
    return sig(weighted_sum)

## Sigmoid Function

In [16]:
def sig(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` as a Python float.

    The value is computed with two algebraically equal forms so that
    ``np.exp`` is only ever called with a non-positive argument. This keeps
    large negative ``z`` from overflowing (and emitting a RuntimeWarning);
    such inputs simply return 0.0.

    Parameters
    ----------
    z : float
        Scalar input.

    Returns
    -------
    float
        Sigmoid of ``z``, in the closed interval [0.0, 1.0].
    """
    if z >= 0:
        # exp(-z) <= 1 here, so it cannot overflow.
        return float(1.0 / (1.0 + np.exp(-1.0 * z)))
    # For negative z use exp(z) / (1 + exp(z)); exp(z) < 1, so no overflow.
    exp_z = np.exp(z)
    return float(exp_z / (1.0 + exp_z))

## Cost Function

In [17]:
def cost_func(X, Y, theta, m):
    """Average cross-entropy (log-loss) cost over the first ``m`` samples.

    For each sample ``i`` the hypothesis ``h = hOFx(theta, X[i])`` is computed
    and ``Y[i]*log(h) + (1 - Y[i])*log(1 - h)`` is accumulated; the result is
    the accumulated sum multiplied by ``-1/m``.

    The hypothesis is clamped to ``[eps, 1 - eps]`` before the logarithms are
    taken. Without the clamp, a hypothesis that saturates to exactly 0.0 or
    1.0 produces ``log(0)``, which turns the cost into inf or NaN.

    Parameters
    ----------
    X : indexable of samples
        ``X[i]`` is passed to ``hOFx`` as the features of sample ``i``.
    Y : indexable of numbers
        Labels; the formula assumes they are 0 or 1.
    theta : sequence of numbers
        Model weights.
    m : int
        Number of samples to include (must be non-zero).

    Returns
    -------
    The average cost over those ``m`` samples.
    """
    eps = 1e-15  # smallest / largest hypothesis allowed inside the logs
    err = 0
    for i in range(m):
        hi = hOFx(theta, X[i])
        # Keep hi strictly inside (0, 1) so neither log receives 0.
        hi = min(max(hi, eps), 1.0 - eps)
        err += (Y[i] * np.log(hi)) + ((1 - Y[i]) * np.log(1 - hi))
    return (-1.0 / m) * err

## Partial derivative of the cost function (scaled by the learning rate)

In [18]:
def derivative_cost(X, Y, theta, j, m, a):
    """Learning-rate-scaled partial derivative of the cost w.r.t. ``theta[j]``.

    Sums ``(hOFx(theta, X[i]) - Y[i]) * X[i, j]`` over the first ``m`` samples
    and multiplies the sum by ``a / m``. The returned value is therefore the
    full update step for weight ``j``, not the bare derivative.

    The divisor is the same ``m`` that bounds the summation. Previously ``m``
    was overwritten with ``len(Y)`` after the loop, so the two could disagree
    whenever the caller passed ``m != len(Y)``.

    Parameters
    ----------
    X : 2-D array
        Feature matrix; indexed as ``X[i]`` and ``X[i, j]``.
    Y : indexable of numbers
        Labels.
    theta : sequence of numbers
        Current weights.
    j : int
        Index of the weight being differentiated.
    m : int
        Number of samples to sum over (must be non-zero).
    a : float
        Learning rate.

    Returns
    -------
    ``(a / m)`` times the summed error for feature ``j``.
    """
    err = 0
    for i in range(0, m):
        hx = hOFx(theta, X[i])
        err += (hx - Y[i]) * X[i, j]
    return float(a / m) * err

## Gradient Descent Step (one simultaneous update of all weights)

In [19]:
def grad(X, Y, theta, m, alpha):
    """Perform one gradient-descent update and return the new weights.

    Every new weight is ``theta[j]`` minus the value returned by
    ``derivative_cost`` for index ``j``. All of those calls receive the
    unmodified ``theta``, so the weights are updated simultaneously.

    Returns
    -------
    list
        The updated weights, in the same order as ``theta``. ``theta`` itself
        is not modified.
    """
    return [
        weight - derivative_cost(X, Y, theta, j, m, alpha)
        for j, weight in enumerate(theta)
    ]

## Logistic Regression Function

In [20]:
def Logistic_Regression(X, Y, a1, theta, iters):
    """Train logistic-regression weights with batch gradient descent.

    Before training, ``encode()`` and ``normalize()`` are called. Both take no
    arguments here and act on module-level arrays rather than on ``X`` and
    ``Y``, so the arguments only see their effect when they are views of
    those arrays.

    Then ``iters`` gradient-descent steps are run via ``grad``. On every
    100th step (starting with step 0) the current weights and cost are
    printed. The cost is evaluated once per report; an earlier duplicate
    evaluation whose result was discarded has been removed.

    Parameters
    ----------
    X : 2-D array
        Training features.
    Y : indexable of numbers
        Training labels; its length is used as the sample count.
    a1 : float
        Learning rate.
    theta : sequence of numbers
        Initial weights.
    iters : int
        Number of gradient-descent steps.

    Returns
    -------
    The weights after the final step (``theta`` unchanged if ``iters`` is 0).
    """
    m = len(Y)
    encode()
    normalize()
    for step in range(0, iters):
        theta = grad(X, Y, theta, m, a1)
        if step % 100 == 0:
            print("Theta=", theta)
            print("Cost=", cost_func(X, Y, theta, m))
    return theta

## Driver Program

In [21]:
# One weight per feature column; the model has no separate intercept term.
theta0 = [0, 0, 0]
a = 0.1      # learning rate
iter = 1000  # number of gradient-descent steps (note: shadows the builtin iter)
X = data[:, 0:3]

# Logistic_Regression min-max scales `data` in place. Record the training
# minimum and range first so the test features can be scaled the same way.
# These must come from un-normalized data: re-run the loading cell before
# re-running this one.
feature_min = X.min(axis=0)
feature_range = X.max(axis=0) - feature_min
feature_range[feature_range == 0] = 1.0  # constant feature: avoid dividing by zero

theta = Logistic_Regression(X, Y, a, theta0, iter)
print(theta)

# Scaled copy of the test features; data1 itself is left untouched so the
# rows written to the result file are unchanged.
X_test = (data1[:, 0:3] - feature_min) / feature_range

threshold = 0.40  # a probability strictly above this is classified as 1.0
e = np.zeros(shape=(len(data1), 1), dtype=np.float64)
with open("skin_notskin_result.txt", "w") as outfile:
    for i in range(0, len(data1)):
        # Predict on the TEST row. Using the training row X[i] here would
        # score training predictions against test labels.
        e[i] = hOFx(theta, X_test[i])
        if e[i] > threshold:
            outfile.write(str(data1[i]) + "\t\t" + str(1.0) + "\n")
            e[i] = 1.0
        else:
            outfile.write(str(data1[i]) + "\t\t" + str(0.0) + "\n")
            e[i] = 0.0

# Fraction of test rows whose predicted class equals the encoded test label.
tot = 0.0
for i in range(0, len(data1)):
    if Y_test[i] == e[i]:
        tot += 1.0
print(data1)
print(e)
print("Accuracy Score= ", tot / len(data1))

Theta= [-0.015255458589734051, -0.014050601846047906, -0.007551242441610384]
Cost= 0.6883264150545672
Theta= [-0.8169636848699138, -0.6625041251601019, -0.06811710372257088]
Cost= 0.5577944551753007
Theta= [-1.1812664274118307, -0.8618273297884026, 0.25516016940400627]
Cost= 0.5297007453723476
Theta= [-1.4642907760527926, -0.9874900216951243, 0.6087121900787088]
Cost= 0.5075971689085714
Theta= [-1.717549443969175, -1.0934333598460222, 0.9438444198462651]
Cost= 0.4888267398446989
Theta= [-1.9515448886494822, -1.1899611384182, 1.2539038362371488]
Cost= 0.47280398037428917
Theta= [-2.169370595171101, -1.2794506773326304, 1.5398822103684313]
Cost= 0.4590785194056733
Theta= [-2.3726703401005302, -1.362760505166809, 1.804042828226949]
Cost= 0.4472722477723135
Theta= [-2.5627497814599316, -1.440447311894005, 2.0486578503081603]
Cost= 0.43707131003810773
Theta= [-2.740775035433081, -1.512989950717077, 2.2757718368630986]
Cost= 0.42821718505031503
[-2.9061853768340895, -1.5801691317686346, 2.48