# Linear Models
This file is for test and creation of linear/logistic regression models
1. Linear_regression built - testing to be done
2. Logistic_regression to be built as a class
3. Neural_network - simple perceptron to be built as class

In [1]:
import numpy as np

In [8]:
"""
 feature scaling occurs across categories i.e. we're training a model
 to recognise dogs, we normalise across:
 tail length, leg length, nose-pointy-outness
                  normalisation (x-xmin)/(xmax-xmin)
                 standardisation (X-Xsample_mean)/Xsample_std
"""
def normalize(X):
    # normalize across features
    m,n = X.shape
    for col in range(n):
        X[:,col] = ((X[:,col]-np.min(X[:,col]))/(np.max(X[:,col])-np.min(X[:,col])))
    return X
def standardize(X):
    # standardize across features
    m,n = X.shape
    for col in range(n):
        X[:,col] = (X[:,col]-np.mean(X[:,col]))/np.std(X[:,col])
    return X
# Creation of linear regression class:
class Linear_regression:
    """Linear regression model
    Parameters
    -----------
    alpha : float, default 0.01
        learning rate of gradient descent
    degree_accuracy : float, default 0.05
        degree of accuracy that linear
        regression is looking for during 
        gradient descent
    feature_scaling : string, default None
        options: None,'normalize'
        """
    def __init__(self,alpha=0.01, degree_accuracy=0.05,feature_scaling=None):
        self.alpha = alpha
        self.degree_accuracy = degree_accuracy
        self.thetas_set = False
        self.feature_scaling_options = [None,'normalize']
        assert feature_scaling in self.feature_scaling_options,"no such feature scaling option"
        self.feature_scaling = feature_scaling
        self.theta = np.array([])
    def generate_theta(self,X):
        self.theta = np.random.rand(X.shape[1],1)
    def add_intercept(self,X):
        return np.concatenate((X,np.ones((X.shape[0],1))),axis=1)
    def computeCost(self,X,y,theta):
        # Cost function
        m,n=X.shape
        J=0
        X=(np.dot(X,theta))
        J=sum(np.power(X-y,2))
        J*=(1.0/(2.0*m))
        return J
    def coefficients(self):
        # prints coefficients
        print(self.theta[:-1,:])
    def intercept(self):
        # print intercept
        print(self.theta[-1,:])
    def fit(self,X,y):
        """fits model
        Parameters
        --------------
        X : numpy array
            Array should be independent variables.
            Shape must be m * n, where m is cases
            and n is features
        y : numpy array
            Array should be dependent variable
            Shape should be m * 1, where m is cases
        Returns
        --------------
        theta : numpy array
            These are the coefficients of the linear
            regression. This will be stored in the 
            model for use in predict"""
        
        alpha=self.alpha
        degree_accuracy=self.degree_accuracy
        if self.thetas_set == False:
            if self.feature_scaling == 'normalize':
                X=normalize(X)
            X = self.add_intercept(X)
            self.generate_theta(X)
            self.thetas_set = True
        theta = self.theta
        h=0
        m,n=X.shape
        J_history=[]
        J=999.0
        iterations=0
        print(theta)
        while J>=degree_accuracy:
            h=np.dot(X,theta)
            #print('h1: {0}'.format(h))
            h=np.dot(X.T,h-y)/float(m)
            #print('h2: {0}'.format(h))
            theta=theta-(h*alpha)
            #print(theta)
            J=self.computeCost(X,y,theta)
            J_history.append(J)
            iterations+=1
            if iterations > 2 and J > J_history[-2]:
                print('J increasing, reducing alpha to: {0}'.format(alpha/2))
                self.generate_theta(X)
                self.alpha = alpha/2
                theta = self.fit(X,y)
                return theta
        #print(J_history)
        print('Returned Thetas are: {0}'.format(theta))
        print('degree of accuracy: %s' % J_history[-1])
        print('number of iterations: %s' % iterations)
        self.theta = theta
        return theta
    def predict(self,X):
        """Use this function to predict y
        from a set of X data
        Parameters
        ------------
        X : numpy array
            independent variables, shaped
            m * n, where m is case and n is 
            feature
        """
        if self.feature_scaling == 'normalize':
            X=normalize(X)
        X = self.add_intercept(X)
        return np.dot(X,self.theta)

In [9]:
# Testing
import matplotlib.pyplot as plt
%matplotlib inline
# Step 1 generate FAKE data
# These are the test thetas being used to generate fake data
theta0=13
theta1=21
X=np.random.randint(1,high=80,size=(90,1))
X=X.reshape(len(X),1)
#one=np.ones(X.shape)
#X=np.concatenate((one,X),axis=1)
y=np.array([[theta0+X[i,0]*theta1] for i in range(len(X))])

# Step 2 - create and fit linear regression model
lr=Linear_regression(feature_scaling=None)
lr.fit(X,y)



[[0.34082353]
 [0.65065563]]
J increasing, reducing alpha to: 0.005
[[0.16133195]
 [0.34896986]]
J increasing, reducing alpha to: 0.0025
[[0.21451453]
 [0.5643427 ]]
J increasing, reducing alpha to: 0.00125
[[0.54974239]
 [0.60002906]]
J increasing, reducing alpha to: 0.000625
[[0.207827  ]
 [0.06004075]]
Returned Thetas are: [[21.01362283]
 [12.34994406]]
degree of accuracy: [0.04998874]
number of iterations: 20000


array([[21.01362283],
       [12.34994406]])

In [10]:
lr.intercept()

[12.34994406]


In [46]:
from scipy.optimize import fmin_cg #fmin_cg to train neural network
class NeuralNetwork:
    def __init__(self,hidden_layer_size=20,epsilon=0.12,lamb=1,feature_scaling=None):
        self.hidden_layer_size=hidden_layer_size
        self.epsilon=epsilon
        self.lamb=lamb
        self.X=np.array([[]])
        self.y=np.array([[]])
        self.theta=np.array([[]])
        self.input_layer_size=5
        self.output_layer_size=1
        self.feature_scaling_options = [None,'normalize','standardize']
        assert feature_scaling in self.feature_scaling_options,"no such feature scaling option"
        self.feature_scaling=feature_scaling
    def sigmoid(self,z):
        return 1 / (1 + np.exp(-z))
    def sigmoidGradient(self,z):
        return np.multiply(self.sigmoid(z),(1-self.sigmoid(z)))
    def initialise_thetas(self,input_layer_size,hidden_layer_size,output_layer_size):
        theta1=np.random.rand(input_layer_size+1,hidden_layer_size)
        theta2=np.random.rand(hidden_layer_size+1,output_layer_size)
        theta=np.array([theta1,theta2])
        theta=self.theta_flatten(theta)*2*self.epsilon-self.epsilon
        return theta
    def theta_flatten(self,theta):
        theta_t=theta[:]
        theta=np.array([])
        #fmin_cg requires a gradient to be (m,0) dimensions
        for x in theta_t:
            theta=np.concatenate((theta,x.flatten()),0)
        #theta=theta.reshape(len(theta),0)
        #print(theta.dtype)
        return theta
    def theta_unflatten(self,theta,input_layer_size,hidden_layer_size,output_layer_size):
        theta1=theta[:(input_layer_size+1)*hidden_layer_size].reshape((input_layer_size+1),hidden_layer_size)
        theta2=theta[(input_layer_size+1)*hidden_layer_size:].reshape(hidden_layer_size+1,output_layer_size)
        return theta1, theta2
    def costFunction(self,theta,X,y,input_layer_size,hidden_layer_size,output_layer_size,lamb):
        m,n=X.shape
        theta1,theta2=self.theta_unflatten(theta,input_layer_size,hidden_layer_size,output_layer_size)
        one=np.ones((m,1))
        a1=np.concatenate((one,X),1)
        a2=np.concatenate((one,self.sigmoid(np.dot(a1,theta1))),1)
        sig=self.sigmoid(np.dot(a2,theta2))
        cost=np.multiply(-y,np.log(sig))-np.multiply((1-y),np.log(1-sig))
        theta1_bias=theta1[1:,:]
        theta2_bias=theta2[1:,:]
        J=(1/m)*sum(sum(cost))+(lamb/(2*m))*(sum(sum(np.square(theta1_bias)))+sum(sum(np.square(theta2_bias))))
        return J
    def nnGradient(self,theta,X,y,input_layer_size,hidden_layer_size,output_layer_size,lamb):
        m,n=X.shape
        theta1,theta2=self.theta_unflatten(theta,input_layer_size,hidden_layer_size,output_layer_size)
        one=np.ones((m,1))
        a1=np.concatenate((one,X),1)
        a2=np.concatenate((one,self.sigmoid(np.dot(a1,theta1))),1)
        sig=self.sigmoid(np.dot(a2,theta2))
        d3=sig-y
        d2=np.dot(d3,theta2.T)
        z2=self.sigmoidGradient(np.concatenate((one,np.dot(a1,theta1)),1))
        d2=np.multiply(d2,z2)
        delta1=np.dot(a1.T,d2[:,1:])
        delta2=np.dot(a2.T,d3)
        one=np.ones((1,hidden_layer_size))
        theta1=np.concatenate((one,theta1[1:,:]),0)
        one=np.ones((1,output_layer_size))
        theta2=np.concatenate((one,theta2[1:,:]),0)
        t1_grad=(1/m)*delta1+(lamb/m)*theta1
        t2_grad=(1/m)*delta2+(lamb/m)*theta2
        grad=self.theta_flatten([t1_grad,t2_grad])
        #print(grad.shape)
        return grad
    def test(self,X):
        if self.feature_scaling == 'normalize':
            X=normalize(X)
        elif self.feature_scaling == 'standardize':
            X=standardize(X)
        m,n=X.shape
        theta1,theta2=self.theta_unflatten(self.theta,self.input_layer_size,self.hidden_layer_size,self.output_layer_size)
        one=np.ones((m,1))
        a1=np.concatenate((one,X),1)
        a2=np.concatenate((one,self.sigmoid(np.dot(a1,theta1))),1)
        sig=self.sigmoid(np.dot(a2,theta2))
        sig[sig>0.5]=1
        sig[sig<0.5]=0
        #print((sig[sig==y].shape[0]/X.shape[0])/output_layer_size)
        return sig
    def train(self,X,y):
        if self.feature_scaling == 'normalize':
            X=normalize(X)
        elif self.feature_scaling == 'standardize':
            X=standardize(X)
        m,n=X.shape
        self.input_layer_size=n
        self.hidden_layer_size=n+1
        m,n=y.shape
        self.output_layer_size=n
        self.theta=self.initialise_thetas(self.input_layer_size,self.hidden_layer_size,self.output_layer_size)
        arg=X,y,self.input_layer_size,self.hidden_layer_size,self.output_layer_size,self.lamb
        self.theta=fmin_cg(self.costFunction,x0=self.theta, fprime= self.nnGradient,args=arg)
        print('Training complete')
        print(self.theta.shape)

In [47]:
nn=NeuralNetwork(feature_scaling=None)


In [58]:
X1=np.random.randint(1,high=20,size=(200,2))
X2=np.random.randint(17,high=32,size=(200,2))
X=np.concatenate((X1,X2),axis=0)
y1=np.zeros((200,1))
y2=np.ones((200,1))
y=np.concatenate((y1,y2),axis=0)

In [15]:
from sklearn.model_selection import train_test_split

In [59]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [60]:
nn.train(X_train,y_train)

         Current function value: 0.163121
         Iterations: 146
         Function evaluations: 385
         Gradient evaluations: 372
Training complete
(13,)


In [61]:
np.concatenate((nn.test(X_test),y_test),axis=1)

array([[1., 1.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [1., 1.],
       [0., 1.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 1.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [1., 1.],
       [0., 0.],
       [0., 0.