In [None]:
import os, cv2, re
import numpy as np
from matplotlib import pyplot as plt

In [None]:
class LogisticRegression():
    '''
        Binary logistic regression over per-pixel colour features, trained with
        full-batch gradient ascent on the log-likelihood (training labels y = {-1, 1}).

        Every sample is a row [1, c0, c1, c2]: a bias term followed by the three
        channel values of one pixel divided by 255.
    '''

    def __init__(self):
        '''
            Initialize the parameters to zero and set the step size.
        '''

        # one weight per feature column: bias + three colour channels
        self.w = np.zeros(4)
        self.alpha = 0.1

    @staticmethod
    def _add_bias(pixels):
        '''
            Prepend a column of ones (the bias feature) to an (N x 3) pixel matrix.
        '''
        return np.hstack([np.ones((pixels.shape[0], 1)), pixels])

    @staticmethod
    def _load_pixels(image_dir, mask_dir):
        '''
            Stack every image in image_dir into a matrix of pixel values and the
            matching masks into a column of labels.

            The mask of an image is the file '<id>.jpg' in mask_dir, where <id> is
            the first run of digits in the image filename.

            Input:
                image_dir - string with path to the images
                mask_dir - string with path to the label masks

            Output:
                pixels - float numpy array (# of pixels x 3), raw channel values
                labels - float numpy array (# of pixels x 1), 1 where the mask is nonzero else 0

            Raises:
                OSError - an image or mask could not be decoded
                ValueError - a filename has no numeric id, or a mask does not match its image size
                FileNotFoundError - an image has no matching mask
        '''
        # list the mask directory once instead of once per image
        mask_names = set(os.listdir(mask_dir))

        pixel_chunks = []
        label_chunks = []
        # sorted so the row order (and therefore any later slicing) is reproducible
        for image_filename in sorted(os.listdir(image_dir)):
            # skip hidden entries such as .DS_Store
            if image_filename.startswith('.'):
                continue

            image_file = os.path.join(image_dir, image_filename)
            # cv2.imread signals failure by returning None rather than raising
            image = cv2.imread(image_file)
            if image is None:
                raise OSError('could not read image: ' + image_file)

            id_match = re.search(r"\d+", image_filename)
            if id_match is None:
                raise ValueError('no numeric id in image filename: ' + image_filename)

            mask_name = id_match.group() + '.jpg'
            mask_file = os.path.join(mask_dir, mask_name)
            if mask_name not in mask_names:
                raise FileNotFoundError('no mask found for image: ' + image_file)

            mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                raise OSError('could not read mask: ' + mask_file)
            if mask.shape != image.shape[:2]:
                raise ValueError('mask size does not match image size: ' + mask_file)

            pixel_chunks.append(image.reshape((-1, 3)))
            label_chunks.append(mask.reshape((-1, 1)))

        if not pixel_chunks:
            return np.zeros((0, 3)), np.zeros((0, 1))

        # rows come only from images that were actually read, so an extra mask
        # file can no longer leave all-zero filler pixels in the data
        pixels = np.concatenate(pixel_chunks).astype(np.float64)
        labels = (np.concatenate(label_chunks) > 0).astype(np.float64)
        return pixels, labels

    def load_data(self, trainset_path, mask_path, valimg_path, valmasks_path):
        '''
            Load the validation set and the training set

            Sets valset / vallabels (labels y = {0, 1}) and the class-balanced
            training data red_set / red_labels (y = 1) and
            background_set / background_labels (y = -1).

            Input:
                trainset_path - string with path to training images
                mask_path - string with path to training label masks
                valimg_path - string with path to validation images
                valmasks_path - string with path to validation label masks
        '''
        # ---------------------------------------------------------------
        # validation data: matrix (# of pixels x 4) and column of labels, y = {0, 1}
        # ---------------------------------------------------------------
        val_pixels, self.vallabels = self._load_pixels(valimg_path, valmasks_path)
        # map image data from 0 to 1 and add column of ones
        self.valset = self._add_bias(val_pixels / 255)

        print("Validation set prepared")

        # ---------------------------------------------------------------
        # training data: matrix (# of pixels x 4) and column of labels, y = {-1, 1}
        # ---------------------------------------------------------------
        train_pixels, train_labels = self._load_pixels(trainset_path, mask_path)
        is_red = train_labels[:, 0] > 0

        # get only the masked (red) pixels in training set
        red_pixels = train_pixels[is_red]

        # Remove white pixels: every channel above 224
        is_white = (red_pixels > 224).all(axis=1)
        self.red_set = self._add_bias(red_pixels[~is_white] / 255)
        self.red_labels = np.ones((self.red_set.shape[0], 1))
        print('red set prepared')

        # only keep same number in background_set as belong in the red_set
        # NOTE: this takes the first background pixels in file order, not a random sample
        background_pixels = train_pixels[~is_red][:self.red_set.shape[0], :]
        self.background_set = self._add_bias(background_pixels / 255)

        # create background_labels of y = -1
        self.background_labels = -np.ones((self.background_set.shape[0], 1))
        print('background set prepared')
        # ---------------------------------------------------------------

    def train_model(self, max_iter=None, tol=10, plot=True):
        '''
            Train the logistic regression model with the loaded data.  Use gradient ascent to update parameters and
            plot accuracy on validation set between each iteration.

            Training stops when every parameter changes by less than tol in one
            update, or after max_iter updates if a limit is given.

            Input:
                max_iter - optional int, maximum number of updates (None = no limit)
                tol - convergence threshold on the absolute change of each parameter
                plot - bool, draw the accuracy curve after each non-converged iteration
        '''

        # the training matrix never changes between iterations, so build it once
        samples = np.vstack((self.red_set, self.background_set))
        targets = np.vstack((self.red_labels, self.background_labels))

        iteration = 0
        self.accuracy = []
        # Perform gradient ascent
        while max_iter is None or iteration < max_iter:
            iteration += 1
            print('Iteration: ', iteration)

            # get new parameters
            w_new = self.update_parameter(samples, targets)
            print(w_new)

            # check if parameters converge
            converged = (np.abs(w_new - self.w) < tol).all()
            self.w = w_new
            if converged:
                print('Convergence!')
                break

            # plot accuracy
            self.accuracy.append(self.validate())
            print('accuracy: ', self.accuracy)
            if plot:
                plt.plot(range(len(self.accuracy)), self.accuracy)
                plt.xlabel('Iterations')
                plt.ylabel('Accuracy')
                plt.title('Accuracy of Segmentation')
                plt.show()
        else:
            # only reached when the iteration limit ran out without a break
            print('Stopped without convergence after iterations: ', iteration)

    def update_parameter(self, dataset, labels):
        '''
            Calculate the gradient and calculate new parameter value

            Input:
                dataset - numpy array (# of samples x 4) of feature rows
                labels - numpy array column of label values, y = {-1, 1}

            Output:
                updated parameter value (self.w itself is not modified)

        '''
        # y * (x . w) for every sample, kept as a column so it broadcasts over the features
        margins = labels * (dataset @ self.w).reshape(-1, 1)
        # gradient of sum(log(sigmoid(y * x . w))) with respect to w
        gradient = (labels * dataset * (1 - self.sigmoid(margins))).sum(axis=0)

        return self.w + self.alpha * gradient

    def sigmoid(self, x):
        '''
            Sigmoid function

            Input:
                number or array of numbers

            Output:
                result of sigmoid function with input as parameter, either number or array of numbers
        '''

        # 1 / (1 + exp(-x)) written as exp(-log(1 + exp(-x))); logaddexp evaluates
        # the log term without overflowing for large negative x
        return np.exp(-np.logaddexp(0, -x))

    def validate(self):
        '''
            Calculate accuracy score by segmenting validation set and comparing with mask

                Output:
                    result - float accuracy score
        '''

        # a pixel is classified positive when its score x . w is non-negative
        predictions = ((self.valset @ self.w) >= 0).astype(int)
        matches = (predictions == self.vallabels[:, 0])

        return matches.sum() / self.valset.shape[0]

In [None]:
# Root folder that holds the trainset / masks / valset / valmasks directories.
# Set the ECE276A_DATA_DIR environment variable to run the notebook on another
# machine; the default is the location the notebook was originally written for.
DATA_DIR = os.environ.get('ECE276A_DATA_DIR', '/Users/jamessalem/Documents/ECE276A_PR1')

dataset_path = os.path.join(DATA_DIR, 'trainset')    # training images
mask_path = os.path.join(DATA_DIR, 'masks')          # training label masks
valimg_path = os.path.join(DATA_DIR, 'valset')       # validation images
vallabel_path = os.path.join(DATA_DIR, 'valmasks')   # validation label masks

# Build the model and read every image/mask pair into memory.
my_logreg = LogisticRegression()
my_logreg.load_data(dataset_path, mask_path, valimg_path, vallabel_path)



In [None]:
# Train on the data loaded above; each iteration prints the updated parameters
# and plots the validation accuracy collected so far.
my_logreg.train_model()