In [1]:
import numpy as np
import cv2
import glob
from MyPCA import MyPCA
from sklearn.model_selection import train_test_split

In [2]:
class LogisticRegression:
    def __init__(self, learn_rate = 0.001, num_iters = 100):
        self.learning_rate = learn_rate
        self.n_iters = num_iters
        self.weights = None
        self.bias = None
        
    def train(self, data, labels):
        self.data = self.add_bias_col(data)
        self.n_samples, self.n_features = self.data.shape 
        self.classes = np.unique(labels)
        self.class_labels = {c:i for i,c in enumerate(self.classes)}
        labels = self.one_hot_encode(labels)
        self.weights = np.zeros(shape=(len(self.classes),self.data.shape[1]))
        for _ in range(self.n_iters):
            y = np.dot(self.data, self.weights.T).reshape(-1,len(self.classes)) ## y = m*x + c
            ## apply softmax
            y_predicted = self.softmax(y)
            #y_predicted = self.sigmoidfn(y)
            
            # compute gradients
            dw = np.dot((y_predicted - labels).T, self.data)
            # update parameters
            self.weights -= self.learning_rate * dw
        print(self.weights)
    
    def add_bias_col(self,X):
        return np.insert(X, 0, 1, axis=1)
    
    def one_hot_encode(self, y):
        return np.eye(len(self.classes))[np.vectorize(lambda c: self.class_labels[c])(y).reshape(-1)]
    '''
    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_cls)
    '''
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=1).reshape(-1,1)
    
    def predict(self, X, y):
        X = self.add_bias_col(X)
        pred_vals = np.dot(X, self.weights.T).reshape(-1,len(self.classes))
        self.probs_ = self.softmax(pred_vals)
        pred_classes = np.vectorize(lambda c: self.classes[c])(np.argmax(self.probs_, axis=1))
        return np.mean(pred_classes == y)

In [6]:
def read_data(path):
        img_files = glob.glob(path)
        #print(img_files)
        gray_images = []
        labels = []
        for file in img_files:
            img = cv2.imread(file)
            img = cv2.resize(img,(64,64),interpolation=cv2.INTER_AREA) #None,fx=0.5,fy=0.5
            flat_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).flatten()
            gray_images.append(flat_img)
            lab = ((file.split('/')[-1]).split('_')[0]).lstrip('0')
            if not lab:
                labels.append(0)
            else :
                labels.append(int(lab))
        return np.asarray(gray_images), labels
    
data, labels = read_data("./dataset/*")
pca = MyPCA(n_components = 0.8)
data = pca.fit(data)
print(data.shape)
print(labels)

4096
(520, 4096)
(520, 4096)
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4

In [5]:
def test_train_split(data,labels, percent = 0.8):
    train_size = int(np.shape(data)[0]*percent)/len(np.unique(labels))
    print(train_size)
    label_count = np.zeros(len(np.unique(labels)))
    train_X, test_X, train_y, test_y = [], [], [], []
    for i in range(len(data)):
        if label_count[labels[i]] < train_size:
            train_X.append(data[i])
            train_y.append(labels[i])
            label_count[labels[i]]+=1
        else:
            test_X.append(data[i])
            test_y.append(labels[i])
    return train_X, test_X, train_y, test_y

#train_X, test_X, train_y, test_y = test_train_split(data,labels, percent = 0.8)
train_X, test_X, train_y, test_y = train_test_split(data, labels, train_size=0.8, random_state=666)
print(np.shape(train_X), np.shape(test_X), np.shape(train_y))
logreg = LogisticRegression()
logreg.train(np.asarray(train_X), np.asarray(train_y))
print(logreg.predict(np.asarray(test_X), np.asarray(test_y)))

(416, 4096) (104, 4096) (416,)
[[-1.57850475e-02  9.95677569e-15  1.18620220e-14 ...  3.13691784e-01
   7.82973391e-02 -1.59373176e-01]
 [ 4.72893853e-01 -2.33775333e-14 -6.00215448e-14 ... -5.91954293e-02
   1.92096658e-01  2.29958730e-02]
 [-1.68166905e-01  5.93023780e-15  7.51430982e-15 ... -2.08863988e-01
  -1.61255961e-02 -1.12150173e-01]
 ...
 [ 1.10973531e-01 -2.77852312e-14 -6.98699429e-15 ... -3.08008345e-01
   8.12316300e-02  1.70591496e-01]
 [-6.45200420e-01  3.33977355e-14  3.55255429e-14 ...  1.01410024e-01
   1.06618356e-02 -1.44772117e-01]
 [ 1.27572793e-01 -1.33444928e-14  1.25060966e-14 ...  4.77806290e-01
  -2.76736415e-01 -6.77700393e-03]]
0.7692307692307693
