In [27]:
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn import decomposition


In [74]:
def return_list_of_data(bpt = 2000):
    data_dir = 'E:\ELL_Project\problem3\\'
    lis_folders = os.listdir(data_dir+'.')[:-1]
    X = []
    cur_class = 0
    for folder in lis_folders:
        i=0
        lis_files = os.listdir(data_dir+folder+'\.')
        im = []
        for files in lis_files:
            i+=1
            if(i==bpt+1):
                break
            img = '\\'.join([data_dir,folder,files])
            image = plt.imread(img)
            im.append(image.flatten().reshape(-1,1))
        im_np = np.array(im)[:,:,0]
        im_np = np.append(im_np,cur_class*np.ones((im_np.shape[0],1)),axis=1)
        cur_class+=1
        X.append(im_np)
    return X

def dimensional_shrinking_pca(X,n_components=2):
    pca = decomposition.PCA(n_components)
    X_train = X[0]
    for x in X[1:]: 
        X_train = np.append(X_train,x,axis=0)
    X_pca = X_train
    np.random.shuffle(X_pca)
    pca.fit(X_pca[:,:-1])
    X_pca_ret = np.zeros((X_train.shape[0],n_components))
    y_pca_ret = X_pca[:,-1].reshape(-1,1)
    X_pca_ret = pca.transform(X_pca[:,:-1])

    return (X_pca_ret,y_pca_ret)

def one_hot(y,num_feats):
    y = y.reshape(-1,1)
    print(y)
    ret = np.zeros((y.shape[0],num_feats))
    rows = np.arange(y.shape[0])
    # print(y)
    # print(rows)
    ret[ rows , y[:,0].astype(int)] = 1
    return ret

def import_dataset(X,y,split_percent = 70):
    
    datasize = X.shape[0]

    split_point = split_percent//10

    X_train = X[:(datasize*split_point)//10,:]
    y_train = y[:(datasize*split_point)//10,:]
    X_test = X[(datasize*split_point)//10:,:]
    y_test = y[(datasize*split_point)//10:,:]

    return(X_train,X_test,y_train,y_test)

In [75]:
def lin_hyp(X,theta):
    return (np.dot(X,theta))
def null_regularizer(alpha,theta):
    a = np.zeros(theta.shape)
    return(a,a)
def l1_reg(alpha,theta):
    reg_loss = alpha*theta
    reg_grad = alpha
    return(reg_loss,reg_grad)
def l2_reg(alpha,theta):
    reg_loss = alpha * np.square(theta)
    reg_grad = 2 * alpha * theta
    return(reg_loss,reg_grad)
def elastic_net_reg(lambda1,theta,lambda2=0):
    if(lambda2==0):
        lambda2=lambda1
    a1,a2 = l1_reg(lambda1,theta)
    b1,b2 = l2_reg(lambda2,theta)
    return( a1+b1 , a2+b2 )

In [76]:
def mse_loss(X,theta,y,hypothesis):
    siz = y.shape[0]
    h = hypothesis(X,theta)
    diff = h-y
    mse = ( np.sum(np.square(diff),axis=0))[0] / siz
    gradient = np.dot(np.transpose(X),diff) / siz
    return(h,mse,gradient)

def mae_loss(X,theta,y,hypothesis):
    siz = y.shape[0]
    num_feat = y.shape[1]
    h = hypothesis(X,theta)
    diff = h-y
    diff_sign = np.ones((siz,1))
    diff_sign[diff[:,0]<0] = -1
    mae = np.sum(np.abs(h),axis=0)[0] / siz
    gradient = np.sum(diff_sign*X,axis=0) / siz
    return(h,mae,gradient)

def ce_loss(X,theta,y,hypothesis):
    siz = y.shape[0]
    h = hypothesis(X,theta)
    h_comp = 1-h
    diff = h - y
    ce = np.sum(-y*log(h)-(1-y)*log(1-h),axis=0) [0] / siz
    gradient = np.dot(np.transpose(X),diff) / siz
    return(h,ce,gradient)   

In [77]:
def gradDesc(X,y,theta,hypothesis,loss_function,regularizer,alpha):
    h,loss,gradient = loss_function(X,theta,y,hypothesis)
    reg_loss,reg_grad = regularizer(alpha,theta)
    loss += reg_loss
    gradient += reg_grad

    return(loss,gradient)

In [None]:
def logreg(X,y,iter=1000,alpha=0.01,batchSize=32,n_components=2,classes=6):
    (X_mean,X_var,X) = feature_scaling(X)
    X = np.append(np.ones((X.shape[0],1)),X,axis=1)
    theta = np.random.random((X.shape[1],classes))
    datasize = X.shape[0]
    for cl in range(classes):
        y_class = y[:,cl:cl+1]
        loss_epoch,_ = gradDesc(X,y_class,theta[:,cl:cl+1],lin_hyp,mse_loss,l2_reg,alpha)
        for i in range(iter+1):
            if((i)%100000==0):
                print('Loss for {} iterations: {}'.format(i,np.sum(loss_epoch,axis=0)[0]))
            fro = 0 
            loss_epoch = 0
            while(True):
                to = min(fro+batchSize,datasize)
                l,theta_grad = gradDesc(X[fro:to,:],y_class[fro:to,:],theta[:,cl:cl+1],lin_hyp,mse_loss,l2_reg,alpha)
                loss_epoch += l
                theta[:,cl:cl+1] -= (alpha*theta_grad)
                fro = to

                if(to>=datasize):
                    break
    return (X_mean,X_var,theta,loss_epoch)
def sigmoid(X):
    ret = 1/(1+np.exp(-X))
    return ret

In [None]:
def feature_scaling(X_train):
    training_size = X_train.shape[0]
    X_mean = np.sum(X_train,axis=0) / training_size
    X_var = np.sqrt(np.sum((np.square(X_train-X_mean)),axis=0)/training_size)
    X_train_reg = (X_train - X_mean) / X_var
    return (X_mean,X_var,X_train_reg)

In [119]:
def predicter(X_mean, X_var, X_test, opt_theta):
    X_test_norm = (X_test - X_mean) / X_var
    X_test_cross = X_test_norm
    test_size = X_test_cross.shape[0]
    X_test_cross = np.append( np.ones((test_size,1)) , X_test_cross , axis=1)
    y_pred = np.dot(X_test_cross, opt_theta)
    y_pred = sigmoid(y_pred)
    y_pred_norm = y_pred / (np.sum(y_pred,axis=1).reshape(-1,1))
    y_pred_class = y_pred.argmax(1)
    return y_pred_class,y_pred_norm

def acc(X_mean,X_var,X_test,y_test,opt_theta,n_classes=6):
    y_pred_class,y_pred_norm = predicter(X_mean, X_var, X_test, opt_theta)
    y_pred_class=y_pred_class.reshape(-1,1)
    y_test_one_hot = one_hot(y_test,n_classes)
    y_pred_one_hot = one_hot(y_pred_class,n_classes)
    print(y_test.shape)
    print(y_pred_class.shape)
    print(y_test[y_test[:,0]==y_pred_class[:,0],0].shape)
    print('OVERALL ACCURACY = {}'.format(y_test[y_test[:]==y_pred_class[:]].shape[0]))
    for i in range(n_classes):
        print('class {} .......................................'.format(i))
        accuracy_metrics_classif(y_pred_one_hot[:,i:i+1],y_test_one_hot[:,i:i+1])

def accuracy_metrics_classif(y_pred,y_test):
    
    loss_y = y_pred - y_test
    test_size = y_pred.shape[0]
    total_loss_y = np.dot(np.ones((1,test_size)),np.square(loss_y))[0,0] / test_size
    y_pred_thresh = y_pred>=0.5

    tp = np.sum((y_pred_thresh+y_test)==2 , axis=0)[0]
    tn = np.sum(y_pred_thresh==y_test , axis=0)[0] - tp
    fp = np.sum(y_pred_thresh , axis=0)[0]-tp
    fn = test_size-tp-tn-fp

    print('tp: {} , tn: {} , fp: {} , fn: {}'.format(tp,tn,fp,fn))
    acc = (tp+tn)/test_size
    prec = (tp)/(tp+fp)
    recl = (tp)/(tp+fn)
    f1 = 2*prec*recl/(prec+recl)
    print('Accuracy: {}'.format( acc  ))
    print('Precision: {}'.format( prec  ))
    print('Recall: {}'.format( recl  ))
    print('F1 score: {}'.format( f1  ))


In [120]:
def model_run(X_train,X_test,y_train,y_test,degree = 3, plot = False,alpha=0.0001,n_classes = 6):
    # X_mean,X_var, X_train_feat_scaled = feature_scaling(X_train , X_test)
    y_train_one_hot = one_hot(y_train,n_classes)
    y_test_one_hot = one_hot(y_test,n_classes)
    train_datasize = X_train.shape[0]
    X_mean, X_var, opt_theta, train_loss = logreg(X_train,y_train_one_hot,50000,alpha,train_datasize,degree,n_classes)
    y_pred_class,y_pred_norm = predicter(X_mean, X_var, X_test, opt_theta)
    acc(X_mean,X_var,X_test,y_test,opt_theta)

    return(X_mean, X_var,opt_theta, degree,y_pred)
# print_accuracy_metric_classif(X_mean,X_var,X_train,X_test,y_train,opt_theta,degree=2)

In [121]:
n_components = 2
n_classes = 6
data = return_list_of_data(bpt = 1000)
X_pca_ret,y_pca_ret = dimensional_shrinking_pca(data,n_components)
X_train,X_test,y_train,y_test = import_dataset(X_pca_ret,y_pca_ret)

In [122]:
X_mean, X_var,opt_theta, degree,y_pred = model_run(X_train,X_test,y_train,y_test,power,False)


[[1.]
 [3.]
 [3.]
 ...
 [3.]
 [5.]
 [1.]]
[[1.]
 [2.]
 [3.]
 ...
 [2.]
 [5.]
 [1.]]
Loss for 0 iterations: 2.1291350733539534
Loss for 0 iterations: 4.046016816906684
Loss for 0 iterations: 2.2472293475704808
Loss for 0 iterations: 3.3995341057385655
Loss for 0 iterations: 2.4648503465528404
Loss for 0 iterations: 4.1948049730353105
[[1.]
 [2.]
 [3.]
 ...
 [2.]
 [5.]
 [1.]]
[[1]
 [2]
 [3]
 ...
 [2]
 [4]
 [1]]
(1800, 1)
(1800, 1)
(1169,)
OVERALL ACCURACY = 1169
class 0 .......................................
tp: 0 , tn: 1492 , fp: 0 , fn: 308
Accuracy: 0.8288888888888889
Precision: nan
Recall: 0.0
F1 score: nan
class 1 .......................................
tp: 311 , tn: 1283 , fp: 206 , fn: 0
Accuracy: 0.8855555555555555
Precision: 0.6015473887814313
Recall: 1.0
F1 score: 0.751207729468599
class 2 .......................................
tp: 299 , tn: 1189 , fp: 312 , fn: 0
Accuracy: 0.8266666666666667
Precision: 0.48936170212765956
Recall: 1.0
F1 score: 0.6571428571428571
class 3 ....

NameError: name 'y_pred' is not defined

In [118]:
# a = np.array([1,2,2,2])
# print(a.reshape(2,2))

[[1 2]
 [2 2]]
