In [None]:
import numpy as np
import pandas as pd
import matplotlib as plt

In [None]:
def import_dataset(split_percent = 70, path = r'E:\ELL_project\problem2\weather_data.csv', random_state = None):
    """Read a CSV file, shuffle its rows and split them into train and test parts.

    Every column except the last is used as a feature; the last column is the
    target and is kept two-dimensional (one column).

    Parameters
    ----------
    split_percent : int or float, default 70
        Percentage (0-100) of the rows that go into the training part.
    path : str, default is the original hard-coded weather CSV location
        Location of the CSV file to load.
    random_state : int or None, default None
        Seed forwarded to ``DataFrame.sample`` so the shuffle can be reproduced.
        ``None`` keeps the original non-deterministic shuffle.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as NumPy arrays.

    Raises
    ------
    ValueError
        If ``split_percent`` is outside the range 0-100.
    """
    if not 0 <= split_percent <= 100:
        raise ValueError('split_percent must be between 0 and 100, got {}'.format(split_percent))

    dataset = pd.read_csv(path)
    # frac=1 returns every row in random order, i.e. a shuffle.
    dataset = dataset.sample(frac = 1, random_state = random_state)
    X = dataset.iloc[:,:-1].values
    y = dataset.iloc[:,-1:].values
    datasize = X.shape[0]

    # Use the full percentage; the earlier "// 10" step silently rounded
    # e.g. 75 % down to 70 %.
    n_train = int((datasize * split_percent) // 100)

    X_train = X[:n_train,:]
    y_train = y[:n_train,:]
    X_test = X[n_train:,:]
    y_test = y[n_train:,:]

    return(X_train,X_test,y_train,y_test)


In [None]:
def feature_scaling(X_train,X_test):
    """Standardise train and test features with statistics of the training set.

    Each column is shifted by the training mean and divided by the training
    (population) standard deviation. The same shift and scale are applied to
    ``X_test`` so no test-set information leaks into the scaling.

    NOTE: a later cell in this notebook defines another ``feature_scaling``
    that takes only ``X_train``; once that cell has run it replaces this one.

    Parameters
    ----------
    X_train : ndarray, rows are samples and columns are features
    X_test : ndarray with the same number of columns as ``X_train``

    Returns
    -------
    tuple
        ``(X_mean, X_var, X_train_feat_scaled, X_test_feat_scaled)``.
        Despite its name ``X_var`` holds the per-column standard deviation,
        with zero entries replaced by 1.
    """
    X_mean = np.mean(X_train, axis=0)
    X_var = np.std(X_train, axis=0)
    # A constant column has zero spread; dividing by 1 instead keeps it at 0
    # rather than producing NaN/inf.
    X_var = np.where(X_var == 0, 1.0, X_var)

    X_train_feat_scaled = (X_train - X_mean ) / X_var
    X_test_feat_scaled = (X_test - X_mean) / X_var

    return (X_mean,X_var, X_train_feat_scaled,X_test_feat_scaled)

In [None]:
def gradDesc(X,y,theta,hypothesis,loss_function,regularizer):
    """Evaluate the regularised loss and its gradient for one batch.

    Parameters
    ----------
    X, y : batch of features and targets, passed through to ``loss_function``
    theta : current parameter array
    hypothesis : callable forwarded to ``loss_function``
    loss_function : callable
        Called as ``loss_function(X, theta, y, hypothesis)`` and expected to
        return ``(h, loss, gradient)``.
    regularizer : callable
        Called as ``regularizer(theta)`` and expected to return
        ``(reg_loss, reg_grad)``. Regularisers that need hyper-parameters
        (e.g. ``l2_reg(alpha, theta)``) must be bound first, for instance with
        ``functools.partial(l2_reg, alpha)``.

    Returns
    -------
    tuple
        ``(loss, gradient)`` where ``loss`` is the data loss plus the summed
        regularisation penalty and ``gradient`` is the sum of both gradients.
    """
    _, loss, gradient = loss_function(X,theta,y,hypothesis)
    reg_loss,reg_grad = regularizer(theta)

    # The regularisers return one penalty per parameter; reduce it so the
    # total loss stays a scalar instead of becoming an array.
    loss = loss + np.sum(reg_loss)
    # Out-of-place addition: do not modify the array owned by loss_function.
    gradient = gradient + reg_grad

    return(loss,gradient)

In [None]:
def null_regularizer(theta):
    """Regulariser that adds nothing.

    Returns a pair ``(reg_loss, reg_grad)`` whose two entries are the same
    zero-filled array with the shape of ``theta``.
    """
    zero_term = np.zeros(shape=theta.shape)
    return zero_term, zero_term

In [None]:
def l1_reg(alpha,theta):
    """L1 (lasso) penalty and its sub-gradient, element-wise per parameter.

    Parameters
    ----------
    alpha : float
        Regularisation strength.
    theta : ndarray
        Parameter array.

    Returns
    -------
    tuple
        ``(reg_loss, reg_grad)`` with ``reg_loss = alpha * |theta|`` and
        ``reg_grad = alpha * sign(theta)``, both shaped like ``theta``.
        At ``theta == 0`` the sub-gradient 0 is used.
    """
    # The penalty must use the absolute value; alpha * theta would reward
    # negative weights instead of penalising them.
    reg_loss = alpha * np.abs(theta)
    reg_grad = alpha * np.sign(theta)
    return(reg_loss,reg_grad)

In [None]:
def l2_reg(alpha,theta):
    """L2 (ridge) penalty and gradient, element-wise per parameter.

    Returns ``(alpha * theta**2, 2 * alpha * theta)``; neither value is
    summed over the parameters.
    """
    squared_weights = np.square(theta)
    penalty = alpha * squared_weights
    slope = 2 * alpha * theta
    return penalty, slope

In [None]:
def elastic_net_reg(lambda1,lambda2,theta):
    """Elastic-net regulariser: the sum of the L1 and L2 terms.

    ``lambda1`` is passed to ``l1_reg`` and ``lambda2`` to ``l2_reg``; the
    two losses are added together, and likewise the two gradients.

    Returns ``(reg_loss, reg_grad)``.
    """
    l1_loss, l1_grad = l1_reg(lambda1, theta)
    l2_loss, l2_grad = l2_reg(lambda2, theta)

    combined_loss = l1_loss + l2_loss
    combined_grad = l1_grad + l2_grad
    return combined_loss, combined_grad

In [None]:
def mse_loss(X,theta,y,hypothesis):
    """Mean squared error of ``hypothesis(X, theta)`` against ``y``.

    Returns ``(h, mse, gradient)``:
      * ``h`` - the predictions returned by ``hypothesis``;
      * ``mse`` - sum of squared residuals of the first target column
        divided by the number of rows of ``y``;
      * ``gradient`` - ``X^T (h - y) / n``. Note that this omits the factor 2
        that differentiating ``mse`` for a linear hypothesis would produce
        (it is the gradient of half the MSE).
    """
    n_samples = y.shape[0]
    predictions = hypothesis(X, theta)
    residual = predictions - y

    column_sums = np.square(residual).sum(axis=0)
    mse = column_sums[0] / n_samples

    X_transposed = np.transpose(X)
    gradient = np.dot(X_transposed, residual) / n_samples

    return predictions, mse, gradient


In [None]:
def mae_loss(X,theta,y,hypothesis):
    """Mean absolute error of ``hypothesis(X, theta)`` against ``y``.

    Parameters
    ----------
    X : ndarray, rows are samples
    theta : parameter array passed to ``hypothesis``
    y : ndarray of targets with one row per sample
    hypothesis : callable, ``hypothesis(X, theta)`` returns the predictions

    Returns
    -------
    tuple
        ``(h, mae, gradient)`` where ``mae`` is the mean of ``|h - y|`` over
        the first target column and ``gradient`` is ``X^T sign(h - y) / n``.
        The gradient is a 2-D column, the same layout ``mse_loss`` and
        ``ce_loss`` return. Where the residual is exactly zero the
        sub-gradient 0 is used.
    """
    siz = y.shape[0]

    h = hypothesis(X,theta)
    diff = h-y

    # The error is |prediction - target|, not |prediction|.
    mae = np.sum(np.abs(diff),axis=0)[0] / siz
    # Matrix product keeps the (n_features, 1) column layout so the result
    # can be subtracted from theta without broadcasting surprises.
    gradient = np.dot(np.transpose(X),np.sign(diff)) / siz

    return(h,mae,gradient)



In [None]:
def ce_loss(X,theta,y,hypothesis):
    """Binary cross-entropy of ``hypothesis(X, theta)`` against ``y``.

    Parameters
    ----------
    X : ndarray, rows are samples
    theta : parameter array passed to ``hypothesis``
    y : ndarray of targets with one row per sample (0/1 labels assumed)
    hypothesis : callable, ``hypothesis(X, theta)`` returns predicted
        probabilities

    Returns
    -------
    tuple
        ``(h, ce, gradient)`` where ``ce`` is the mean cross-entropy of the
        first target column and ``gradient`` is ``X^T (h - y) / n`` (the
        cross-entropy gradient when the hypothesis is a sigmoid of a linear
        model - assumption about the caller's hypothesis).
    """
    siz = y.shape[0]

    h = hypothesis(X,theta)
    diff = h - y

    # Keep probabilities strictly inside (0, 1) so log never sees 0.
    eps = np.finfo(float).eps
    h_safe = np.clip(h, eps, 1 - eps)

    ce = np.sum(-y*np.log(h_safe)-(1-y)*np.log(1-h_safe),axis=0) [0] / siz
    gradient = np.dot(np.transpose(X),diff) / siz

    return(h,ce,gradient)

In [None]:
def linReg(X,y,iter=100,alpha=0.01,batchSize=32,hypothesis=None,loss_function=None,regularizer=None):
    """Fit parameters by mini-batch gradient descent.

    Parameters
    ----------
    X : ndarray, rows are samples (any bias column must already be present)
    y : ndarray of targets, one row per sample
    iter : int, default 100
        Number of passes over the data.
    alpha : float, default 0.01
        Learning rate.
    batchSize : int, default 32
        Number of rows per gradient step; must be positive.
    hypothesis : callable or None
        ``hypothesis(X, theta)``; defaults to the linear model ``X . theta``.
    loss_function : callable or None
        Passed to ``gradDesc``; defaults to ``mse_loss``.
    regularizer : callable or None
        Passed to ``gradDesc``; defaults to ``null_regularizer``.

    Returns
    -------
    tuple
        ``(theta, loss_epoch)`` where ``loss_epoch`` is the sum of the
        per-batch losses accumulated during the last pass.

    Raises
    ------
    ValueError
        If ``batchSize`` is not positive.
    """
    if batchSize <= 0:
        raise ValueError('batchSize must be positive, got {}'.format(batchSize))

    # gradDesc needs all three callables; supply the linear-regression
    # defaults so the short call linReg(X, y, ...) works.
    if hypothesis is None:
        def hypothesis(X_batch, params):
            return np.dot(X_batch, params)
    if loss_function is None:
        loss_function = mse_loss
    if regularizer is None:
        regularizer = null_regularizer

    # Random start; not seeded here, so results vary between runs unless the
    # caller seeds NumPy's global generator.
    theta = np.random.random((X.shape[1],1))
    datasize = X.shape[0]
    loss_epoch = 0
    for i in range(iter):
        loss_epoch = 0
        for fro in range(0, datasize, batchSize):
            to = min(fro+batchSize,datasize)
            l,theta_grad = gradDesc(X[fro:to,:],y[fro:to,:],theta,hypothesis,loss_function,regularizer)
            loss_epoch += l
            theta -= (alpha*theta_grad)

        # Report after the pass has finished so the printed loss really
        # belongs to the stated number of iterations.
        if (i+1)%1000==0:
            print('Loss for {} iterations: {}'.format(i+1,loss_epoch))

    return (theta,loss_epoch)

In [None]:
def feature_scaling(X_train):
    """Standardise the columns of ``X_train`` to zero mean and unit spread.

    Parameters
    ----------
    X_train : ndarray, rows are samples and columns are features

    Returns
    -------
    tuple
        ``(X_mean, X_var, X_train_reg)``. ``X_mean`` is the per-column mean;
        ``X_var`` (despite its name) is the per-column population standard
        deviation with zero entries replaced by 1; ``X_train_reg`` is the
        standardised copy of ``X_train``.
    """
    X_mean = np.mean(X_train, axis=0)
    # Statistics must come from the argument itself, not from a global X.
    X_var = np.std(X_train, axis=0)
    # A constant column has zero spread; dividing by 1 instead keeps it at 0
    # rather than producing NaN/inf.
    X_var = np.where(X_var == 0, 1.0, X_var)
    X_train_reg = (X_train - X_mean) / X_var
    return (X_mean,X_var,X_train_reg)

In [None]:
def poly_feat(X,degree=2):
    """Expand features with pairwise products and element-wise powers.

    The output columns are, in order:
      1. the original columns of ``X``;
      2. the product of every pair of distinct columns ``(i, j)`` with
         ``i < j``;
      3. ``X ** p`` for every ``p`` from 2 up to and including ``degree``.

    Higher-order mixed terms (e.g. ``x_i^2 * x_j``) are not generated, and
    the pairwise products are always included, even for ``degree < 2``.

    Parameters
    ----------
    X : 2-D ndarray, rows are samples
    degree : int, default 2
        Highest power of each individual feature to include.

    Returns
    -------
    ndarray
        New 2-D array with the expanded feature columns.
    """
    num_feats = X.shape[1]

    # Collect the pieces and join once; repeated np.append re-copies the
    # whole matrix on every call.
    blocks = [X]
    for i in range(num_feats):
        for j in range(i+1,num_feats):
            blocks.append(np.multiply(X[:,i:i+1], X[:,j:j+1]))

    # Start at 2 and include `degree` itself; range(3, degree) never emitted
    # the squares and stopped one power short.
    for power in range(2,degree+1):
        blocks.append(np.power(X,power))

    return np.concatenate(blocks, axis=1)

In [None]:
def polyReg(X,y,iter=100,alpha=0.01,batchSize=32,degree=2):
    """Polynomial regression: scale, expand, add a bias column, then fit.

    ``X`` is standardised with the single-argument ``feature_scaling``,
    expanded by ``poly_feat(..., degree)``, prefixed with a column of ones
    and handed to ``linReg`` together with ``iter``, ``alpha`` and
    ``batchSize``.

    Returns ``(X_mean, X_var, opt, tl)``: the scaling statistics followed by
    the two values returned by ``linReg``.
    """
    X_mean, X_var, X_scaled = feature_scaling(X)
    expanded = poly_feat(X_scaled, degree)

    # Leading column of ones carries the intercept term.
    bias_column = np.ones((expanded.shape[0], 1))
    design = np.concatenate((bias_column, expanded), axis=1)

    fitted_theta, final_loss = linReg(design, y, iter=iter, alpha=alpha, batchSize=batchSize)
    return X_mean, X_var, fitted_theta, final_loss


In [None]:
# Load and split the data here: nothing else in the notebook defines
# X_train / X_test / y_train / y_test, so without this call a fresh
# "Restart & Run All" stops with a NameError.
X_train, X_test, y_train, y_test = import_dataset()

# Training configuration.
N_ITERATIONS = 200000   # passes over the training data
LEARNING_RATE = 0.01
POLY_DEGREE = 6         # must match the degree used when evaluating below

train_datasize = X_train.shape[0]
# batchSize == train_datasize, i.e. one full-batch gradient step per pass.
X_mean, X_var, opt_theta, train_loss = polyReg(X_train,y_train,N_ITERATIONS,LEARNING_RATE,train_datasize,POLY_DEGREE)
print(X_mean.shape)
print(opt_theta)

In [None]:
def predicter(X_mean, X_var, X_test, opt_theta,degree=2):
    """Apply a fitted polynomial model to new samples.

    ``X_test`` is shifted by ``X_mean`` and divided by ``X_var``, expanded
    with ``poly_feat(..., degree)``, prefixed with a column of ones and
    multiplied by ``opt_theta``.

    Returns the resulting prediction array.
    """
    standardized = (X_test - X_mean) / X_var
    expanded = poly_feat(standardized, degree)

    # Same intercept column that is added at training time.
    intercept = np.ones((expanded.shape[0], 1))
    design = np.concatenate((intercept, expanded), axis=1)

    return np.dot(design, opt_theta)


In [None]:
def accuracy_metrics(X_mean,X_var,X_test,y_test,opt_theta,degree=2):
    """Print confusion-matrix counts and accuracy/precision/recall/F1.

    Predictions come from ``predicter`` and are turned into class labels by
    thresholding at 0.5. Only the first target column is evaluated.
    Assumes ``y_test`` holds 0/1 labels - TODO confirm against the dataset.

    A ratio whose denominator is zero (for example precision when nothing is
    predicted positive) is reported as 0.0 instead of NaN.

    Parameters
    ----------
    X_mean, X_var : scaling statistics obtained at training time
    X_test : ndarray of samples to score
    y_test : ndarray of true labels, one row per sample
    opt_theta : fitted parameters
    degree : int, default 2
        Polynomial degree; must equal the one used for training.

    Returns
    -------
    None
        The metrics are printed.
    """
    def _ratio(numerator, denominator):
        # Guarded division for the metrics below.
        return numerator / denominator if denominator else 0.0

    y_pred = predicter(X_mean, X_var, X_test, opt_theta, degree)
    test_size = y_pred.shape[0]

    y_pred_thresh = y_pred>=0.5

    # predicted 1 and actual 1 -> sum is 2
    tp = np.sum((y_pred_thresh+y_test)==2 , axis=0)[0]
    # all agreements minus the positive agreements
    tn = np.sum(y_pred_thresh==y_test , axis=0)[0] - tp
    # all predicted positives minus the correct ones
    fp = np.sum(y_pred_thresh , axis=0)[0]-tp
    fn = test_size-tp-tn-fp

    print('tp: {} , tn: {} , fp: {} , fn: {}'.format(tp,tn,fp,fn))

    acc = _ratio(tp+tn, test_size)
    prec = _ratio(tp, tp+fp)
    recl = _ratio(tp, tp+fn)
    f1 = _ratio(2*prec*recl, prec+recl)

    print('Accuracy: {}'.format( acc  ))
    print('Precision: {}'.format( prec  ))
    print('Recall: {}'.format( recl  ))
    print('F1 score: {}'.format( f1  ))




In [None]:
def _report(title, features, labels):
    """Print a heading followed by the metrics of the fitted degree-6 model."""
    print(title)
    accuracy_metrics(X_mean,X_var,features,labels,opt_theta,degree=6)


# Metrics on the data the model was fitted on, then on the held-out split.
_report('Train Accuracy', X_train, y_train)
print('..............................................')
_report('Test Accuracy', X_test, y_test)