In [1]:
import numpy as np
from scipy.special import softmax
import pandas as pd
# Relative locations of the training and test CSV files read by the loading cell.
train_path, test_path = 'data/train.csv', 'data/test.csv'

In [2]:
# Read the parameter file into a list of raw text lines (newlines kept).
# NOTE(review): presumably the optimizer hyper-parameters for test case 3;
# `lines` is not consumed by any cell visible here -- confirm it is still needed.
with open('testcases/3/param3.txt') as f:
    lines = list(f)

In [3]:
# Load both splits; the first CSV column is used as the row index.
train = pd.read_csv(train_path, index_col = 0)
test = pd.read_csv(test_path, index_col = 0)

# Split the target off the training features.
y_train = np.array(train['Length of Stay'])
train = train.drop(columns = ['Length of Stay'])

# Encode train and test together so both splits end up with the same dummy columns.
data = pd.concat([train, test], ignore_index = True)

# One-hot encode every feature except the last column, which is passed through
# unchanged (presumably a numeric feature -- verify against the CSV schema).
cols = train.columns
cols = cols[:-1]
data = pd.get_dummies(data, columns=cols, drop_first=True)

# Force a float matrix. pandas >= 2.0 emits boolean dummy columns, and mixing
# them with the pass-through column would otherwise produce an object-dtype
# array that the numeric code below cannot work with efficiently (or at all).
data = data.to_numpy(dtype=float)

# Undo the concatenation: the first train.shape[0] rows are the training rows.
X_train = data[:train.shape[0], :]
X_test = data[train.shape[0]:, :]

# Prepend a column of ones so the first row of the weight matrix acts as the bias.
b = np.ones((X_train.shape[0], 1))
X_train = np.concatenate((b,X_train), axis=1)

b = np.ones((X_test.shape[0], 1))
X_test = np.concatenate((b,X_test), axis=1)

# One-hot encode the labels (one column per distinct label value, in sorted
# order), as floats for the same reason as the feature matrix.
y_true = pd.get_dummies(y_train)
y_true = y_true.to_numpy(dtype=float)

In [4]:
# Shapes of the design matrices and the one-hot labels, followed by the
# weight-matrix shape they imply (features x classes). The weight shape is
# derived from the data instead of reading `w`, because `w` is only created in
# a later cell and is therefore undefined when the notebook runs top to bottom
# on a fresh kernel.
X_train.shape, X_test.shape, y_true.shape, (X_train.shape[1], y_true.shape[1])

((100000, 1633), (10000, 1633), (100000, 8), (1633, 8))

In [5]:
def y_hat(X,w):
    """Return row-wise softmax probabilities of the linear scores ``X . w``.

    X: 2-D feature matrix, one sample per row.
    w: 2-D weight matrix, one column per class.
    Returns an array with one row per sample; each row sums to 1.
    """
    logits = np.dot(X, w)
    # axis=1 normalises across the class columns of each sample.
    return softmax(logits, axis=1)

def gradient(X,w,y_true):
    """Return ``X.T . (y_hat(X, w) - y_true)`` divided by the number of rows of X.

    For one-hot ``y_true`` this is the gradient of the mean cross-entropy
    computed by ``loss`` with respect to ``w``; the result has the shape of ``w``.
    """
    n_samples = X.shape[0]
    # Difference between predicted probabilities and the targets, per sample and class.
    residual = y_hat(X, w) - y_true
    return X.T.dot(residual) / n_samples
    
def loss(X,w,y_true):
    """Return the mean negative log-probability assigned to each row's target class.

    The target class of a row is the position of the largest entry in the
    corresponding row of ``y_true`` (the hot column for one-hot labels).
    """
    n_samples = X.shape[0]
    probabilities = y_hat(X, w)
    # Column index of the target class per row, kept 2-D so it can index along axis 1.
    target_column = np.argmax(y_true, axis=1)[:, None]
    target_probability = np.take_along_axis(probabilities, target_column, axis=1)
    return -np.sum(np.log(target_probability)) / n_samples

def fixed_gradient(X,y_true,w,n_iter,lr):
    """Run ``n_iter`` full-batch gradient-descent steps with constant step size ``lr``.

    ``w`` is updated in place and the same array is returned.
    """
    for _ in range(n_iter):
        w -= lr * gradient(X, w, y_true)
    return w

def adaptive_gradient(X,y_true,w,n_iter,lr):
    """Run ``n_iter`` full-batch gradient-descent steps with a decaying step size.

    The step size for the k-th update (k starting at 1) is ``lr / sqrt(k)``.
    ``w`` is updated in place and the same array is returned.
    """
    for step in range(n_iter):
        # step is 0-based, hence the +1 under the square root.
        step_size = lr / np.sqrt(step + 1)
        w -= step_size * gradient(X, w, y_true)
    return w
    
def alpha_beta_gradient(X,y_true,w,n_iter,alpha,beta,lr):
    """Run ``n_iter`` full-batch gradient-descent steps with backtracking line search.

    Every iteration starts from step size ``lr`` and multiplies it by ``beta``
    until the sufficient-decrease condition

        loss(w - t * grad) - loss(w) <= -alpha * t * ||grad||_F ** 2

    holds, then applies the step with the accepted ``t``.

    alpha: fraction of the linearly predicted decrease that must be achieved.
    beta:  shrink factor for the step size; with ``beta >= 1`` the step never
           shrinks, so the inner loop may not terminate once it is entered.
    ``w`` is updated in place and the same array is returned.
    """
    for _ in range(n_iter):
        grad = gradient(X, w, y_true)
        curr_loss = loss(X, w, y_true)

        # Each iteration restarts the search from the initial step size.
        step_size = lr
        # Required change in loss for the current step size (a non-positive number).
        required_change = -alpha * step_size * (np.linalg.norm(grad, ord = 'fro') ** 2)
        new_loss = loss(X, w - step_size * grad, y_true)

        while new_loss - curr_loss > required_change:
            step_size *= beta
            new_loss = loss(X, w - step_size * grad, y_true)
            # The threshold is linear in the step size, so it shrinks by the same factor.
            required_change *= beta

        # w has not changed since grad was computed, so reuse it instead of
        # paying for another full gradient evaluation.
        w -= step_size * grad
    return w

In [21]:
def mini_batch_gradient(X,y_true,w,n_iter,batch_size,lr):
    """Run ``n_iter`` epochs of mini-batch gradient descent with constant step size ``lr``.

    Batches are consecutive row slices of ``X`` / ``y_true`` taken in order (no
    shuffling). When the number of rows is not a multiple of ``batch_size`` the
    final, shorter batch is used as well, so no training rows are skipped; when
    it is a multiple, the batches are exactly the ``rows // batch_size`` full ones.

    Prints the 1-based epoch number (tab-separated) after each epoch.
    ``w`` is updated in place and the same array is returned.
    """
    n_samples = X.shape[0]
    for epoch in range(1, n_iter + 1):
        # Stepping by batch_size up to n_samples also yields the trailing partial batch.
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            grad = gradient(X[start:stop, :], w, y_true[start:stop, :])
            w -= lr * grad
        print(epoch, end = '\t')
    return w

In [22]:
# Start from all-zero weights: one row per feature (bias included), one column per class.
n_features, n_classes = X_train.shape[1], y_true.shape[1]
w = np.zeros((n_features, n_classes))

# Train with mini-batch gradient descent: 50 epochs, batches of 200 rows, step size 0.2.
w_new = mini_batch_gradient(X_train, y_true, w, n_iter=50, batch_size=200, lr=0.2)

1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18	19	20	21	22	23	24	25	26	27	28	29	30	31	32	33	34	35	36	37	38	39	40	41	42	43	44	45	46	47	48	49	50	

In [23]:
# Display the trained weight matrix as a single flat vector.
w_new.flatten()

array([1.19890632, 0.7060623 , 0.1191276 , ..., 0.02092259, 0.08047424,
       0.09566638])

In [24]:
# Predicted class probabilities for every test row.
y_test = y_hat(X_test, w_new)
# Pick the most probable column and shift by one so labels start at 1 rather than 0.
y_out = np.argmax(y_test, axis=1) + 1
y_out

array([8, 8, 8, ..., 1, 1, 2])

In [None]:
# Predicted labels are integers, so write them as plain integers, one per line.
# savetxt's default '%.18e' format would emit them in scientific float notation
# (e.g. 8.000000000000000000e+00).
np.savetxt('outputfile.txt', y_out, fmt='%d', delimiter='\n')
# The flattened weights keep the default full-precision float format, one value per line.
np.savetxt('weightfile.txt', w_new.flatten(), delimiter='\n')