In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and parameter paths
# configured in the next cell all live under this mount point (Colab only).
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
import numpy as np
# Small float64 constant. NOTE(review): not referenced in the visible cells;
# presumably a guard against log(0) / division by zero -- confirm before removing.
EPS = np.float64(1e-12)
# Input/output locations on the mounted Drive. The quoted name in each trailing
# comment is the alternative path used when running outside Colab.
train_path = '/content/drive/MyDrive/IITD/assignment2/train1.csv'  # local alternative: "train1.csv"
test_path  = '/content/drive/MyDrive/IITD/assignment2/test1.csv'  # local alternative: "test1.csv"
pred_path  = '/content/drive/MyDrive/IITD/assignment2/test_pred1.csv'  # local alternative: "test_pred1.csv"
param_path = '/content/drive/MyDrive/IITD/assignment2/tests/test1/params.txt'  # local alternative: ".\\tests\\test1\\params.txt"

def get_train_data(train_file):
    """Load the training CSV and build the design matrix and label encodings.

    The last CSV column is taken as the class label (assumed to start from 1);
    every other column is used as a feature.

    Parameters
    ----------
    train_file : str or path-like
        CSV file readable by ``pandas.read_csv``.

    Returns
    -------
    X_train : numpy.ndarray
        Feature matrix with a leading column of ones (bias term).
    Y_train : numpy.ndarray
        Integer class indices, i.e. the labels shifted down by 1.
    Y_train_oh : numpy.ndarray
        One-hot encoding of ``Y_train`` with one column per class index.

    Raises
    ------
    ValueError
        If the label column contains values that are not whole numbers.
    """
    df = pd.read_csv(train_file)
    # NOTE: an earlier, commented-out step coerced a few named columns to 0/1
    # indicators (value > 0); it is not applied.

    X0 = df.iloc[:, :-1].values

    # Add bias term (column of ones)
    X_train = np.hstack([np.ones((X0.shape[0], 1)), X0])

    # Labels must be integers to be usable as row indices into the identity
    # matrix below; pandas yields floats for columns such as "1.0, 2.0".
    labels = df.iloc[:, -1].values
    if not np.issubdtype(labels.dtype, np.integer):
        as_int = labels.astype(np.int64)
        if not np.array_equal(as_int, labels.astype(np.float64)):
            raise ValueError("class labels must be whole numbers")
        labels = as_int

    # Assuming labels start from 1, subtract 1 so class indices start from 0
    Y_train = labels - 1

    # One-hot encoding of labels. The width covers the highest class index so
    # a class id that never occurs (e.g. labels 1, 2, 4) no longer causes an
    # out-of-range lookup; for contiguous labels this equals the number of
    # distinct labels, exactly as before.
    highest = int(Y_train.max()) + 1 if Y_train.size else 0
    num_class = max(len(np.unique(Y_train)), highest)
    Y_train_oh = np.eye(num_class)[Y_train]

    return X_train, Y_train, Y_train_oh

def get_test_data(test_file, pred_file, num_class=None):
    """Load test features and their labels, returning matrix and encodings.

    Parameters
    ----------
    test_file : str or path-like
        CSV whose columns are all used as features.
    pred_file : str or path-like
        CSV whose last column holds the class labels (assumed to start from 1).
    num_class : int, optional
        Width of the one-hot encoding. Pass the number of classes the model
        was trained with so the encoding lines up with the model output even
        when some class never appears in ``pred_file``. When omitted, the
        width is inferred from the labels themselves.

    Returns
    -------
    X_test : numpy.ndarray
        Feature matrix with a leading column of ones (bias term).
    Y_test : numpy.ndarray
        Integer class indices, i.e. the labels shifted down by 1.
    Y_test_oh : numpy.ndarray
        One-hot encoding of ``Y_test``.

    Raises
    ------
    ValueError
        If the labels are not whole numbers, or if an explicit ``num_class``
        is too small for the largest label.
    """
    X0 = pd.read_csv(test_file).values
    X_test = np.hstack([np.ones((X0.shape[0], 1)), X0])  # Adding bias

    # Labels must be integers to index the identity matrix below.
    labels = pd.read_csv(pred_file).iloc[:, -1].values
    if not np.issubdtype(labels.dtype, np.integer):
        as_int = labels.astype(np.int64)
        if not np.array_equal(as_int, labels.astype(np.float64)):
            raise ValueError("class labels must be whole numbers")
        labels = as_int

    Y_test = labels - 1  # Adjust class labels to start from 0

    highest = int(Y_test.max()) + 1 if Y_test.size else 0
    if num_class is None:
        # Cover the highest class index so a missing class id does not cause
        # an out-of-range lookup; identical to the distinct-label count when
        # the labels are contiguous.
        num_class = max(len(np.unique(Y_test)), highest)
    elif highest > num_class:
        raise ValueError(
            f"num_class={num_class} is smaller than the largest label ({highest})"
        )

    # One-hot encoding of labels
    Y_test_oh = np.eye(num_class)[Y_test]

    return X_test, Y_test, Y_test_oh

def get_params(param_file):
    """Parse the training hyper-parameters from a plain-text file.

    Expected layout, one item per line:
      line 1 -- learning strategy (integer)
      line 2 -- either a single value ``n0`` or the pair ``n0,k``
      line 3 -- number of epochs (integer)
      line 4 -- batch size (integer)

    Returns ``(learning_strategy, n0, k, epochs, batch_size)``; ``k`` is
    ``None`` when line 2 carries only one value.
    """
    with open(param_file, 'r') as handle:
        raw_lines = handle.readlines()

    learning_strategy = np.int32(raw_lines[0].strip())

    # Line 2 is comma-separated: one entry -> n0 only, otherwise n0 and k.
    rate_fields = np.array(raw_lines[1].strip().split(','), dtype=np.float64)
    if len(rate_fields) != 1:
        n0, k = rate_fields
    else:
        n0, k = rate_fields[0], None

    epochs, batch_size = (np.int32(raw_lines[idx].strip()) for idx in (2, 3))

    return learning_strategy, n0, k, epochs, batch_size

# Load training data
X_train, Y_train_indices, Y_train_oh = get_train_data(train_path)

# Number of features (bias column included) and classes.
# The class count is read from the one-hot width so the weight matrix below
# always has one column per column of Y_train_oh.
n_features = X_train.shape[1]
num_class = Y_train_oh.shape[1]

# Initialize weights
W = np.zeros((n_features, num_class), dtype=np.float64)

# Load parameters
strategy, step, k, epochs, batch_size = get_params(param_path)
print(strategy, step, k, epochs, batch_size)

# Calculate the frequency of each class using the original class labels.
# `freq[c]` is the number of training rows whose 0-based class index is c;
# it is printed by a later cell, so it has to be defined here for the
# notebook to survive Restart & Run All.
freq = np.bincount(Y_train_indices, minlength=num_class)

1 1e-09 None 25 87595


Class Frequencies: [16131  1032 25406 45026]
Loss: 4.770042276676647e-05


In [7]:
# Sanity check: compare the observed class counts with the per-class column
# sums of the predicted probabilities under the current weights W.
# Optional reset of the weights before inspecting (disabled):
#W = np.zeros((n_features, num_class), dtype=np.float64)
# NOTE(review): this cell relies on `freq` and on a two-argument
# `softmax(X, W)` already existing in the kernel -- confirm both are defined
# by an earlier cell when running from a fresh kernel.


print("Class Frequencies:", freq)
pred_probs = softmax(X_train, W)
# Summing over axis 0 adds the probabilities of each class across all rows.
vertical_sum = np.sum(pred_probs, axis=0)
print(vertical_sum)


Class Frequencies: [16131  1032 25406 45026]
[19204.04391665  2721.69205303 28804.46072119 36864.80330914]


In [None]:
# @title
import numpy as np

def calculate_loss(y_true, y_pred):
    """Return the cross-entropy between targets and predictions, scaled by 1/(2m).

    ``y_true`` and ``y_pred`` are arrays of matching shape whose first axis has
    length ``m``. Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before the
    logarithm so that exact zeros or ones cannot produce infinities.
    """
    tiny = 1e-15
    n_rows = y_true.shape[0]
    safe_pred = np.clip(y_pred, tiny, 1 - tiny)
    log_likelihood = np.sum(y_true * np.log(safe_pred))
    return -log_likelihood / (2 * n_rows)

def softmax(x, W=None):
    """Row-wise softmax, optionally applied to the linear scores ``x @ W``.

    Parameters
    ----------
    x : numpy.ndarray
        2-D array. With ``W`` omitted it is treated as the score matrix
        itself; with ``W`` given it is treated as the input matrix.
    W : numpy.ndarray, optional
        Weight matrix. When supplied, the scores are ``x @ W``, which supports
        the ``softmax(X, W)`` call style used elsewhere in this notebook.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as the scores in which every row sums to 1.
    """
    scores = x if W is None else x @ W
    # Subtract each row's maximum before exponentiating so the largest
    # exponent is 0; this avoids overflow without changing the result.
    exp_x = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)





def ternary_search(X, Y, W, base=1e-9, max_iter=20, verbose=True):
    """Pick a step size along the negative gradient by ternary search.

    The gradient is evaluated once at ``W``. The upper bound of the search
    interval starts at ``base`` and is doubled for as long as stepping that far
    still gives a lower loss than staying at ``W`` (capped once it exceeds
    1e6). ``max_iter`` rounds of ternary search then shrink ``[low, high]``.

    Parameters
    ----------
    X, Y : array-like
        Inputs and targets, passed through to ``get_prediction``,
        ``calculate_gradient`` and ``calculate_loss``.
    W : numpy.ndarray
        Current weights; not modified.
    base : float, optional
        Initial upper bound for the step size.
    max_iter : int, optional
        Number of ternary-search iterations.
    verbose : bool, optional
        When True (default) print the interval and the four probe losses on
        every iteration. When False nothing is printed and the two extra loss
        evaluations needed only for that report are skipped.

    Returns
    -------
    float
        Midpoint of the final ``[low, high]`` interval.

    Notes
    -----
    Assumes the helper functions are pure (same inputs, same outputs), which
    is what allows the starting prediction and loss to be computed only once.
    """
    # Prediction, gradient and loss at the starting point never change during
    # the search, so evaluate each exactly once.
    start_pred = get_prediction(X, W)
    gradient = calculate_gradient(X, Y, start_pred)
    start_loss = calculate_loss(Y, start_pred)

    def loss_at(rate):
        """Loss after moving `rate` along the negative gradient from W."""
        return calculate_loss(Y, get_prediction(X, W - rate * gradient))

    low = 0
    high = base

    # Expand the high boundary if necessary
    while start_loss > loss_at(high):
        high *= 2
        if high > 1e6:  # Prevents the search from expanding indefinitely
            break

    for i in range(max_iter):
        rate_1 = (2 * low + high) / 3
        rate_2 = (2 * high + low) / 3

        loss1 = loss_at(rate_1)
        loss2 = loss_at(rate_2)

        if verbose:
            # The endpoint losses are reported only; they do not influence
            # which part of the interval is discarded.
            losslow = loss_at(low)
            losshigh = loss_at(high)
            print(f"Iteration {i+1}: low = {low}, high = {high}, rate1 = {rate_1}, rate2 = {rate_2}")
            print(f"Iteration {i+1}: losslow = {losslow}, losshigh = {losshigh}, loss1 = {loss1}, loss2 = {loss2}")

        if loss1 < loss2:
            high = rate_2
        elif loss1 > loss2:
            low = rate_1
        else:
            # Equal probes: drop both outer thirds
            low = rate_1
            high = rate_2

    return (low + high) / 2


# Start from all-zero weights (no random initialisation is involved).
W = np.zeros((n_features, num_class))

# Find the best learning rate using ternary search.
# Y_train_oh is the one-hot label matrix produced by get_train_data; the
# lowercase `y_train_oh` used here previously is not defined by any visible cell.
learning_rate = ternary_search(X_train, Y_train_oh, W)
print(learning_rate)
epsilon = 1e-7  # stop once the loss changes by less than this between steps

for i in range(200):
    y_pred = get_prediction(X_train, W)
    loss_old = calculate_loss(Y_train_oh, y_pred)
    gradient = calculate_gradient(X_train, Y_train_oh, y_pred)

    # Tentative update; it is kept only if it actually lowers the loss
    Wnew = W - learning_rate * gradient
    y_pred_new = get_prediction(X_train, Wnew)
    loss_new = calculate_loss(Y_train_oh, y_pred_new)

    if loss_new < loss_old:
        W = Wnew
    else:
        print("Loss did not decrease. Halving learning rate.")
        learning_rate /= 2  # Halve the learning rate if loss doesn't decrease

    # Progress report every 50 iterations (shows the tentative step's loss)
    if i% 50 == 0:
        print(f"Iteration {i+1}, Loss: {loss_new}")

    if np.abs(loss_new - loss_old) < epsilon:
        print(f"Converged after {i+1} iterations")
        break



Iteration 1: low = 0, high = 1e-09, rate1 = 3.3333333333333337e-10, rate2 = 6.666666666666667e-10
Iteration 1: losslow = 0.6931471805599445, losshigh = 8.392494847622308, loss1 = 8.370435971279305, loss2 = 8.392494847622308
Iteration 2: low = 0, high = 6.666666666666667e-10, rate1 = 2.2222222222222224e-10, rate2 = 4.444444444444445e-10
Iteration 2: losslow = 0.6931471805599445, losshigh = 8.392494847622308, loss1 = 8.355575473164082, loss2 = 8.379840795402895
Iteration 3: low = 0, high = 4.444444444444445e-10, rate1 = 1.4814814814814817e-10, rate2 = 2.9629629629629634e-10
Iteration 3: losslow = 0.6931471805599445, losshigh = 8.379840795402895, loss1 = 8.337552688256432, loss2 = 8.367301030105443
Iteration 4: low = 0, high = 2.9629629629629634e-10, rate1 = 9.876543209876544e-11, rate2 = 1.9753086419753088e-10
Iteration 4: losslow = 0.6931471805599445, losshigh = 8.367301030105443, loss1 = 8.32382057211481, loss2 = 8.350802356687877
Iteration 5: low = 0, high = 1.9753086419753088e-10, ra