In [12]:
import numpy as np
from sklearn import preprocessing

def step_gradient(X, Y, m, learning_rate):
    """Perform one gradient-descent update of the linear-model coefficients.

    The gradient used is that of the mean squared error
    ``(1/n) * sum((Y - X @ m) ** 2)``, i.e. ``(-2/n) * X.T @ (Y - X @ m)``.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        Y: Array-like holding one target per sample.
        m: 1-D array-like of current coefficients, one per feature.
        learning_rate: Step size applied to the gradient.

    Returns:
        A new ndarray with the updated coefficients; ``m`` is not modified.

    Raises:
        ValueError: If ``X`` is not a non-empty 2-D array.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError(
            "X must be a non-empty 2-D array of shape (n_samples, n_features)"
        )
    # ravel() so a column-vector target does not broadcast into an n x n matrix.
    Y = np.asarray(Y, dtype=float).ravel()
    m = np.asarray(m, dtype=float)

    # Compute all residuals once instead of re-deriving the prediction for
    # every (sample, feature) pair.
    residuals = Y - np.dot(X, m)
    m_slope = (-2 / X.shape[0]) * np.dot(X.T, residuals)
    return m - learning_rate * m_slope

def cost(m, x, y):
    """Return half the mean squared error of the linear model ``x @ m``.

    The value is ``(1 / (2 * n)) * sum((y - x @ m) ** 2)``. Note that this
    is half of the MSE whose gradient ``step_gradient`` follows (that
    function uses a ``-2/n`` factor), so the reported cost and the descent
    direction differ only by a constant factor of 2.

    Args:
        m: 1-D array-like of coefficients, one per feature.
        x: 2-D array-like of shape (n_samples, n_features).
        y: Array-like holding one target per sample.

    Returns:
        The halved mean squared error as a float.

    Raises:
        ValueError: If ``x`` is not a non-empty 2-D array.
    """
    x = np.asarray(x, dtype=float)
    if x.ndim != 2 or x.shape[0] == 0:
        raise ValueError(
            "x must be a non-empty 2-D array of shape (n_samples, n_features)"
        )
    # ravel() so a column-vector target does not broadcast into an n x n matrix.
    y = np.asarray(y, dtype=float).ravel()
    m = np.asarray(m, dtype=float)

    residuals = y - np.dot(x, m)
    return np.mean(residuals ** 2) / 2

def gd(x, y, learning_rate, iterations):
    """Fit linear-model coefficients by repeated gradient-descent steps.

    Coefficients start at zero (one per column of ``x``). Each iteration
    applies ``step_gradient`` once and prints the resulting ``cost``.

    Args:
        x: Feature matrix; ``len(x[0])`` determines the number of coefficients.
        y: Targets, one per row of ``x``.
        learning_rate: Step size forwarded to ``step_gradient``.
        iterations: Number of update steps to run.

    Returns:
        The coefficient vector after the final iteration.
    """
    n_coefficients = len(x[0])
    weights = np.zeros(n_coefficients)
    for step in range(iterations):
        weights = step_gradient(x, y, weights, learning_rate)
        current_cost = cost(weights, x, y)
        print("Iteration:", step, "Cost:", current_cost)
    return weights

def _add_cubic_terms(features):
    """Return ``features`` with every degree-3 column product appended.

    For each index triple ``i <= j <= k`` the element-wise product
    ``features[:, i] * features[:, j] * features[:, k]`` becomes a new
    column, appended after the original columns in lexicographic
    ``(i, j, k)`` order.
    """
    n_cols = features.shape[1]
    products = [
        features[:, i] * features[:, j] * features[:, k]
        for i in range(n_cols)
        for j in range(i, n_cols)
        for k in range(j, n_cols)
    ]
    # Stack once; appending column by column re-copies the whole matrix
    # for every added feature.
    return np.column_stack([features, *products])


def main(train_path='D:/Train.csv', test_path='D:/Test.csv',
         output_path='predictions.csv', iterations=300, learning_rate=0.005):
    """Train a linear model by gradient descent and write test predictions.

    The training file's last column is the target and all other columns are
    features; every column of the test file is treated as a feature. Both
    feature sets get the same cubic expansion, are standardized with a
    scaler fitted on the training data only, and receive a trailing column
    of ones as the bias term.

    Args:
        train_path: Comma-delimited training file (features + target).
        test_path: Comma-delimited test file (features only).
        output_path: Where the predictions are written, one per line.
        iterations: Number of gradient-descent steps.
        learning_rate: Gradient-descent step size.
    """
    # ndmin=2 keeps single-row / single-column files two-dimensional so the
    # column slicing below works for them as well.
    training = np.loadtxt(train_path, delimiter=',', ndmin=2)
    testing = np.loadtxt(test_path, delimiter=',', ndmin=2)

    # Separate features and target, then add the degree-3 product terms.
    x = _add_cubic_terms(training[:, :-1])
    y = training[:, -1]

    # Fit the scaler on training features only; it is reused for the test
    # set so both are scaled with the same statistics.
    scaler = preprocessing.StandardScaler()
    scaler.fit(x)
    x = scaler.transform(x)

    # The bias column is added after scaling so it stays equal to one.
    x = np.column_stack([x, np.ones(len(x))])

    m = gd(x, y, learning_rate, iterations)

    # Apply the identical feature pipeline to the test data.
    testing_scaled = scaler.transform(_add_cubic_terms(testing))
    x_test = np.column_stack([testing_scaled, np.ones(len(testing_scaled))])

    predictions = np.dot(x_test, m)

    np.savetxt(fname=output_path, delimiter=',', fmt='%.5f', X=predictions)

# Entry point: run the train-and-predict pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


Iteration: 0 Cost: 277.78547596199525
Iteration: 1 Cost: 268.4140907697869
Iteration: 2 Cost: 261.18270913529346
Iteration: 3 Cost: 254.70619195504727
Iteration: 4 Cost: 248.69422419132306
Iteration: 5 Cost: 243.02955811592653
Iteration: 6 Cost: 237.64259341676922
Iteration: 7 Cost: 232.48607079856004
Iteration: 8 Cost: 227.52607342839798
Iteration: 9 Cost: 222.73742812668127
Iteration: 10 Cost: 218.10097690756317
Iteration: 11 Cost: 213.60183789659354
Iteration: 12 Cost: 209.22824344884876
Iteration: 13 Cost: 204.97073624769587
Iteration: 14 Cost: 200.82159814532562
Iteration: 15 Cost: 196.77443610634722
Iteration: 16 Cost: 192.82387735033328
Iteration: 17 Cost: 188.96534212621012
Iteration: 18 Cost: 185.19487264293147
Iteration: 19 Cost: 181.50900317722284
Iteration: 20 Cost: 177.90466070620548
Iteration: 21 Cost: 174.3790883742372
Iteration: 22 Cost: 170.92978617391972
Iteration: 23 Cost: 167.55446469337525
Iteration: 24 Cost: 164.2510088425843
Iteration: 25 Cost: 161.01744924418253