In [1]:
from linear_regression_scratch import LinearRegressionScratch
from custom_scaler import CustomScaler
from config import config
import csv
import random
from math import sqrt

In [2]:
def preprocess_csv(file_path):
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        data = list(reader)
    headers = data[0]  # Extract headers
    data = data[1:]  # Exclude headers
    X = [list(map(float, row[:-1])) for row in data]
    y = [float(row[-1]) for row in data]
    return X, y, headers

In [3]:
def train_test_split(X, y, test_size=0.2):
    data = list(zip(X, y))
    random.shuffle(data)
    split_index = int(len(data) * (1 - test_size))
    train_data = data[:split_index]
    test_data = data[split_index:]
    X_train, y_train = zip(*train_data)
    X_test, y_test = zip(*test_data)
    return list(X_train), list(y_train), list(X_test), list(y_test)

In [4]:
def mean_absolute_error(y_true, y_pred):
    return sum(abs(y_true[i] - y_pred[i]) for i in range(len(y_true))) / len(y_true)

def root_mean_squared_error(y_true, y_pred):
    mse = sum((y_true[i] - y_pred[i]) ** 2 for i in range(len(y_true))) / len(y_true)
    return sqrt(mse)

In [5]:
file_path = 'experimental_data.csv'  
X, y, headers = preprocess_csv(file_path)

In [6]:
scaler = CustomScaler()
X = scaler.fit_transform(X)

# split the data
X_train, y_train, X_test, y_test = train_test_split(X, y)

In [7]:
model = LinearRegressionScratch(
    lr=config['learning_rate'],
    epochs=config['epochs'],
    regularization=config['regularization'],
    reg_lambda=config['reg_lambda']
)
model.fit(X_train, y_train)

In [8]:
y_pred_test = model.predict(X_test)
print("Mean Squared Error (Test):", model.mean_squared_error(y_test, y_pred_test))
print("R2 Score (Test):", model.r2_score(y_test, y_pred_test))
print("Mean Absolute Error (Test):", mean_absolute_error(y_test, y_pred_test))
print("Root Mean Squared Error (Test):", root_mean_squared_error(y_test, y_pred_test))

Mean Squared Error (Test): 25033.212433101442
R2 Score (Test): 0.8841364180432827
Mean Absolute Error (Test): 121.50143960623298
Root Mean Squared Error (Test): 158.218875084806


In [9]:
# testing some random inputs
custom_data = [
    [4, 2, 2500, 5000, 1.0, 4, 2000, 500],  ## 4-bedroom, 2-bathroom house
    [3, 1.5, 1800, 4000, 1.0, 3, 1500, 300]  # Total random
]

## scale the custom data -- very important in the input functionallity too
scaled_custom_data = scaler.transform(custom_data)


custom_predictions = model.predict(scaled_custom_data)
print("custom predictions for prices of the two test houses :", custom_predictions)


Custom Predictions for Specific Targets: [409872.3267950247, 297371.0032489255]
