In [52]:
import csv
import random

In [53]:
# Load the insurance dataset: the first CSV record is the header, every
# following record is one data row (a list of strings).
# newline="" is what the csv module asks for, so that line endings are handled
# by the reader itself instead of being translated by the file object.
with open("insurance.csv", "r", newline="") as file:
    reader = csv.reader(file)
    header = next(reader)

    # csv.reader yields an empty list for a blank line; drop those so every
    # remaining row can be indexed by column position.
    data = [row for row in reader if row]

print("Header:", header)
print("First row:", data[0])

Header: ['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges']
First row: ['19', 'female', '27.9', '0', 'yes', 'southwest', '16884.924']


In [54]:
# Turn each raw CSV row (all strings) into a numeric feature vector plus target.
# Column order in a row: age, sex, bmi, children, smoker, region, charges.

# Integer code per region; any region not listed here is encoded as 0.
region_codes = {"southeast": 1, "southwest": 2, "northwest": 3}

X, y = [], []

for record in data:
    features = [
        float(record[0]),                  # age
        int(record[1] == "male"),          # sex: 1 for "male", otherwise 0
        float(record[2]),                  # bmi
        float(record[3]),                  # children
        int(record[4] == "yes"),           # smoker: 1 for "yes", otherwise 0
        region_codes.get(record[5], 0),    # region
    ]
    target = float(record[6])              # charges

    X.append(features)
    y.append(target)

print("Encoded sample:", X[:2])

Encoded sample: [[19.0, 0, 27.9, 0.0, 1, 2], [18.0, 1, 33.77, 1.0, 0, 1]]


In [55]:
# Shuffle the (features, target) pairs and hold out the last 20% for testing.
# A dedicated seeded generator makes the shuffle, and therefore the split and
# every number computed from it, reproducible across kernel restarts without
# touching the global `random` state.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

combined = list(zip(X, y))
random.Random(SPLIT_SEED).shuffle(combined)

split = int(TRAIN_FRACTION * len(combined))
train = combined[:split]
test = combined[split:]

# Copy each feature row. The pairs in `combined` hold the very same list
# objects as X, so any later in-place edit of X_train / X_test rows (the
# scaling cell does exactly that) would otherwise overwrite X as well.
X_train = [list(features) for features, _ in train]
y_train = [target for _, target in train]

X_test = [list(features) for features, _ in test]
y_test = [target for _, target in test]


In [56]:
# Min-max scale every feature column, one column at a time.
# The bounds come from the training rows only and are then applied to both
# splits; rows are rewritten in place.
for feature_idx in range(len(X_train[0])):
    train_column = [sample[feature_idx] for sample in X_train]
    lowest, highest = min(train_column), max(train_column)

    for split_rows in (X_train, X_test):
        for sample in split_rows:
            # The 1e-8 term keeps the denominator non-zero for a constant column.
            sample[feature_idx] = (sample[feature_idx] - lowest) / (highest - lowest + 1e-8)


In [57]:
def cost_function(X, y, w, b, reg=None, lam=0):
    """Mean squared error of a linear model, plus an optional weight penalty.

    Args:
        X: sequence of feature rows; row ``i`` is read at positions ``0..len(w)-1``.
        y: sequence of targets; its length sets the number of samples used.
        w: sequence of weights, one per feature.
        b: bias added to every prediction.
        reg: ``"ridge"`` adds ``lam * sum(w_j ** 2)``, ``"lasso"`` adds
            ``lam * sum(|w_j|)``; any other value adds nothing.
        lam: penalty strength.

    Returns:
        The mean of the squared residuals over all samples, plus the penalty.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
    """
    sample_count = len(y)
    squared_error_sum = 0

    for idx, target in enumerate(y):
        estimate = sum(weight * X[idx][k] for k, weight in enumerate(w)) + b
        residual = target - estimate
        squared_error_sum += residual ** 2

    loss = squared_error_sum / sample_count

    # The bias is never part of the penalty; only the weights are.
    if reg == "ridge":
        return loss + lam * sum(weight ** 2 for weight in w)

    if reg == "lasso":
        return loss + lam * sum(abs(weight) for weight in w)

    return loss


In [58]:
class MyRegression:
    """Linear regression trained with per-sample gradient descent.

    Samples are visited in the order given (no shuffling) and the weights are
    updated after every single sample. An optional ridge (L2) or lasso (L1)
    penalty is applied to the weights; the bias is never penalised.

    Args:
        lr: step size used for every per-sample update.
        epochs: number of full passes over the training data.
        reg: ``"ridge"``, ``"lasso"`` or ``None``; any value other than
            ``"ridge"`` / ``"lasso"`` trains without a penalty.
        lam: penalty strength.

    Attributes:
        w: list of learned weights, ``None`` until ``fit`` has been called.
        b: learned bias.
        history: cost after each epoch of the most recent ``fit`` call.
    """

    def __init__(self, lr=5e-7, epochs=3000, reg=None, lam=1):

        self.lr = lr
        self.epochs = epochs
        self.reg = reg
        self.lam = lam
        self.w = None
        self.b = 0
        self.history = []

    @staticmethod
    def _sign(value):
        """Return -1, 0 or 1 according to the sign of ``value``."""
        return (value > 0) - (value < 0)

    def fit(self, X, y):
        """Train on feature rows ``X`` and targets ``y``, starting from zero.

        Weights, bias and cost history are all reset, so calling ``fit`` again
        retrains from scratch. Prints a completion message when done.
        """
        n = len(y)
        features = len(X[0])

        self.w = [0]*features
        self.b = 0
        # Start a fresh history so a refit does not mix in costs from an
        # earlier run.
        self.history = []

        for _ in range(self.epochs):

            for i in range(n):

                row = X[i]
                pred = sum(self.w[j]*row[j] for j in range(features)) + self.b
                error = pred - y[i]

                for j in range(features):

                    grad = error*row[j]

                    if self.reg == "ridge":
                        grad += self.lam*self.w[j]

                    elif self.reg == "lasso":
                        # Subgradient of |w|: 0 at w == 0, so a weight sitting
                        # at zero is not pushed away from it by the penalty.
                        grad += self.lam*self._sign(self.w[j])

                    self.w[j] -= self.lr*grad

                self.b -= self.lr*error

            cost = cost_function(X, y, self.w, self.b, self.reg, self.lam)
            self.history.append(cost)

        # reg defaults to None, which has no .upper(); use a fixed label then.
        label = self.reg.upper() if self.reg is not None else "UNREGULARIZED"
        print(label, "training complete")

    def predict(self, X):
        """Return one prediction (``w . row + b``) per row of ``X``.

        Raises:
            TypeError: if called before ``fit`` (the weights are still ``None``).
        """
        preds = []

        for row in X:
            pred = sum(self.w[j]*row[j] for j in range(len(self.w))) + self.b
            preds.append(pred)

        return preds


In [59]:
# Fit the ridge-penalised model on the scaled training split, then show its weights.
RIDGE_LAMBDA = 10

ridge = MyRegression(lam=RIDGE_LAMBDA, reg="ridge")
ridge.fit(X_train, y_train)

print("Ridge Weights:", ridge.w)


RIDGE training complete
Ridge Weights: [243.53148330030453, 183.39143391804143, 158.5990979555795, 82.17381498388086, 441.54682822974877, 119.71807938326894]


In [60]:
# Fit the lasso-penalised model on the same training split, then show its weights.
LASSO_LAMBDA = 0.1

lasso = MyRegression(lam=LASSO_LAMBDA, reg="lasso")
lasso.fit(X_train, y_train)

print("Lasso Weights:", lasso.w)


LASSO training complete
Lasso Weights: [4057.0166856307483, 2640.27580068022, 2888.309265376093, 1452.110348154201, 6456.848527671781, 2177.6031634603237]


In [61]:
# Predict the held-out rows with the ridge model and preview the first few
# predictions next to the true charges.
preds = ridge.predict(X_test)

preview_count = 5
for idx in range(preview_count):
    print("Actual:", y_test[idx], "Predicted:", preds[idx])


Actual: 4137.5227 Predicted: 10191.18100546561
Actual: 6555.07035 Predicted: 10247.131609543367
Actual: 14001.2867 Predicted: 10433.495352622347
Actual: 17942.106 Predicted: 10904.507809258921
Actual: 4686.3887 Predicted: 10537.29986698503


In [62]:
# Mean squared error of the ridge predictions over the test rows.
squared_errors = ((predicted - y_test[idx]) ** 2 for idx, predicted in enumerate(preds))
mse = sum(squared_errors) / len(preds)
print("MSE:", mse)


MSE: 152352044.44250143
