# Error Based Learning (Linear Regression)

by: Blaise Geronimo

# Data Loading

In [1]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDClassifier

In [2]:
# fetch dataset
wine_quality = fetch_ucirepo(id=186)

# data (as pandas dataframes)
X = wine_quality.data.features
y = wine_quality.data.targets

In [3]:
# metadata
print(wine_quality.metadata)

{'uci_id': 186, 'name': 'Wine Quality', 'repository_url': 'https://archive.ics.uci.edu/dataset/186/wine+quality', 'data_url': 'https://archive.ics.uci.edu/static/public/186/data.csv', 'abstract': 'Two datasets are included, related to red and white vinho verde wine samples, from the north of Portugal. The goal is to model wine quality based on physicochemical tests (see [Cortez et al., 2009], http://www3.dsi.uminho.pt/pcortez/wine/).', 'area': 'Business', 'tasks': ['Classification', 'Regression'], 'characteristics': ['Multivariate'], 'num_instances': 4898, 'num_features': 11, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['quality'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2009, 'last_updated': 'Wed Nov 15 2023', 'dataset_doi': '10.24432/C56S3T', 'creators': ['Paulo Cortez', 'A. Cerdeira', 'F. Almeida', 'T. Matos', 'J. Reis'], 'intro_paper': {'ID': 252, 'type': 'NATIVE', 'title': 'Modeling wine preferences

In [4]:
# variable information
print(wine_quality.variables)

                    name     role         type demographic  \
0          fixed_acidity  Feature   Continuous        None   
1       volatile_acidity  Feature   Continuous        None   
2            citric_acid  Feature   Continuous        None   
3         residual_sugar  Feature   Continuous        None   
4              chlorides  Feature   Continuous        None   
5    free_sulfur_dioxide  Feature   Continuous        None   
6   total_sulfur_dioxide  Feature   Continuous        None   
7                density  Feature   Continuous        None   
8                     pH  Feature   Continuous        None   
9              sulphates  Feature   Continuous        None   
10               alcohol  Feature   Continuous        None   
11               quality   Target      Integer        None   
12                 color    Other  Categorical        None   

               description units missing_values  
0                     None  None             no  
1                     None  Non

# Train-Test Split

In [5]:
import random
random.seed(16)

def train_test_split(X, y, train_split=0.8):  # so it can be reused

    train_size = int(train_split * len(X))

    train_indices = random.sample(range(len(X)), train_size)
    X_train = X.iloc[train_indices]
    y_train = y.iloc[train_indices]

    test_indices = [i for i in range(len(X)) if i not in train_indices]  # to avoid rows in train
    X_test = X.iloc[test_indices]
    y_test = y.iloc[test_indices]

    return X_train, X_test, y_train, y_test

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_split=0.8)

In [7]:
X_train

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol
2961,6.4,0.25,0.33,1.40,0.040,42.0,115.0,0.9906,3.19,0.48,11.3
3843,7.0,0.36,0.14,11.60,0.043,35.0,228.0,0.9977,3.13,0.51,8.9
3936,6.5,0.28,0.35,9.80,0.067,61.0,180.0,0.9972,3.15,0.57,9.0
2334,6.4,0.25,0.30,5.50,0.038,15.0,129.0,0.9948,3.14,0.49,9.6
3415,6.7,0.27,0.30,13.90,0.029,34.0,131.0,0.9953,3.36,0.50,12.0
...,...,...,...,...,...,...,...,...,...,...,...
2824,6.8,0.32,0.37,3.40,0.023,19.0,87.0,0.9902,3.14,0.53,12.7
326,11.6,0.53,0.66,3.65,0.121,6.0,14.0,0.9978,3.05,0.74,11.5
3717,6.4,0.20,0.32,3.10,0.041,18.0,126.0,0.9914,3.43,0.42,12.0
1687,7.1,0.43,0.61,11.80,0.045,54.0,155.0,0.9974,3.11,0.45,8.7


In [8]:
y_train

Unnamed: 0,quality
2961,7
3843,5
3936,4
2334,6
3415,7
...,...
2824,6
326,7
3717,6
1687,5


In [9]:
X_test

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.40
6,7.9,0.60,0.06,1.6,0.069,15.0,59.0,0.99640,3.30,0.46,9.40
16,8.5,0.28,0.56,1.8,0.092,35.0,103.0,0.99690,3.30,0.75,10.50
17,8.1,0.56,0.28,1.7,0.368,16.0,56.0,0.99680,3.11,1.28,9.30
23,8.5,0.49,0.11,2.3,0.084,9.0,67.0,0.99680,3.17,0.53,9.40
...,...,...,...,...,...,...,...,...,...,...,...
6469,6.1,0.32,0.28,6.6,0.021,29.0,132.0,0.99188,3.15,0.36,11.45
6472,5.7,0.21,0.32,1.6,0.030,33.0,122.0,0.99044,3.33,0.52,11.90
6484,6.6,0.34,0.40,8.1,0.046,68.0,170.0,0.99494,3.15,0.50,9.55
6490,5.7,0.21,0.32,0.9,0.038,38.0,121.0,0.99074,3.24,0.46,10.60


In [10]:
y_test

Unnamed: 0,quality
4,5
6,5
16,7
17,5
23,5
...,...
6469,7
6472,6
6484,6
6490,6


# Auxiliary Functions

In [11]:
def calculate_SSE(predicted_values, actual_values):
    if len(predicted_values) == len(actual_values):
        sse = 0
        for i in range(len(predicted_values)):
            sse += (predicted_values[i] - actual_values[i])**2
        return sse
    else:
        return None

In [13]:
def initialize_weights(X_columns):
    if len(X_columns) > 0:
        return [0] * (len(X_columns) + 1)  # +1 for bias
    else:
        return None

In [14]:
random.seed(42)

def initialize_weights2(columns):
    # +1 for bias term
    return [random.uniform(-1, 1) for _ in range(len(columns) + 1)]

# Linear Regression

In [15]:
def predict(row, weights):
    row_with_bias = [1] + list(row)  # to align with bias
    prediction = 0
    for i in range(len(weights)):
        prediction += weights[i] * row_with_bias[i]
    return prediction

In [16]:
def minmax_scale(X):
    return (X - X.min()) / (X.max() - X.min())

In [17]:
def linear_regression(X, y, alpha=0.01, max_iterations=1000):

    X = minmax_scale(X)

    # CALL initialize_weights
    weights = initialize_weights2(X.columns)
    y = y.squeeze()

    prev_sse = float('inf')

    # for each iteration (gradient descent)
    for iteration in range(max_iterations):
        predicted_values = []

        # for each data point
        for i in range(len(X)):
            row = X.iloc[i]
            prediction = predict(row, weights)
            predicted_values.append(prediction)

        # debug
        # print(f"Predicted values: {predicted_values}")
        # print(f"N predicted: {len(predicted_values)}")
        # print(f"Actual values: {y}")
        # print(f"N actual: {len(y)}")

        # CALL calculate_sse
        current_sse = calculate_SSE(predicted_values, y.tolist())
        print(f"Iteration {iteration}: SSE = {round(current_sse, 3)}")

        # stop condition
        if abs(prev_sse - current_sse) < 0.01 * prev_sse:
            print("Stopping early: SSE improvement < 1%")
            break

        # compute delta errors
        delta_errors = [0.0] * len(weights)
        for i in range(len(X)):
            row = X.iloc[i]
            row_with_bias = [1] + list(row)
            error = y.iloc[i] - predicted_values[i]
            for j in range(len(weights)):
                delta_errors[j] += error * row_with_bias[j]

        # update weights
        m = len(X)
        for j in range(len(weights)):
            weights[j] = weights[j] + alpha * delta_errors[j]

        prev_sse = current_sse

    return [round(float(w), 3) for w in weights]

In [18]:
# train
weights = linear_regression(X_train, y_train, alpha=0.0001, max_iterations=1000)
print(f"Final Weights: {weights}")

Iteration 0: SSE = 225025.874
Iteration 1: SSE = 7634.591
Iteration 2: SSE = 3920.262
Iteration 3: SSE = 3821.073
Iteration 4: SSE = 3784.728
Stopping early: SSE improvement < 1%
Final Weights: [4.183, 0.138, 0.178, 0.206, 1.036, 0.638, 1.18, 0.138, 1.038, 0.573, 0.127, 1.545]


In [19]:
# train using sklearn
sk_model = LinearRegression()
sk_model.fit(minmax_scale(X_train), y_train)

In [20]:
# predict using sklearn
sk_preds = sk_model.predict(minmax_scale(X_test))

In [21]:
# custom predict
X_test_scaled = minmax_scale(X_test)

In [22]:
custom_preds = []
for i in range(len(X_test_scaled)):
    row = X_test_scaled.iloc[i]
    pred = predict(row, weights)
    custom_preds.append(pred)

In [23]:
custom_sse = calculate_SSE(custom_preds, y_test.squeeze().tolist())
sklearn_sse = calculate_SSE(sk_preds.ravel().tolist(), y_test.squeeze().tolist())

print(f'Custom Linear Regression SSE:  {round(custom_sse, 4)}')
print(f'Sklearn Linear Regression SSE: {round(sklearn_sse, 4)}')

Custom Linear Regression SSE:  827.7451
Sklearn Linear Regression SSE: 754.0258


In [24]:
#sklearn weights
sk_bias = sk_model.intercept_.flatten()[0]
sk_weights = sk_model.coef_.flatten()

sklearn_full_weights = [round(float(sk_bias), 3)] + [round(float(w), 3) for w in sk_weights]
print(f"Sklearn Weights: {sklearn_full_weights}")

Sklearn Weights: [5.141, 0.593, -1.997, -0.178, 1.221, -0.475, 1.819, -1.092, -0.984, 0.397, 1.529, 1.996]


In [25]:
# manual weights
print(f"Manual Weights: {weights}")

Manual Weights: [4.183, 0.138, 0.178, 0.206, 1.036, 0.638, 1.18, 0.138, 1.038, 0.573, 0.127, 1.545]


For linear regression task, I built a model from scratch using the gradient descent method and Sum of Squared Errors (SSE) as the loss function. After training the model on scaled data, I compared its predictions and learned weights with scikit-learn’s LinearRegression. While the SSE values were fairly close, the learned weights differed noticeably. This is expected since scikit-learn uses a closed-form solution based on the Normal Equation, which computes the optimal weights analytically, while my implementation approximates them iteratively.

# Logistic Regression (Titanic Dataset)

Using `survival` as target

In [26]:
df_test = pd.read_csv('test.csv')
df_train = pd.read_csv('train.csv')

In [27]:
df_test

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [28]:
df_train

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [29]:
df_train = df_train[['Pclass', 'Sex', 'Age', 'Fare', 'SibSp', 'Parch', 'Survived']]
df_test = df_test[['Pclass', 'Sex', 'Age', 'Fare', 'SibSp', 'Parch']]

In [30]:
df_train

Unnamed: 0,Pclass,Sex,Age,Fare,SibSp,Parch,Survived
0,3,male,22.0,7.2500,1,0,0
1,1,female,38.0,71.2833,1,0,1
2,3,female,26.0,7.9250,0,0,1
3,1,female,35.0,53.1000,1,0,1
4,3,male,35.0,8.0500,0,0,0
...,...,...,...,...,...,...,...
886,2,male,27.0,13.0000,0,0,0
887,1,female,19.0,30.0000,0,0,1
888,3,female,,23.4500,1,2,0
889,1,male,26.0,30.0000,0,0,1


In [31]:
df_test

Unnamed: 0,Pclass,Sex,Age,Fare,SibSp,Parch
0,3,male,34.5,7.8292,0,0
1,3,female,47.0,7.0000,1,0
2,2,male,62.0,9.6875,0,0
3,3,male,27.0,8.6625,0,0
4,3,female,22.0,12.2875,1,1
...,...,...,...,...,...,...
413,3,male,,8.0500,0,0
414,1,female,39.0,108.9000,0,0
415,3,male,38.5,7.2500,0,0
416,3,male,,8.0500,0,0


In [32]:
# clean df_train

# impute missing values
# mode for categorical
for column in ['Pclass', 'Sex']:
    mode_value = df_train[column].mode()[0]
    df_train.loc[df_train[column].isna(), column] = mode_value

# median for numerical
for column in ['Age', 'Fare', 'SibSp', 'Parch']:
    median_value = df_train[column].median()
    df_train.loc[df_train[column].isna(), column] = median_value

# one hot encode categorical columns
df_train = pd.get_dummies(df_train, columns=['Pclass'], dtype = int)
df_train = pd.get_dummies(df_train, columns=['Sex'], drop_first=True, dtype = int)  # to avoid multi collinearity

# normalize
for column in ['Age', 'Fare', 'SibSp', 'Parch']:
    df_train[column] = (df_train[column] - df_train[column].min()) / (df_train[column].max() - df_train[column].min())

In [33]:
df_train

Unnamed: 0,Age,Fare,SibSp,Parch,Survived,Pclass_1,Pclass_2,Pclass_3,Sex_male
0,0.271174,0.014151,0.125,0.000000,0,0,0,1,1
1,0.472229,0.139136,0.125,0.000000,1,1,0,0,0
2,0.321438,0.015469,0.000,0.000000,1,0,0,1,0
3,0.434531,0.103644,0.125,0.000000,1,1,0,0,0
4,0.434531,0.015713,0.000,0.000000,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...
886,0.334004,0.025374,0.000,0.000000,0,0,1,0,1
887,0.233476,0.058556,0.000,0.000000,1,1,0,0,0
888,0.346569,0.045771,0.125,0.333333,0,0,0,1,0
889,0.321438,0.058556,0.000,0.000000,1,1,0,0,1


In [34]:
# similarly clean df_test

# impute missing values
# mode for categorical
for column in ['Pclass', 'Sex']:
    mode_value = df_test[column].mode()[0]
    df_test.loc[df_test[column].isna(), column] = mode_value

# median for numerical
for column in ['Age', 'Fare', 'SibSp', 'Parch']:
    median_value = df_test[column].median()
    df_test.loc[df_test[column].isna(), column] = median_value

# one hot encode categorical columns
df_test = pd.get_dummies(df_test, columns=['Pclass'], dtype = int)
df_test = pd.get_dummies(df_test, columns=['Sex'], drop_first=True, dtype = int)  # to avoid multi collinearity

# normalize
for column in ['Age', 'Fare', 'SibSp', 'Parch']:
    df_test[column] = (df_test[column] - df_test[column].min()) / (df_test[column].max() - df_test[column].min())

In [35]:
df_test

Unnamed: 0,Age,Fare,SibSp,Parch,Pclass_1,Pclass_2,Pclass_3,Sex_male
0,0.452723,0.015282,0.000,0.000000,0,0,1,1
1,0.617566,0.013663,0.125,0.000000,0,0,1,0
2,0.815377,0.018909,0.000,0.000000,0,1,0,1
3,0.353818,0.016908,0.000,0.000000,0,0,1,1
4,0.287881,0.023984,0.125,0.111111,0,0,1,0
...,...,...,...,...,...,...,...,...
413,0.353818,0.015713,0.000,0.000000,0,0,1,1
414,0.512066,0.212559,0.000,0.000000,1,0,0,0
415,0.505473,0.014151,0.000,0.000000,0,0,1,1
416,0.353818,0.015713,0.000,0.000000,0,0,1,1


## Logistic Regression

In [36]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [37]:
def logistic_regression(X, y, alpha=0.01, max_iterations=1000):

    # initialize weights
    weights = initialize_weights2(X.columns)
    y = y.squeeze()

    prev_sse = float('inf')

    for iteration in range(max_iterations):
        predicted_probs = []

        for i in range(len(X)):
            row = X.iloc[i]
            z = predict(row, weights)
            prediction = sigmoid(z)
            predicted_probs.append(prediction)

        # calculate SSE
        current_sse = calculate_SSE(predicted_probs, y.tolist())
        print(f"Iteration {iteration}: SSE = {round(current_sse, 3)}")

        if abs(prev_sse - current_sse) < 0.01 * prev_sse:
            print("Stopping early: SSE improvement < 1%")
            break

        # compute gradient
        delta_errors = [0.0] * len(weights)
        for i in range(len(X)):
            row = X.iloc[i]
            row_with_bias = [1] + list(row)
            error = y.iloc[i] - predicted_probs[i]
            for j in range(len(weights)):
                delta_errors[j] += error * row_with_bias[j]

        # update weights
        m = len(X)
        for j in range(len(weights)):
            weights[j] = weights[j] + alpha * delta_errors[j]

        prev_sse = current_sse

    return [round(float(w), 3) for w in weights]

In [38]:
# Separate features and target
X_train = df_train.drop(columns='Survived')
y_train = df_train['Survived']

In [39]:
# train
manual_weights = logistic_regression(X_train, y_train, alpha=0.0001, max_iterations=1000)
print('Final weights:', manual_weights)

Iteration 0: SSE = 232.999
Iteration 1: SSE = 232.291
Stopping early: SSE improvement < 1%
Final weights: [-0.936, -0.599, 0.301, 0.091, -0.558, 0.185, 0.619, -0.983, 0.606]


In [40]:
def predict_class(row, weights, threshold=0.5):
    z = predict(row, weights)
    return 1 if sigmoid(z) >= threshold else 0

In [41]:
def predict_logistic(X, weights, threshold=0.5):
    predictions = []
    for i in range(len(X)):
        row = X.iloc[i]
        pred = predict_class(row, weights, threshold)
        predictions.append(pred)
    return predictions

In [42]:
custom_preds = predict_logistic(df_test, manual_weights) # predict

### Compare with Sklearn

In [43]:
sgd_model = SGDClassifier(loss='squared_error',  # to match error
                          learning_rate='constant',
                          eta0=0.0001,
                          max_iter=1000,
                          penalty=None,
                          random_state=42)
sgd_model.fit(X_train, y_train)

In [44]:
sklearn_probs = sgd_model.predict(df_test)
sklearn_preds = [1 if p >= 0.5 else 0 for p in sklearn_probs]

In [45]:
df_test['Manual_prediction'] = custom_preds
df_test['SklearnPrediction'] = sklearn_preds

In [46]:
df_test

Unnamed: 0,Age,Fare,SibSp,Parch,Pclass_1,Pclass_2,Pclass_3,Sex_male,Manual_prediction,SklearnPrediction
0,0.452723,0.015282,0.000,0.000000,0,0,1,1,0,0
1,0.617566,0.013663,0.125,0.000000,0,0,1,0,0,0
2,0.815377,0.018909,0.000,0.000000,0,1,0,1,0,0
3,0.353818,0.016908,0.000,0.000000,0,0,1,1,0,0
4,0.287881,0.023984,0.125,0.111111,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...
413,0.353818,0.015713,0.000,0.000000,0,0,1,1,0,0
414,0.512066,0.212559,0.000,0.000000,1,0,0,0,0,1
415,0.505473,0.014151,0.000,0.000000,0,0,1,1,0,0
416,0.353818,0.015713,0.000,0.000000,0,0,1,1,0,0


We cannot compare SSE/accuracy because df_test has no labels. However, sklearn has classified more positive classes than manual prediction after manual inspection of the dataframe.

In [47]:
sk_bias = sk_model.intercept_[0]
sk_weights = sk_model.coef_[0]
sklearn_full_weights = [round(float(sk_bias), 3)] + [round(float(w), 3) for w in sk_weights]
print(f"Sklearn SGD Weights: {sklearn_full_weights}")

Sklearn SGD Weights: [5.141, 0.593, -1.997, -0.178, 1.221, -0.475, 1.819, -1.092, -0.984, 0.397, 1.529, 1.996]


In [48]:
print(f"Manual Logistic Weights: {manual_weights}")

Manual Logistic Weights: [-0.936, -0.599, 0.301, 0.091, -0.558, 0.185, 0.619, -0.983, 0.606]


Above, I implemented logistic regression from scratch using gradient descent and Sum of Squared Errors (SSE) as the loss function, although SSE isn't the standard loss for classification tasks. My custom model applied the sigmoid function to compute probabilities and updated weights through batch gradient descent. Predictions were converted to binary outcomes using a 0.5 threshold. While this approach produced functional results, I noticed that the learned weights were significantly different from those generated by scikit-learn’s LogisticRegression, which sparked my curiosity.

This led me to explore how scikit-learn trains its logistic regression models. Upon checking the documentation and digging into the underlying methods, I found that LogisticRegression (the model I trained at first) uses solvers like LBFGS and liblinear which are known to be efficient optimization techniques, but not gradient descent in the same way I had implemented it manually. Wanting a fairer comparison, I looked for scikit-learn implementations that used gradient descent and discovered SGDClassifier. Unlike LogisticRegression, SGDClassifier supports gradient-based training, allows control over learning rate and iterations, and lets you specify the loss function. To better align with my manual approach, I used 'squared_error' as the loss and matched other hyperparameters such as the learning rate (eta0) and number of iterations.

After training both models using the same setup, I compared the weights. Although there were still differences in the resulting values, the comparison was much more meaningful now that both models used the same optimization method and loss function. This process highlighted how even small differences in optimization algorithms, initialization, and convergence criteria can lead to variations in the final model.

This training of a manual and automated model made it clear to me how important implementation details are when training models. Using SSE instead of log loss, or switching from LBFGS to gradient descent, can dramatically change how a model learns. Building the model from scratch helped me unpack scikit-learn’s tools and gave me a deeper appreciation for both the convenience of high-level libraries and the learning value of doing it manually.