In [1]:
import numpy as np

class LassoRegression:
    """L1-regularised linear regression fitted by cyclic coordinate descent.

    The model minimises

        (1 / (2 * n_samples)) * ||y - X @ coef - intercept||^2 + alpha * ||coef||_1

    The intercept is not penalised.

    Parameters
    ----------
    alpha : float, default=1.0
        Strength of the L1 penalty (lambda).
    max_iter : int, default=1000
        Maximum number of full sweeps over the features.
    tol : float, default=1e-4
        A sweep that changes the coefficient vector by less than this
        (Euclidean norm) stops the optimisation.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Fitted coefficients; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Fitted intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha=1.0, max_iter=1000, tol=1e-4):
        self.alpha = alpha  # Regularization parameter (lambda)
        self.max_iter = max_iter  # Maximum number of iterations
        self.tol = tol  # Tolerance for convergence
        self.coef_ = None  # Coefficients
        self.intercept_ = None  # Intercept

    def soft_thresholding_operator(self, x, lambda_):
        """Shrink the scalar ``x`` towards zero by ``lambda_``.

        Returns ``x - lambda_`` when ``x > lambda_`` (and ``x > 0``),
        ``x + lambda_`` when ``x < -lambda_`` (and ``x < 0``), and ``0.0``
        otherwise.
        """
        if x > 0 and lambda_ < abs(x):
            return x - lambda_
        elif x < 0 and lambda_ < abs(x):
            return x + lambda_
        else:
            return 0.0

    def fit(self, X, y):
        """Estimate ``coef_`` and ``intercept_`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,)
            Training targets (1-D).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape

        # Centring X and y decouples the unpenalised intercept from the
        # coefficients; it is recovered in closed form after the loop.
        x_mean = X.mean(axis=0)
        y_mean = np.mean(y)
        X_centered = X - x_mean
        col_sq_norms = np.sum(X_centered ** 2, axis=0)

        self.coef_ = np.zeros(n_features)  # Initialize coefficients
        # Residual of the centred problem; with coef_ == 0 it is just y - mean(y).
        # `y - y_mean` allocates a new array, so the caller's y is never mutated.
        residual = y - y_mean

        for _ in range(self.max_iter):
            coef_old = np.copy(self.coef_)
            for j in range(n_features):
                if col_sq_norms[j] == 0.0:
                    # A constant column carries no information once centred;
                    # its coefficient stays at zero (also avoids 0 / 0).
                    continue
                X_j = X_centered[:, j]
                # Correlation of feature j with the residual that excludes
                # feature j's own current contribution.
                rho = np.dot(X_j, residual) + col_sq_norms[j] * self.coef_[j]
                # Exact one-dimensional minimiser: soft-threshold, then rescale
                # by the mean squared value of the column.
                new_coef = (
                    self.soft_thresholding_operator(rho / n_samples, self.alpha)
                    / (col_sq_norms[j] / n_samples)
                )
                delta = new_coef - self.coef_[j]
                if delta != 0.0:
                    # Keep the residual in sync instead of recomputing X @ coef.
                    residual -= X_j * delta
                    self.coef_[j] = new_coef

            # Check for convergence
            if np.linalg.norm(self.coef_ - coef_old) < self.tol:
                break

        self.intercept_ = y_mean - np.dot(x_mean, self.coef_)

    def predict(self, X):
        """Return ``X @ coef_ + intercept_`` for the rows of ``X``."""
        return np.dot(X, self.coef_) + self.intercept_

# Demo: fit the hand-written Lasso on a three-sample toy problem.
if __name__ == "__main__":
    # Toy training set: three samples, two features each
    X, y = (
        np.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.4]]),
        np.array([0.3, 0.5, 0.7]),
    )

    # Build the estimator with an L1 penalty of 0.1, then train it
    lasso = LassoRegression(alpha=0.1)
    lasso.fit(X, y)

    # Score a single unseen sample and show the result
    X_test = np.array([[0.4, 0.5]])
    y_pred = lasso.predict(X_test)
    print("Predicted:", y_pred)

Predicted: [0.5]


In [7]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# `sklearn.datasets.load_boston` is deprecated and is not available in
# scikit-learn >= 1.2, so the same table is read from the original source, as
# the deprecation notice recommends.
# NOTE: the scikit-learn maintainers discourage this dataset because of an
# ethical problem; `sklearn.datasets.fetch_california_housing` is the
# alternative they suggest.
BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
# Each record occupies two consecutive rows of the raw file: the even row plus
# the first two fields of the odd row are the features, the third field of the
# odd row is the target.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Lasso regression model
lasso = Lasso(alpha=0.1)  # Set the regularization parameter (lambda)
lasso.fit(X_train, y_train)

print(lasso.coef_)
print(lasso.intercept_)

# Make predictions on the test set
y_pred = lasso.predict(X_test)

# Evaluate the model performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

[-0.10415691  0.03489335 -0.01678527  0.91995182 -0.          4.31168655
 -0.01512583 -1.15148729  0.23923695 -0.01296223 -0.73224678  0.01309057
 -0.56467442]
19.85976948041746
Mean Squared Error: 25.155593753934173



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

In [3]:
import numpy as np

class RidgeRegression:
    """L2-regularised least squares solved in closed form.

    Minimises ``||y - X @ coef - intercept||^2 + alpha * ||coef||^2``.

    Parameters
    ----------
    alpha : float, default=1.0
        Strength of the L2 penalty (lambda).
    fit_intercept : bool, default=False
        When ``True`` an unpenalised intercept is estimated by centring ``X``
        and ``y`` before solving. The default keeps the model without an
        intercept term.

    Attributes
    ----------
    coef_ : numpy.ndarray or None
        Fitted coefficients; ``None`` until ``fit`` has been called.
    intercept_ : float or numpy.ndarray
        Fitted intercept; ``0.0`` when ``fit_intercept`` is ``False``.
    """

    def __init__(self, alpha=1.0, fit_intercept=False):
        self.alpha = alpha  # Regularization parameter (lambda)
        self.fit_intercept = fit_intercept  # Whether to estimate an intercept
        self.coef_ = None  # Coefficients
        self.intercept_ = 0.0  # Intercept (stays 0.0 without fit_intercept)

    def fit(self, X, y):
        """Estimate ``coef_`` (and ``intercept_``) from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Training targets.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        _, n_features = X.shape

        if self.fit_intercept:
            # Centring removes the intercept from the penalised problem.
            x_mean = X.mean(axis=0)
            y_mean = y.mean(axis=0)
            X_fit = X - x_mean
            y_fit = y - y_mean
        else:
            X_fit, y_fit = X, y

        # Solve (X^T X + alpha * I) coef = X^T y directly; this avoids forming
        # the explicit inverse, which is slower and less accurate.
        gram = X_fit.T @ X_fit + self.alpha * np.identity(n_features)
        self.coef_ = np.linalg.solve(gram, X_fit.T @ y_fit)

        if self.fit_intercept:
            self.intercept_ = y_mean - x_mean @ self.coef_
        else:
            self.intercept_ = 0.0

    def predict(self, X):
        """Return ``X @ coef_ + intercept_`` for the rows of ``X``."""
        return X @ self.coef_ + self.intercept_

# Demo: fit the hand-written Ridge on a three-sample toy problem.
if __name__ == "__main__":
    # Toy training set: three samples, two features each
    X, y = (
        np.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.4]]),
        np.array([0.3, 0.5, 0.7]),
    )

    # Build the estimator with an L2 penalty of 0.1, then train it
    ridge = RidgeRegression(alpha=0.1)
    ridge.fit(X, y)

    # Score a single unseen sample and show the result
    X_test = np.array([[0.4, 0.5]])
    y_pred = ridge.predict(X_test)
    print("Predicted:", y_pred)

Predicted: [0.72089552]


In [8]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# `sklearn.datasets.load_boston` is deprecated and is not available in
# scikit-learn >= 1.2, so the same table is read from the original source, as
# the deprecation notice recommends.
# NOTE: the scikit-learn maintainers discourage this dataset because of an
# ethical problem; `sklearn.datasets.fetch_california_housing` is the
# alternative they suggest.
BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
# Each record occupies two consecutive rows of the raw file: the even row plus
# the first two fields of the odd row are the features, the third field of the
# odd row is the target.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the Ridge regression model
ridge = Ridge(alpha=1.0)  # Set the regularization parameter (lambda)
ridge.fit(X_train, y_train)

print(ridge.coef_)
print(ridge.intercept_)

# Make predictions on the test set
y_pred = ridge.predict(X_test)

# Evaluate the model performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

[-1.09234061e-01  3.22706863e-02  7.49805942e-03  2.54546998e+00
 -9.53795159e+00  4.46450537e+00 -1.21910176e-02 -1.33870040e+00
  2.48881816e-01 -1.14746211e-02 -8.28604284e-01  1.26421124e-02
 -5.23833016e-01]
25.104099233774463
Mean Squared Error: 24.47719122770868



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

In [10]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# `sklearn.datasets.load_boston` is deprecated and is not available in
# scikit-learn >= 1.2, so the same table is read from the original source, as
# the deprecation notice recommends.
# NOTE: the scikit-learn maintainers discourage this dataset because of an
# ethical problem; `sklearn.datasets.fetch_california_housing` is the
# alternative they suggest.
BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
# Each record occupies two consecutive rows of the raw file: the even row plus
# the first two fields of the odd row are the features, the third field of the
# odd row is the target.
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the ordinary least-squares baseline (no regularization)
lr = LinearRegression()
lr.fit(X_train, y_train)

print(lr.coef_)
print(lr.intercept_)

# Make predictions on the test set
y_pred = lr.predict(X_test)

# Evaluate the model performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

[-1.13055924e-01  3.01104641e-02  4.03807204e-02  2.78443820e+00
 -1.72026334e+01  4.43883520e+00 -6.29636221e-03 -1.44786537e+00
  2.62429736e-01 -1.06467863e-02 -9.15456240e-01  1.23513347e-02
 -5.08571424e-01]
30.246750993923214
Mean Squared Error: 24.291119474973232
