# Practice Activity 5.3: Implementing LASSO

[Coursera course page: Practice Activity — Implementing LASSO](https://www.coursera.org/learn/ai-and-machine-learning-algorithms-and-techniques/supplement/gqj4O/practice-activity-implementing-lasso)

In [1]:
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
# Load the student records (study hours, previous exam score, pass/fail label)
# from student_data.csv in the working directory.
data = pd.read_csv('student_data.csv')

# NOTE(review): read_csv already returns a DataFrame, so this wrap is redundant;
# it is kept so both `data` and `df` stay defined as before.
df = pd.DataFrame(data)

# Predictors are the two numeric columns; the target is the Pass label.
feature_columns = ['StudyHours', 'PrevExamScore']
X = df[feature_columns]
y = df['Pass']

# Preview the first five rows
print(df.head(5))

   StudyHours  PrevExamScore  Pass
0           5             83     0
1           5             74     0
2           9             72     1
3           5             76     0
4           6             69     0


## Splitting the data

In [3]:
# Fixed seed for the train/test split. Drawing the seed with random.randint on
# every run made each downstream score and coefficient change between runs.
# 104 is the value that the recorded run of this notebook printed, so with the
# same data and library versions the saved results should be reproducible.
random_state = 104
print(f"random_state: {random_state}")

# Split data into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state, shuffle=True
)

# Sanity check: every row must land in exactly one of the two partitions
assert len(X_train) + len(X_test) == len(X), "train/test split lost or duplicated rows"

# Display the shape of the training and testing sets
print(f"Training data: {X_train.shape}, {y_train.shape}")
print(f"Testing data: {X_test.shape}, {y_test.shape}")

randonmaxboundery: 999
random_state: 104
Training data: (800, 2), (800,)
Testing data: (200, 2), (200,)


## Applying LASSO

In [4]:
# Build the LASSO regressor (alpha is the regularization strength) and fit it
# to the training split in one step; fit() returns the estimator itself.
lasso_model = Lasso(alpha=0.1).fit(X_train, y_train)

# Predict on the held-out split and score the predictions with R-squared
y_pred = lasso_model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R-squared score: {r2}')

R-squared score: 0.623195939275456


In [5]:
# Show the fitted weight of each feature, in the column order used for X
coefficients = lasso_model.coef_
print(f'LASSO Coefficients: {coefficients}')

LASSO Coefficients: [0.19064406 0.00423829]


## Tuning the regularization parameter

In [6]:
# Candidate regularization strengths, from weakest to strongest
alphas = [0.01, 0.05, 0.1, 0.5, 1.0]

# Fit one model per alpha and report its test-set R-squared and coefficients.
# Loop-local names are used so the sweep does not overwrite `lasso_model`,
# `y_pred`, or `r2` from the alpha=0.1 fit above; otherwise re-running the
# coefficient cell after this one would report the last model of the sweep.
for alpha in alphas:
    sweep_model = Lasso(alpha=alpha)
    sweep_model.fit(X_train, y_train)
    sweep_pred = sweep_model.predict(X_test)
    sweep_r2 = r2_score(y_test, sweep_pred)
    print(f'Alpha: {alpha}, R-squared score: {sweep_r2}, Coefficients: {sweep_model.coef_}')

Alpha: 0.01, R-squared score: 0.6407350976082357, Coefficients: [0.22250899 0.00637975]
Alpha: 0.05, R-squared score: 0.6362859904014944, Coefficients: [0.2083468  0.00542799]
Alpha: 0.1, R-squared score: 0.623195939275456, Coefficients: [0.19064406 0.00423829]
Alpha: 0.5, R-squared score: 0.24287565901095443, Coefficients: [0.04978857 0.        ]
Alpha: 1.0, R-squared score: -0.004741330939288524, Coefficients: [0. 0.]
