# Supervised Machine Learning Algorithms 

## Linear Models: 

In [7]:
import mglearn 
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import Ridge 
from sklearn.linear_model import Lasso
import numpy as np

In [2]:
# Synthetic one-feature "wave" regression data, split with a fixed seed
# so the printed numbers are reproducible.
X, y = mglearn.datasets.make_wave(n_samples=60)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit the model once; both learned parameters are read from the same
# fitted estimator instead of refitting for each attribute.
model = LinearRegression()
model.fit(X_train, y_train)
slope = model.coef_
intercept = model.intercept_

print("slope: {}\nintercept; {}".format(slope, intercept))

# Compare the score on the training split with the score on held-out data.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print("\nTest Score: {}".format(test_score))
print("Train Score: {}".format(train_score))

slope: [0.39390555]
intercept; -0.031804343026759746

Test Score: 0.65933685968637
Train Score: 0.6700890315075756


### 1.) Ordinary linear models: often achieve high train scores but can generalize poorly to unseen data (overfitting)

In [3]:

# Extended Boston data, split with a fixed seed.
X, y = mglearn.datasets.load_extended_boston()
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=0)

# fit() hands back the fitted estimator, so construction and fitting
# can be written as a single expression.
model = LinearRegression().fit(X_train, y_train)

# Score on held-out data and on the training data to expose the gap.
test_score = model.score(X_test, y_test)
train_score = model.score(X_train, y_train)

print("Train Score: {}\nTest Score: {}".format(train_score, test_score))

Train Score: 0.9520519609032729
Test Score: 0.607472195966596


### 2.) Ridge regression models:  
have lower train scores than ordinary linear regression, but they are often used instead because L2 regularization reduces overfitting, so they generalize better


In [4]:
# Same data and same split seed as the ordinary linear regression cell,
# so the two sets of scores are directly comparable.
X, y = mglearn.datasets.load_extended_boston()
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=0)

# Ridge with its default settings; fit() returns the fitted estimator.
model = Ridge().fit(X_train, y_train)

# Score on held-out data and on the training data.
test_score = model.score(X_test, y_test)
train_score = model.score(X_train, y_train)

print("Train Score: {}\nTest Score: {}".format(train_score, test_score))

Train Score: 0.8857966585170941
Test Score: 0.7527683481744756


### 3.) Lasso models: 
   ##### i. they use L1 regularization (which drives some coefficients to exactly zero)
   ##### ii. some features are completely ignored 
   ##### iii. Using a lower value for alpha (together with a higher max_iter so the solver can converge) further improves our model

In [18]:
def _fit_and_summarise(estimator, X_fit, y_fit, X_eval, y_eval):
    """Fit ``estimator`` and summarise how it performs.

    Returns a tuple ``(train_score, test_score, n_features_used)`` where
    ``n_features_used`` counts the coefficients that are not zero.
    """
    estimator.fit(X_fit, y_fit)
    return (
        estimator.score(X_fit, y_fit),
        estimator.score(X_eval, y_eval),
        np.sum(estimator.coef_ != 0),
    )


X, y = mglearn.datasets.load_extended_boston()
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=0)

# Model 1: Lasso with its default settings.
model_1 = Lasso()
model1_train_score, model1_test_score, model1f_used = _fit_and_summarise(
    model_1, X_train, y_train, X_test, y_test
)

print("Model-1 train Score: {}\nModel-1 test Score: {}\nModel-1 features used: {}".format(model1_train_score, model1_test_score, model1f_used))


# Model 2: much smaller alpha, with a large iteration budget for the solver.
model_2 = Lasso(alpha=0.005, max_iter=100000)
model2_train_score, model2_test_score, model2_f_used = _fit_and_summarise(
    model_2, X_train, y_train, X_test, y_test
)

print("\nModel-2 train Score: {}\nModel-2 test Score: {}\nModel-2 features used: {}".format(model2_train_score, model2_test_score, model2_f_used))

Model-1 train Score: 0.2932376899111462
Model-1 test Score: 0.20937503255272294
Model-1 features used: 4

Model-2 train Score: 0.915961898733442
Model-2 test Score: 0.7813535143177992
Model-2 features used: 43
