# Mi primera Regresión Lineal 

In [1]:
import pandas as pd
import numpy as np

# Load the mtcars data; the first CSV column becomes the row index.
df = pd.read_csv('mtcars.csv', index_col=0)
# Preview the first rows of the frame.
df.head()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


## Generando mi matriz de Atributos y Vector Objetivo

In [2]:
# Feature matrix: every column except the target.
X = df.drop(columns='mpg')
# Target vector: the `mpg` column.
y = df['mpg']

# Build the linear regression and fit it on the complete dataset.
from sklearn.linear_model import LinearRegression
lr = LinearRegression().fit(X, y)
# For regression estimators .score() returns R2 by default
# (here it is measured on the same data used for fitting).
lr.score(X, y)

0.8690157644777647

## Obteniendo los coeficientes beta de mi Regresión más el intercepto 

In [3]:
# Fitted coefficients (betas) of the regression, one per column of X.
lr.coef_

array([-0.11144048,  0.01333524, -0.02148212,  0.78711097, -3.71530393,
        0.82104075,  0.31776281,  2.52022689,  0.65541302, -0.19941925])

In [4]:
# Fitted intercept (independent term) of the regression.
lr.intercept_

12.30337415599627

## Implementando un modelo Utilizando Train y Test Sets

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible. (The call can also be written on a single line.)
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='mpg'),
    df['mpg'],
    test_size=0.3,
    random_state=123,
)

In [6]:
from sklearn.linear_model import LinearRegression

# Create the model and train it on the training partition only.
lr = LinearRegression().fit(X_train, y_train)
# Evaluate R2 on the held-out test partition.
lr.score(X_test, y_test)

0.5347705850352262

# Ejercicios 

## Implementando un Modelo de Regresión en el set de Diabetes 

In [7]:
from sklearn.datasets import load_diabetes

# Load the diabetes dataset as pandas objects so the column names are kept.
diabetes = load_diabetes(as_frame=True)
X, y = diabetes.data, diabetes.target

# 70/30 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123
)

lr = LinearRegression().fit(X_train, y_train)

# R2 on both the training and the test partition.
print('Score Train:', lr.score(X_train, y_train))
print('Score Test:', lr.score(X_test, y_test))

# Positions of the largest and the smallest (most negative) coefficient,
# mapped back to their feature names.
ind_pos = lr.coef_.argmax()
ind_neg = lr.coef_.argmin()
print('Max coef:', X.columns[ind_pos])
print('Min coef:', X.columns[ind_neg])


Score Train: 0.5174979976746197
Score Test: 0.5078285584893742
Max coef: s5
Min coef: s1


## Implementando un Modelo de Regresión en el set de Boston Housing

In [8]:
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell requires scikit-learn < 1.2 to run.
from sklearn.datasets import load_boston

# Load the dataset once and take features, target and feature names from
# the same object (previously the loader was called twice).
boston = load_boston()
X, y = boston.data, boston.target
names = boston.feature_names

# 80/20 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

lr = LinearRegression()
lr.fit(X_train, y_train)

# R2 on both the training and the test partition.
print('Score Train:', lr.score(X_train, y_train))
print('Score Test:', lr.score(X_test, y_test))

# Positions of the largest and the smallest (most negative) coefficient,
# mapped back to their feature names.
ind_pos = np.argmax(lr.coef_)
ind_neg = np.argmin(lr.coef_)
print('Max coef:', names[ind_pos])
print('Min coef:', names[ind_neg])


Score Train: 0.7559380876016175
Score Test: 0.6592466510354097
Max coef: RM
Min coef: NOX


# Implementando Modelos con otras métricas

In [9]:
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Reload the diabetes data as pandas objects and redo the 70/30 split.
diabetes = load_diabetes(as_frame=True)
X, y = diabetes.data, diabetes.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123
)

lr = LinearRegression().fit(X_train, y_train)

# Predictions for each partition.
y_pred_train = lr.predict(X_train)
y_pred = lr.predict(X_test)

# Training-set metrics.
r2_train = r2_score(y_train, y_pred_train)
mse_train = mean_squared_error(y_train, y_pred_train)
rmse_train = np.sqrt(mse_train)
mae_train = mean_absolute_error(y_train, y_pred_train)

# Test-set metrics (R2 could equally be obtained with lr.score).
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print('Diabetes:')
print('R2 Train:', r2_train)
print('R2 Test', r2)
print('MSE Train:', mse_train)
print('MSE Test:', mse)
print('RMSE Train:', rmse_train)
print('RMSE Test:', rmse)
print('MAE Train:', mae_train)
print('MAE Test:', mae)

print('Intercepto:', lr.intercept_)

# Coefficients labelled by feature name, sorted from most negative to most positive.
pd.Series(lr.coef_, index=X.columns).sort_values()

Diabetes:
R2 Train: 0.5174979976746197
R2 Test 0.5078285584893742
MSE Train: 2854.168253060431
MSE Test: 2926.8005772468828
RMSE Train: 53.424416263169704
RMSE Test: 54.099912913487046
MAE Train: 43.03474379534746
MAE Test: 44.48057319064366
Intercepto: 152.61083063288848


s1    -855.214478
sex   -261.166011
age     10.453849
s6     102.377233
s3     166.518814
bp     280.725445
s4     309.887633
s2     472.173053
bmi    538.845412
s5     684.048952
dtype: float64

In [10]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell requires scikit-learn < 1.2 to run.
from sklearn.datasets import load_boston

# Load the dataset once and take features, target and feature names from
# the same object (previously the loader was called twice).
boston = load_boston()
X, y = boston.data, boston.target
names = boston.feature_names

# 70/30 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123
)

lr = LinearRegression()
lr.fit(X_train, y_train)

# Predictions for each partition.
y_pred = lr.predict(X_test)
y_pred_train = lr.predict(X_train)

# Coefficient of determination (lr.score would give the same values).
r2_train = r2_score(y_train, y_pred_train)
r2 = r2_score(y_test, y_pred)

# Mean squared error and its square root (same units as the target).
mse_train = mean_squared_error(y_train, y_pred_train)
mse = mean_squared_error(y_test, y_pred)

rmse_train = np.sqrt(mse_train)
rmse = np.sqrt(mse)

# Mean absolute error.
mae_train = mean_absolute_error(y_train, y_pred_train)
mae = mean_absolute_error(y_test, y_pred)

print('Boston:')
print('R2 Train:', r2_train)
print('R2 Test', r2)
print('MSE Train:', mse_train)
print('MSE Test:', mse)
print('RMSE Train:', rmse_train)
print('RMSE Test:', rmse)
print('MAE Train:', mae_train)
print('MAE Test:', mae)

print('Intercepto:', lr.intercept_)

# Coefficients labelled by feature name, sorted from most negative to most positive.
pd.Series(lr.coef_, index=names).sort_values()

Boston:
R2 Train: 0.7647156501433012
R2 Test 0.6485645742370703
MSE Train: 20.184336639873155
MSE Test: 28.40585481050824
RMSE Train: 4.492698146979514
RMSE Test: 5.329714327288869
MAE Train: 3.1219958710301117
MAE Test: 3.6913626771162673
Intercepto: 28.981270388095655


NOX       -14.340917
DIS        -1.326740
PTRATIO    -0.956626
LSTAT      -0.486571
CRIM       -0.100994
TAX        -0.013044
AGE        -0.007441
B           0.006425
ZN          0.039958
INDUS       0.075376
CHAS        0.264564
RAD         0.273495
RM          4.833183
dtype: float64