In [1]:
import pandas as pd

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score

In [2]:
# Load the cruise-ship table from the CSV under the relative Data/ directory.
cruise = pd.read_csv(filepath_or_buffer="Data/cruise_ship_info.csv")

In [3]:
# Display the frame's column index to see which fields can be selected below.
cruise.columns

Index(['Ship_name', 'Cruise_line', 'Age', 'TonnageGTx1000', 'passengersx100',
       'lengthx100ft', 'cabinsx100', 'spaceratio', 'crewx100'],
      dtype='object')

In [4]:
# Predictor matrix: the six ship attributes the models below are fitted on.
X = cruise.loc[
    :,
    [
        "Age",
        "TonnageGTx1000",
        "passengersx100",
        "lengthx100ft",
        "cabinsx100",
        "spaceratio",
    ],
]

# Target: selecting with a one-element list keeps it a single-column
# DataFrame rather than a Series.
Y = cruise.loc[:, ["crewx100"]]

In [5]:
# 70% of the rows go to training; the fixed random_state makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    train_size=0.70,
    random_state=0,
)

# Standardisation statistics come from the (still unscaled) training rows
# only, so nothing about the test rows influences the scaling.
X_train_mean, X_train_std = X_train.mean(), X_train.std()

# Apply the same training-derived shift and scale to both partitions.
X_test = X_test.sub(X_train_mean).div(X_train_std)
X_train = X_train.sub(X_train_mean).div(X_train_std)

In [6]:
# Ordinary least-squares baseline; fit() returns the estimator itself, so
# construction and fitting can be chained.
Linreg = LinearRegression().fit(X_train, y_train)

# R2 on the held-out rows, then on the rows the model was fitted to.
r2_score_Linreg_test = r2_score(y_test, Linreg.predict(X_test))
y_pred_Linreg_train = Linreg.predict(X_train)
r2_score_Linreg_train = r2_score(y_train, y_pred_Linreg_train)

print("Linreg_train_R2_score: ", r2_score_Linreg_train)
print("Linreg_test_R2_score: ", r2_score_Linreg_test)

Linreg_train_R2_score:  0.9118485946473178
Linreg_test_R2_score:  0.957979988701403


In [7]:
# Ridge regression with regularisation strength alpha=0.05, fitted on the
# standardised training features.
ridge_reg = Ridge(alpha=0.05)
ridge_reg.fit(X_train, y_train)

# Training predictions get their own name (mirroring the Linreg cell) so that
# `ridge_reg_train` only ever holds the R2 score, never the prediction array.
y_pred_ridge_train = ridge_reg.predict(X_train)

# R2 on the training rows and on the held-out test rows.
ridge_reg_train = r2_score(y_train, y_pred_ridge_train)
ridge_reg_test = r2_score(y_test, ridge_reg.predict(X_test))
print("ridge_reg_train_R2_score: ", ridge_reg_train)
print("ridge_reg_test_R2_score: ", ridge_reg_test)

ridge_reg_train_R2_score:  0.911834722034894
ridge_reg_test_R2_score:  0.9581695082579529


In [8]:
# Lasso regression with regularisation strength alpha=0.01, fitted on the
# standardised training features.
Lasso_reg = Lasso(alpha=0.01)
Lasso_reg.fit(X_train, y_train)

# Training predictions get their own name (mirroring the Linreg cell) so that
# `Lasso_reg_train` only ever holds the R2 score, never the prediction array.
y_pred_Lasso_train = Lasso_reg.predict(X_train)

# R2 on the training rows and on the held-out test rows.
Lasso_reg_train = r2_score(y_train, y_pred_Lasso_train)
Lasso_reg_test = r2_score(y_test, Lasso_reg.predict(X_test))
print("Lasso_reg_train_R2_score: ", Lasso_reg_train)
print("Lasso_reg_test_R2_score: ", Lasso_reg_test)

Lasso_reg_train_R2_score:  0.9109007370544493
Lasso_reg_test_R2_score:  0.9567206919364786


In [9]:
# 4-fold cross-validation of the linear model, run on the training
# partition only; the test rows stay untouched.
scoresLinreg = cross_val_score(Linreg, X_train, y_train, cv=4)

# Report the mean and spread of the per-fold scores.
print(
    "%0.8f mean R2 with a standard deviation of %0.8f"
    % (scoresLinreg.mean(), scoresLinreg.std())
)

0.90645508 mean R2 with a standard deviation of 0.05355125


In [10]:
# 4-fold cross-validation of the ridge model on the training partition.
# The scores are stored under their own name so they no longer overwrite
# (and get mislabelled as) the linear-regression scores in `scoresLinreg`.
scoresRidge = cross_val_score(
    estimator=ridge_reg,
    X=X_train,
    y=y_train,
    cv=4,
)

# Report the mean and spread of the per-fold scores.
print(
    "%0.8f mean R2 with a standard deviation of %0.8f"
    % (scoresRidge.mean(), scoresRidge.std())
)

0.90668073 mean R2 with a standard deviation of 0.05380481
