In [None]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, train_test_split

In [None]:
# Fetch the Wine dataset and assemble one frame: the feature columns
# (named after wine.feature_names) plus the label as a 'target' column.
wine = load_wine()
df = pd.DataFrame(wine.data, columns=wine.feature_names).assign(target=wine.target)

# Preview the first five rows
print("Wine dataset head:\n", df.head())

Wine dataset head:
    alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline  

In [None]:
# Simple Linear Regression
# NOTE: 'target' is the wine class label returned by load_wine; fitting a
# LinearRegression to it treats the class ids as ordered numeric values.

# Step 1: Select the feature ('alcohol') and target for Simple Linear Regression
X_simple = df[['alcohol']]  # Double brackets keep X as a 2-D DataFrame, as fit() expects
y_simple = df['target']  # Target variable is 'target'

# Step 2: Hold out 20% of the rows for testing (fixed seed keeps the split reproducible)
X_train_simple, X_test_simple, y_train_simple, y_test_simple = train_test_split(
    X_simple, y_simple, test_size=0.2, random_state=42
)

# Step 3: Train the Simple Linear Regression model
simple_regressor = LinearRegression()
simple_regressor.fit(X_train_simple, y_train_simple)

# Step 4: Make predictions on the held-out rows
y_pred_simple = simple_regressor.predict(X_test_simple)

# Step 5: Evaluate the model on the held-out rows
mse_simple = mean_squared_error(y_test_simple, y_pred_simple)
r2_simple = r2_score(y_test_simple, y_pred_simple)
print(f"\nSimple Linear Regression - MSE: {mse_simple}, R²: {r2_simple}")

# k-fold cross-validation on Simple Linear Regression
# Passing a bare integer as `cv` gives a regressor contiguous, unshuffled folds.
# The wine rows are grouped by class, so such folds are scored on labels that are
# largely absent from their training split, which inflates and destabilises the
# per-fold MSE. Use an explicitly shuffled, seeded splitter instead.
cv_simple = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores_simple = cross_val_score(
    simple_regressor, X_simple, y_simple, cv=cv_simple, scoring='neg_mean_squared_error'
)
# The scorer returns negated MSE (higher is better), so flip the sign when reporting
print("\nk-Fold Cross-Validation (Simple LR) - MSE Scores:", -cv_scores_simple)
print("Mean k-Fold MSE (Simple LR):", -cv_scores_simple.mean())


Simple Linear Regression - MSE: 0.507269587602501, R²: 0.13039499268142662

k-Fold Cross-Validation (Simple LR) - MSE Scores: [1.30001156 0.5658872  0.10711433 0.48445014 2.23140521]
Mean k-Fold MSE (Simple LR): 0.9377736892572976


In [None]:
# Multiple Linear Regression
# NOTE: 'target' is the wine class label returned by load_wine; fitting a
# LinearRegression to it treats the class ids as ordered numeric values.

# Step 1: Select multiple features for Multiple Linear Regression
X_multiple = df.drop(columns=['target'])  # Using all features except target
y_multiple = df['target']  # Target variable is 'target'

# Step 2: Hold out 20% of the rows for testing (fixed seed keeps the split reproducible)
X_train_multiple, X_test_multiple, y_train_multiple, y_test_multiple = train_test_split(
    X_multiple, y_multiple, test_size=0.2, random_state=42
)

# Step 3: Train the Multiple Linear Regression model
multiple_regressor = LinearRegression()
multiple_regressor.fit(X_train_multiple, y_train_multiple)

# Step 4: Make predictions on the held-out rows
y_pred_multiple = multiple_regressor.predict(X_test_multiple)

# Step 5: Evaluate the model on the held-out rows
mse_multiple = mean_squared_error(y_test_multiple, y_pred_multiple)
r2_multiple = r2_score(y_test_multiple, y_pred_multiple)
print(f"\nMultiple Linear Regression - MSE: {mse_multiple}, R²: {r2_multiple}")

# k-fold cross-validation on Multiple Linear Regression
# Passing a bare integer as `cv` gives a regressor contiguous, unshuffled folds.
# The wine rows are grouped by class, so such folds are scored on labels that are
# largely absent from their training split. Use an explicitly shuffled, seeded
# splitter so every fold sees a mix of classes.
cv_multiple = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores_multiple = cross_val_score(
    multiple_regressor, X_multiple, y_multiple, cv=cv_multiple, scoring='neg_mean_squared_error'
)
# The scorer returns negated MSE (higher is better), so flip the sign when reporting
print("\nk-Fold Cross-Validation (Multiple LR) - MSE Scores:", -cv_scores_multiple)
print("Mean k-Fold MSE (Multiple LR):", -cv_scores_multiple.mean())


Multiple Linear Regression - MSE: 0.06853348464256047, R²: 0.8825140263270391

k-Fold Cross-Validation (Multiple LR) - MSE Scores: [0.0805439  0.11198656 0.08452765 0.13949167 0.0738447 ]
Mean k-Fold MSE (Multiple LR): 0.09807889456819846
