# EDS232 Discussion 1: Creating Widgets for Multiple Linear Regression

https://maro406.github.io/eds-232-machine-learning/discussion/week1.html


In [20]:
# Load libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets
from ipywidgets import interact, FloatSlider
from IPython.display import display, clear_output

In [9]:
# Read-in data
file_path = "data/hurricane.xlsx"

do_data = pd.read_excel("data/hurricane.xlsx", sheet_name=5).drop(['Piermont D.O. (ppm)'], axis=1)

turbidity_data = pd.read_excel("data/hurricane.xlsx",
                            sheet_name= 'Turbidity').drop(['Piermont Turbidity in NTU'], axis=1)

rainfall_data = pd.read_excel("data/hurricane.xlsx",
                            sheet_name= 'Rainfall').drop(['Piermont  Rainfall Daily Accumulation (Inches)'], axis=1)

## Data Cleaning

We need to merge our different data frames, we can merge our data on this. 

In [22]:
# Merge the datasets
data = do_data.merge(rainfall_data, on = 'Date Time (ET)') 
data = data.merge(turbidity_data, on = 'Date Time (ET)')
data.head()

# Update column names
data.columns = ['date', 'albany_DO', 'norrie_DO', 'albany_rainfall', 'norrie_rainfall', 'albany_turbidity', 'norrie_turbidity']

# Convert data to datetime format and set as index
data['data']= pd.to_datetime(data['date'])
data.set_index('date', inplace = True)

data.head()

Unnamed: 0_level_0,albany_DO,norrie_DO,albany_rainfall,norrie_rainfall,albany_turbidity,norrie_turbidity,data
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-08-25 00:00:00,7.68,7.81,0.0,0.0,4.0,9.3,2011-08-25 00:00:00
2011-08-25 00:15:00,7.6,7.73,0.0,0.0,3.9,8.4,2011-08-25 00:15:00
2011-08-25 00:30:00,7.57,7.63,0.0,0.0,4.3,7.9,2011-08-25 00:30:00
2011-08-25 00:45:00,7.72,7.67,0.0,0.0,4.7,8.1,2011-08-25 00:45:00
2011-08-25 01:00:00,7.74,7.63,0.0,0.0,4.4,8.4,2011-08-25 01:00:00


### Multiple Linear Regression

In [38]:
# Define predictors and the target variable
X = data[['albany_DO', 'albany_rainfall']]
Y = data[['albany_turbidity']]

# Split data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 42)

# Create and fit model
model = LinearRegression()
model.fit(X_train, Y_train)

# Predict and evaluate
Y_pred = model.predict(X_test)
Y_pred

# Calculate evaluation metrics using scikit-learn's mean_squared_error and r2_score, and RMSE
rmse = np.sqrt(mean_squared_error(Y_test, Y_pred))

mse = mean_squared_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")
print(f"RMSE Score: {rmse}")

Mean Squared Error: 49245.97762215778
R² Score: 0.4907389518457509
RMSE Score: 221.9143474905527


## Create a widget to visualize different models

In [45]:
# Create a widget for selecting predictors
predictor_selector = widgets.SelectMultiple(
    options = data.columns,
    value = (data.columns[0],),
    description = 'Predictors'
)

# Create a dropdown for selecting the target variable
target_selector = widgets.Dropdown(
    options = data.columns,
    value = data.columns[1],
    description = 'Target'
)

# Button to evaluate the  model
evaluate_button = widgets.Button(description = 'Evaluate Model')

# Output widgets to display results
output = widgets.Output()

# Define the function to handle button clicks
def evaluate_model(b):
    with output:
        clear_output(wait=True)
        
        # Make sure target is not in the predictors
        selected_predictors = [item for item in predictor_selector.value]
        if target_selector.value in selected_predictors :
            print("Target variable must not be in the predictors.")
            return 
        
        # Prepare the data
        X = data[selected_predictors]
        y = data[target_selector.value]
        
        # Split data into training and testing
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 42)
        
        # Create and fit model
        model = LinearRegression()
        model.fit(X_train, Y_train)
        
       # Predict and calculate R2 and MSe
        y_pred = model.predict(X_test)
        r2 = r2_score(Y_test, Y_pred)
        mse = mean_squared_error(Y_test, Y_pred)
        
        # Display the R2 score and MSE
        print(f"R2: {r2:.4f}")
        print(f"MSE: {mse:.4f}")
        
# Display the widget
display(predictor_selector, target_selector, evaluate_button, output)
evaluate_button.on_click(evaluate_model)


SelectMultiple(description='Predictors', index=(0,), options=('albany_DO', 'norrie_DO', 'albany_rainfall', 'no…

Dropdown(description='Target', index=1, options=('albany_DO', 'norrie_DO', 'albany_rainfall', 'norrie_rainfall…

Button(description='Evaluate Model', style=ButtonStyle())

Output()