In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets
from ipywidgets import interact, FloatSlider
from IPython.display import display, clear_output

# Load the data: three sheets from one workbook, each with its Piermont column dropped.
# Every read goes through `file_path` so the workbook location is defined in one place.
file_path = 'data/Hurricane_Irene_Hudson_River.xlsx'
# NOTE: this sheet is selected by zero-based position (5 = sixth sheet), unlike the two
# below which are selected by name; reordering the workbook's sheets would break it.
do_data = pd.read_excel(file_path, sheet_name=5).drop(columns=['Piermont D.O. (ppm)'])
rainfall_data = pd.read_excel(file_path, sheet_name='Rainfall').drop(
    columns=['Piermont  Rainfall Daily Accumulation (Inches)']
)
turbidity_data = pd.read_excel(file_path, sheet_name='Turbidity').drop(
    columns=['Piermont Turbidity in NTU']
)

# Merge the three datasets on their shared timestamp column
# (default inner join: only timestamps present in all three sheets are kept)
data = rainfall_data.merge(turbidity_data, on='Date Time (ET)')
data = data.merge(do_data, on='Date Time (ET)')

# Update the column names (positional: timestamp, then rainfall, turbidity, D.O. pairs).
# The assignment raises if the merged frame does not have exactly seven columns.
data.columns = ['date', 'albany_rainfall', 'norrie_rainfall', 'albany_turbidity', 'norrie_turbidity','albany_do', 'norrie_do']

# Convert the date column to datetime format and set it as the index
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')



In [2]:
# Predictors: the rainfall and DO columns for both stations; target: Albany turbidity
predictor_columns = ['albany_rainfall', 'norrie_rainfall', 'albany_do', 'norrie_do']
X = data[predictor_columns]  # Adjust as needed
y = data['albany_turbidity']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion
model = LinearRegression().fit(X_train, y_train)

# Score the predictions made for the held-out rows
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r_squared = r2_score(y_test, y_pred)
print(f"RMSE: {rmse}")
print(f"R-squared: {r_squared}")


RMSE: 180.96630576072272
R-squared: 0.6327795601693718


In [3]:
# Both selection widgets offer the same choices: the columns of `data`
column_choices = data.columns

# Multi-select list of predictors; the first column starts out selected
predictor_selector = widgets.SelectMultiple(
    description='Predictors',
    options=column_choices,
    value=[column_choices[0]],
)

# Dropdown for the target variable; the second column starts out selected
target_selector = widgets.Dropdown(
    description='Target',
    options=column_choices,
    value=column_choices[1],
)

# Output area that receives the printed metrics and the plot
output = widgets.Output()

# Button that triggers a model evaluation when clicked
evaluate_button = widgets.Button(description="Evaluate Model")

# Define the function to handle button clicks
def evaluate_model(b):
    """Fit a linear regression on the current widget selections and report test metrics.

    Reads the chosen predictors from ``predictor_selector`` and the target from
    ``target_selector``, splits ``data`` 80/20 with a fixed seed, fits a
    ``LinearRegression`` and writes the R^2, the MSE and an actual-vs-predicted
    scatter plot into the ``output`` widget, replacing whatever was shown before.

    Parameters
    ----------
    b
        The button instance passed by ``Button.on_click``; not used.

    Returns
    -------
    None
        Results are rendered inside ``output``. When the selection is invalid
        (no predictors, or the target is among the predictors) a message is
        printed there instead and no model is fitted.
    """
    with output:
        clear_output(wait=True)

        selected_predictors = list(predictor_selector.value)

        # An empty selection would hand LinearRegression a zero-column frame,
        # which fit() rejects with a ValueError inside the callback.
        if not selected_predictors:
            print("Select at least one predictor.")
            return

        # Make sure the target is not in the predictors
        if target_selector.value in selected_predictors:
            print("Target variable must not be in the predictors.")
            return

        # Prepare the data
        X = data[selected_predictors]
        y = data[target_selector.value]

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Create and fit the model
        model = LinearRegression()
        model.fit(X_train, y_train)

        # Predict on the held-out rows and calculate R^2 and MSE
        y_pred = model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)

        # Display the R^2 score and MSE
        print(f"R^2: {r2:.4f}")
        print(f"MSE: {mse:.4f}")


        plt.scatter(y_test, y_pred) # SOLUTION NO PROMPT
        plt.xlabel('Actual') # SOLUTION NO PROMPT
        plt.ylabel('Predicted') # SOLUTION NO PROMPT
        plt.title('Actual vs Predicted') # SOLUTION NO PROMPT
        plt.show() # SOLUTION NO PROMPT


# Hook the click handler up to the button, then render the controls with the results area
evaluate_button.on_click(evaluate_model)
display(predictor_selector, target_selector, evaluate_button, output)



SelectMultiple(description='Predictors', index=(0,), options=('albany_rainfall', 'norrie_rainfall', 'albany_tu…

Dropdown(description='Target', index=1, options=('albany_rainfall', 'norrie_rainfall', 'albany_turbidity', 'no…

Button(description='Evaluate Model', style=ButtonStyle())

Output()