# Interactive Widget: Front End Code: KNeighbors

This is our final version of the widget.

Throughout this workbook, we used steps from the following web pages to inform our widgets.
- https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Basics.html
- https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html
- https://ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html

### Set Up

In [1]:
# Import the necessary data libraries.
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.neighbors import KNeighborsClassifier
import scipy.stats as stats

# Classification Accuracy
from sklearn import metrics

# The following are for Jupyter Widgets.
import ipywidgets as widgets
from IPython.display import display
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from ipywidgets import FloatSlider

In [2]:
# Set up datasets.
# Locations of the four CSV files in the project repository: the resampled
# features/labels and the test features/labels.
X_resampled_url = 'https://raw.githubusercontent.com/georgetown-analytics/Formula1/main/data/interim/X_resampled_forKNeighborWidget.csv'
y_resampled_url = 'https://raw.githubusercontent.com/georgetown-analytics/Formula1/main/data/interim/y_resampled_forKNeighborWidget.csv'
X_test_url = 'https://raw.githubusercontent.com/georgetown-analytics/Formula1/main/data/interim/X_test_forKNeighborWidget.csv'
y_test_url = 'https://raw.githubusercontent.com/georgetown-analytics/Formula1/main/data/interim/y_test_forKNeighborWidget.csv'

# Download each file into a DataFrame (comma-separated, parsed with the
# pure-Python engine).
X_resampled = pd.read_csv(X_resampled_url, sep=',', engine='python')
y_resampled = pd.read_csv(y_resampled_url, sep=',', engine='python')
X_test = pd.read_csv(X_test_url, sep=',', engine='python')
y_test = pd.read_csv(y_test_url, sep=',', engine='python')

From checking the type of `y_resampled` in `InteractiveWidget_BackEnd.ipynb`, we know that `y_resampled` and `y_test` must each be a pandas Series (not a DataFrame) for our model to run correctly. This site (https://datatofish.com/pandas-dataframe-to-series/) shows how to convert a single-column DataFrame into a Series.

In [3]:
# Change the y_resampled dataframe into a y_resampled series.
# Squeezing only along the column axis turns a one-column DataFrame into a
# Series. A bare squeeze() collapses every length-1 axis, so a frame that also
# had exactly one row would become a scalar instead of a Series.
y_resampled = y_resampled.squeeze(axis="columns")

In [4]:
# Change the y_test dataframe into a y_test series.
# Squeezing only along the column axis turns a one-column DataFrame into a
# Series. A bare squeeze() collapses every length-1 axis, so a frame that also
# had exactly one row would become a scalar instead of a Series.
y_test = y_test.squeeze(axis="columns")

### Create the Modeling Functions

In [5]:
# Create the function widgetpred.
def widgetpred(X_resampled, y_resampled, X_test, y_test, input_test, estimator, **kwargs):
    """
    Fit an estimator, then predict both the test set and a user-supplied input.

    Parameters
    ----------
    X_resampled, y_resampled
        Features and labels passed to ``estimator.fit``.
    X_test
        Features passed to ``estimator.predict`` to produce the test-set
        predictions.
    y_test
        Not used by this function; kept in the signature so existing callers
        keep working.
    input_test
        Features for the extra input to predict (the caller in this notebook
        passes a one-row DataFrame).
    estimator
        Any object exposing ``fit(X, y, **kwargs)`` and ``predict(X)``.
        It is fitted in place.
    **kwargs
        Forwarded unchanged to ``estimator.fit``.

    Returns
    -------
    list
        ``[predicted, inputpred]``: the predictions for ``X_test`` followed by
        the predictions for ``input_test``.
    """
    # Fit the supplied estimator on the resampled data.
    estimator.fit(X_resampled, y_resampled, **kwargs)

    # Predict the test set first, then the user's input.
    predicted = estimator.predict(X_test)
    inputpred = estimator.predict(input_test)

    # Return both sets of predictions.
    return [predicted, inputpred]

In [6]:
# Create the function conmatrix.
def conmatrix(y_test, predicted, inputpred):
    """
    Print how often the model is right for the class it just predicted.

    Builds the confusion matrix of ``y_test`` against ``predicted`` and, based
    on ``inputpred``, prints either the precision (share of "will finish"
    predictions that were correct) or the negative predictive value (share of
    "will not finish" predictions that were correct), as a percentage rounded
    to two decimals.

    Parameters
    ----------
    y_test
        True labels, encoded as 0 / 1.
    predicted
        Predicted labels for the same rows as ``y_test``.
    inputpred
        Prediction for the user's input; compared against 1. The caller in
        this notebook passes the one-element result of ``predict``.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    # labels=[0, 1] pins the matrix to 2x2 even if one class never occurs in
    # y_test/predicted; otherwise the matrix could be 1x1 and indexing fails.
    confusion = metrics.confusion_matrix(y_test, predicted, labels=[0, 1])
    # Row = true label, column = predicted label, so the flattened order is
    # TN, FP, FN, TP.
    TN, FP, FN, TP = confusion.ravel()

    predicted_positive = TP + FP
    predicted_negative = TN + FN

    # When the prediction is positive, how often is it correct? (precision)
    # Undefined (nan) if the model never predicted the positive class.
    if predicted_positive:
        precision = round((TP / predicted_positive) * 100, 2)
    else:
        precision = float("nan")

    # When the prediction is negative, how often is it correct? (NPV)
    # Undefined (nan) if the model never predicted the negative class.
    if predicted_negative:
        negative_predictive_value = round((TN / predicted_negative) * 100, 2)
    else:
        negative_predictive_value = float("nan")

    if inputpred == 1:
        print("When our model predicts that a car will finish the race, it is correct", precision, "% of the time.")
    else:
        print("When our model predicts that a car will not finish the race, it is correct", negative_predictive_value, "% of the time.")

### Create the Widget

In [7]:
"""
Establish the function "predictfinish", which allows selection of one of two track types,
whether the track is historic or not, and how popular the circuit is,
as well as the input of one of each of the following values:
year, grid, alt, average_lap_time, normalized_minLapTime, PRCP, TAVG.

Place these values in the dataframe input_df and display the dataframe.

Create prediction based on widgetpred function and display the prediction:
0 for did not finish, 1 for did finish.
"""
def _replace_outlier(value, q1, q3, whisker, median):
    """Return ``median`` if ``value`` lies outside [q1 - whisker*IQR, q3 + whisker*IQR], else ``value``."""
    iqr = q3 - q1
    lowertail = q1 - whisker * iqr
    uppertail = q3 + whisker * iqr
    if value > uppertail or value < lowertail:
        return median
    return value


def predictfinish(trackType, historic, circuit, year, grid, alt, average_lap_time, normalized_minLapTime, PRCP, TAVG):
    """
    Turn the widget inputs into one feature row, display it, and print the
    KNeighbors prediction for it.

    Parameters
    ----------
    trackType : str
        "Race" is encoded as 0; any other value is encoded as 1.
    historic : str
        "Not Historic" is encoded as 0; any other value is encoded as 1.
    circuit : str
        One of the six circuit-usage labels listed in ``circuit_options``
        below; one-hot encoded into oneHot_circuits_1 .. oneHot_circuits_6.
    year, grid, TAVG
        Placed in the feature row unchanged.
    alt
        Transformed as log(alt + 8); must therefore be greater than -8.
    average_lap_time
        Log-transformed, then replaced by a fixed median if it is an outlier.
    normalized_minLapTime
        Replaced by a fixed median if it is an outlier; otherwise unchanged.
    PRCP
        Transformed as log(PRCP + 1).

    Raises
    ------
    ValueError
        If ``circuit`` is not a known label, or if ``alt`` is -8 or lower
        (the logarithm would be undefined and the feature row would contain
        NaN or -inf).

    Returns
    -------
    None
        The feature row is displayed and the prediction is printed.
    """
    # Order matters: position i corresponds to column oneHot_circuits_{i+1}.
    circuit_options = [
        "Used 500+ times",
        "Used 400-499 times",
        "Used 300-399 times",
        "Used 200-299 times",
        "Used 100-199 times",
        "Used less than 100 times",
    ]

    # Validate up front so bad input fails with a clear message instead of an
    # unbound-variable error or a NaN reaching the estimator.
    if circuit not in circuit_options:
        raise ValueError(f"Unknown circuit option {circuit!r}; expected one of {circuit_options}")
    if alt + 8 <= 0:
        raise ValueError(f"alt must be greater than -8 for the log transform; got {alt!r}")

    # Encode the two binary dropdowns.
    trackType = 0 if trackType == "Race" else 1
    isHistoric = 0 if historic == "Not Historic" else 1

    # One-hot encode the circuit-usage bucket.
    one_hot_circuits = [int(circuit == option) for option in circuit_options]

    # Transform average_lap_time, then move any potential outlier to the median.
    # NOTE(review): the quartiles and medians below are hard-coded; presumably
    # they were computed on the training data — confirm against the back end.
    normalized_avgLapTime = _replace_outlier(
        np.log(average_lap_time),
        q1=-0.019303,
        q3=0.006690,
        whisker=2.5,
        median=-0.005962837883204569,
    )

    # Move any potential outlier in normalized_minLapTime to the median.
    normalized_minLapTime = _replace_outlier(
        normalized_minLapTime,
        q1=0.984717,
        q3=1.006281,
        whisker=2.0,
        median=0.995628475361378,
    )

    # Transform altitude: shift by +8 (i.e. +1 - (-7)) before taking the log.
    alt_trans = np.log(alt + 1 - (-7))

    # Transform precipitation.
    PRCP_trans = np.log(PRCP + 1)

    # Establish the data of our input_df dataframe (column order must match
    # the column names below).
    inputdata = [[grid, trackType, year, TAVG, isHistoric, *one_hot_circuits,
                  alt_trans, PRCP_trans, normalized_minLapTime, normalized_avgLapTime]]

    # Establish the dataframe input_df itself with pd.DataFrame.
    input_df = pd.DataFrame(inputdata, columns=[
        'grid', 'trackType', 'year', 'TAVG',
        'isHistoric', 'oneHot_circuits_1', 'oneHot_circuits_2',
        'oneHot_circuits_3', 'oneHot_circuits_4', 'oneHot_circuits_5',
        'oneHot_circuits_6', 'alt_trans', 'PRCP_trans', 'normalized_minLapTime',
        'normalized_avgLapTime',
    ])

    display(input_df)

    # Using the widgetpred function, predict whether the car will finish the race or not given input_df.
    # pred[0] holds the test-set predictions, pred[1] the prediction for input_df.
    pred = widgetpred(X_resampled, y_resampled, X_test, y_test, input_df, KNeighborsClassifier())

    # Determine what interactors will see given the data they input.
    if pred[1] == 1:
        writtenpred = "finish the race."
    else:
        writtenpred = "not finish the race."

    print("According to our KNeighbors model, your car is predicted to", writtenpred)

    conmatrix(y_test, pred[0], pred[1])

In [8]:
# Create a widget that will interact with the predictfinish function.
# The altitude box starts at -7 because predictfinish computes log(alt + 8):
# values of -8 or lower have no real logarithm and would put NaN/-inf into the
# model input. The trailing semicolon suppresses the cell's return value.
interact(
    predictfinish,
    trackType=widgets.Dropdown(options=["Race", "Street"], value="Race", description='Track Type'),
    historic=widgets.Dropdown(options=["Not Historic", "Historic"], value="Not Historic", description='Historic?'),
    circuit=widgets.Dropdown(
        options=["Used 500+ times", "Used 400-499 times", "Used 300-399 times",
                 "Used 200-299 times", "Used 100-199 times", "Used less than 100 times"],
        value="Used less than 100 times",
        description='Circuit',
    ),
    year=widgets.IntSlider(min=1996, max=2021, description='Year', disabled=False, continuous_update=False),
    grid=widgets.IntSlider(min=0, max=30, description='Grid', disabled=False, continuous_update=False),
    alt=widgets.BoundedFloatText(min=-7, max=2500, description='Altitude', disabled=False, continuous_update=False),
    average_lap_time=widgets.FloatSlider(min=0.1, max=6.0, value=0.1, description='Avg Lap Time', disabled=False, continuous_update=False),
    normalized_minLapTime=widgets.FloatSlider(min=0.1, max=6.0, value=0.1, description='Min Lap Time', disabled=False, continuous_update=False),
    PRCP=widgets.FloatSlider(min=0, max=10, description='Precipitation', disabled=False, continuous_update=False),
    TAVG=widgets.FloatSlider(min=0, max=110, description='Avg Temp (F)', disabled=False, continuous_update=False),
);

interactive(children=(Dropdown(description='Track Type', options=('Race', 'Street'), value='Race'), Dropdown(d…