Welcome to the feature playground! In this notebook, you can interact with the three features in this model.

Feature 1: Creates a correlation matrix showing positive and negative correlations between Combine performance and career performance. Select a specific position to get a correlation matrix for that position only. For example, if you only want to see the correlations for quarterbacks, select "QB."
The following positions can be called:

In [18]:
# Position codes that can be selected for the correlation matrix
positions = [
    "RB", "K", "TE", "QB", "WR", "G", "RG", "FB", "P", "C",
    "LCB", "MLB", "LB", "T", "WB", "SS", "DE", "FS", "DB", "LT",
    "S", "DT", "RCB", "RDE", "LDE", "RE", "RT", "LLB", "WR/QB", "LDT",
    "LG", "NT", "CB", "DL", "RLB", "RT/LT", "OL",
]

Feature 2: Input player Combine statistics to predict career lifespan. Based on historical Combine performance data and career data, our model will predict how long this player's career will be.

Feature 3: Input player Combine statistics to predict career performance. Based on historical Combine performance data and career data, our model will predict how strong this player's career will be. Fantasy PPR points are used as a normalized metric to represent player performance.

In [19]:
# Import necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from transform import transform_data

Feature 1: Correlation Matrix. Select a specific position (or select "All") to create your correlation matrix! Run the first cell, select your position, then run the remaining two cells. You will receive a correlation matrix of historical player data as well as how many players are included in the creation of the correlation matrix.

In [21]:
# Dropdown choices: 'All' (no filtering) followed by every position code
positions = [
    "All", "RB", "K", "TE", "QB", "WR", "G", "RG", "FB", "P",
    "C", "LCB", "MLB", "LB", "T", "WB", "SS", "DE", "FS", "DB",
    "LT", "S", "DT", "RCB", "RDE", "LDE", "RE", "RT", "LLB", "WR/QB",
    "LDT", "LG", "NT", "CB", "DL", "RLB", "RT/LT", "OL",
]

# Position selector; 'All' is preselected so the unfiltered view is the default
position_dropdown = widgets.Dropdown(
    description='Position:',
    options=positions,
    value='All',
    disabled=False,
)

# Render the selector in this cell's output
display(position_dropdown)

Dropdown(description='Position:', options=('All', 'RB', 'K', 'TE', 'QB', 'WR', 'G', 'RG', 'FB', 'P', 'C', 'LCB…

In [4]:
def display_correlation_matrix(position):
    """Plot a correlation heatmap of the numeric player columns for one position.

    Loads the merged dataset with ``transform_data()``, keeps only the rows
    whose ``Position`` equals ``position`` (every row when ``position`` is
    ``'All'``), drops the identifier/text columns, prints how many players
    remain, and draws an annotated heatmap of the pairwise correlations.

    Parameters
    ----------
    position : str
        A position code such as ``'QB'``, or ``'All'`` to skip filtering.

    Returns
    -------
    None
        The result is printed and plotted, not returned.
    """
    # Load and filter data
    merged_data = transform_data()
    if position != 'All':
        merged_data = merged_data[merged_data['Position'] == position]

    # Drop irrelevant columns. A non-in-place drop returns a new frame, so we
    # never mutate a boolean-filtered slice (the source of SettingWithCopyWarning)
    # or the object handed back by transform_data().
    filtered_data = merged_data.drop(
        columns=['Pos', 'School', 'Position', 'Team', 'Player', 'DraftedTeam']
    )

    # Display the number of players included
    num_players = len(filtered_data)
    print(f"Number of players included in the correlation matrix for '{position}': {num_players}")

    # A correlation needs at least two observations; with fewer, every cell
    # would be NaN and the heatmap would be empty.
    if num_players < 2:
        print("At least two players are needed to compute correlations; nothing to plot.")
        return

    # Restrict to numeric/bool columns explicitly: recent pandas versions raise
    # in corr() when a non-numeric column is present instead of skipping it.
    numeric_data = filtered_data.select_dtypes(include=['number', 'bool'])
    corr_matrix = numeric_data.corr()

    # Set up the matplotlib figure
    plt.figure(figsize=(12, 10))

    # Draw the heatmap
    sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm')

    # Show the plot
    plt.show()

In [5]:
# Link the dropdown to display_correlation_matrix: the function is called with
# the dropdown's current value as `position`. The interactive object is the
# cell's last expression, so the notebook renders it as this cell's output.
widgets.interactive(display_correlation_matrix, position=position_dropdown)

interactive(children=(Dropdown(description='Position:', options=('All', 'RB', 'K', 'TE', 'QB', 'WR', 'G', 'RG'…

Feature 2: Player Lifespan Calculator

In [6]:
import pandas as pd
from transform import transform_data
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score


In [7]:
def filter_data():
    """Return the modelling frame: merged data minus text/fantasy-total columns.

    Loads the merged dataset with ``transform_data()``, removes the columns
    listed in ``columns_to_drop``, keeps only rows whose ``LastSeason`` is
    before 2023, and adds ``YearsPlayed = LastSeason - FirstSeason``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the object returned by ``transform_data()`` is not modified.
    """
    columns_to_drop = ['Pos', 'School', 'Position', 'Team', 'TotalFantasyPoints',
                       'TotalPprFantasyPoints', 'FantasyPointsPerGame',
                       'Player', 'DraftedTeam']

    # Non-in-place drop so whatever transform_data() returned is left untouched.
    merged_data = transform_data().drop(columns=columns_to_drop)

    # NOTE(review): the 2023 cutoff presumably excludes players whose careers
    # are still in progress - confirm against the data's latest season.
    # .copy() makes the filtered rows an independent frame, so the column
    # assignment below does not raise SettingWithCopyWarning.
    merged_data = merged_data[merged_data['LastSeason'] < 2023].copy()
    merged_data['YearsPlayed'] = merged_data['LastSeason'] - merged_data['FirstSeason']
    return merged_data

In [8]:
def train_model(target='YearsPlayed'):
    """Fit a linear regression that predicts ``target`` from Combine measurements.

    The data comes from ``filter_data()``. Missing feature values are replaced
    by the column mean, the rows are split 80/20 with a fixed seed, and the
    model is fitted on the 80% portion.

    Parameters
    ----------
    target : str, optional
        Name of the column to predict. Defaults to ``'YearsPlayed'``.

    Returns
    -------
    tuple
        ``(model, imputer)``: the fitted ``LinearRegression`` and the fitted
        ``SimpleImputer``. Apply the imputer to new inputs before predicting.
    """
    features = ['Wt', '40yd', 'Vertical', 'Bench', 'Broad Jump', '3Cone', 'Shuttle']

    # LinearRegression rejects NaN targets, so rows without a target value
    # cannot be used for training.
    filtered_data = filter_data().dropna(subset=[target])

    # Separate features and target variable. The target is selected as a 1-D
    # Series (not a one-column frame) so model.predict() returns one number per
    # row rather than a nested one-element array.
    X = filtered_data[features]
    y = filtered_data[target]

    # Handle missing values in features
    imputer = SimpleImputer(strategy='mean')
    X_imputed = imputer.fit_transform(X)

    # Split the data into training and testing sets. The held-out 20% is not
    # scored here; the split is kept so the model is trained on the same
    # seeded subset as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X_imputed, y, test_size=0.2, random_state=42
    )

    # Train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    return model, imputer

In [9]:
# Train once and keep the fitted model and imputer as notebook globals;
# make_prediction() reads them by these names. Note that the Feature 3 section
# later rebinds the same two names to its own model and imputer.
model, imputer = train_model()

Run the following cell and input the data for your player:

In [10]:
# One numeric entry box per Combine measurement, created in the same order as
# the feature columns used for training
(wt_input, yd40_input, vertical_input, bench_input,
 broad_jump_input, cone3_input, shuttle_input) = (
    widgets.FloatText(value=0, description=label)
    for label in ('Weight (Wt):', '40yd:', 'Vertical:', 'Bench:',
                  'Broad Jump:', '3Cone:', 'Shuttle:')
)

# Stack the boxes vertically and render them in this cell's output
input_widgets = widgets.VBox([
    wt_input,
    yd40_input,
    vertical_input,
    bench_input,
    broad_jump_input,
    cone3_input,
    shuttle_input,
])
display(input_widgets)

VBox(children=(FloatText(value=0.0, description='Weight (Wt):'), FloatText(value=0.0, description='40yd:'), Fl…

After inputting your player data, run the following cell to receive your prediction:

In [11]:

def make_prediction():
    """Print the predicted number of years played for the stats in the widgets.

    Reads the current values of the notebook-level input widgets
    (``wt_input`` ... ``shuttle_input``), runs them through the notebook-level
    ``imputer`` and ``model``, and prints a single number.

    Returns
    -------
    None
    """
    feature_names = ['Wt', '40yd', 'Vertical', 'Bench', 'Broad Jump', '3Cone', 'Shuttle']
    input_stats = [wt_input.value, yd40_input.value, vertical_input.value,
                   bench_input.value, broad_jump_input.value, cone3_input.value, shuttle_input.value]

    input_df = pd.DataFrame([input_stats], columns=feature_names)

    # Impute missing values.
    # NOTE(review): the widgets default to 0, not NaN, so a field left untouched
    # is passed to the model as a literal 0 and is not imputed - confirm whether
    # 0 should be treated as "not measured".
    input_df_imputed = imputer.transform(input_df)

    # Make predictions
    predicted_years = model.predict(input_df_imputed)

    # predict() may return shape (1,) or (1, 1) depending on how the model was
    # fitted; flatten so a plain number is printed instead of an array like
    # "[5.36792267]".
    years = float(predicted_years.ravel()[0])
    print(f"Predicted Years Played: {years:.2f}")

# Run the prediction using whatever values are currently entered in the widgets
make_prediction()

Predicted Years Played: [5.36792267]


Feature 3: Player Performance Calculator (Average Fantasy PPR/Game)

In [12]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer

In [13]:
def filter_data():
    """Return the modelling frame: merged data minus text/fantasy-total columns.

    Loads the merged dataset with ``transform_data()``, removes the columns
    listed in ``columns_to_drop`` (``PprFantasyPointsPerGame`` is not among
    them and stays available), keeps only rows whose ``LastSeason`` is before
    2023, and adds ``YearsPlayed = LastSeason - FirstSeason``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the object returned by ``transform_data()`` is not modified.
    """
    columns_to_drop = ['Pos', 'School', 'Position', 'Team', 'TotalFantasyPoints',
                       'TotalPprFantasyPoints', 'FantasyPointsPerGame',
                       'Player', 'DraftedTeam']

    # Non-in-place drop so whatever transform_data() returned is left untouched.
    merged_data = transform_data().drop(columns=columns_to_drop)

    # NOTE(review): the 2023 cutoff presumably excludes players whose careers
    # are still in progress - confirm against the data's latest season.
    # .copy() makes the filtered rows an independent frame, so the column
    # assignment below does not raise SettingWithCopyWarning.
    merged_data = merged_data[merged_data['LastSeason'] < 2023].copy()
    merged_data['YearsPlayed'] = merged_data['LastSeason'] - merged_data['FirstSeason']
    return merged_data

In [14]:
def train_model(target='PprFantasyPointsPerGame'):
    """Fit a linear regression that predicts ``target`` from Combine measurements.

    The data comes from ``filter_data()``. Missing feature values are replaced
    by the column mean, the rows are split 80/20 with a fixed seed, and the
    model is fitted on the 80% portion.

    Parameters
    ----------
    target : str, optional
        Name of the column to predict. Defaults to ``'PprFantasyPointsPerGame'``.

    Returns
    -------
    tuple
        ``(model, imputer)``: the fitted ``LinearRegression`` and the fitted
        ``SimpleImputer``. Apply the imputer to new inputs before predicting.
    """
    features = ['Wt', '40yd', 'Vertical', 'Bench', 'Broad Jump', '3Cone', 'Shuttle']

    # LinearRegression rejects NaN targets, so rows without a target value
    # cannot be used for training.
    filtered_data = filter_data().dropna(subset=[target])

    # Separate features and target variable. The target is selected as a 1-D
    # Series (not a one-column frame) so model.predict() returns one number per
    # row rather than a nested one-element array.
    X = filtered_data[features]
    y = filtered_data[target]

    # Handle missing values in features
    imputer = SimpleImputer(strategy='mean')
    X_imputed = imputer.fit_transform(X)

    # Split the data into training and testing sets. The held-out 20% is not
    # scored here; the split is kept so the model is trained on the same
    # seeded subset as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X_imputed, y, test_size=0.2, random_state=42
    )

    # Train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    return model, imputer

In [15]:
# Train the points-per-game model. This rebinds the notebook globals `model`
# and `imputer`, replacing the ones assigned in the Feature 2 section, so any
# make_prediction() call run after this cell uses this model.
model, imputer = train_model()

Run the above four cells, then run the below cell and input your player stats.

In [16]:
# One numeric entry box per Combine measurement, created in the same order as
# the feature columns used for training
(wt_input, yd40_input, vertical_input, bench_input,
 broad_jump_input, cone3_input, shuttle_input) = (
    widgets.FloatText(value=0, description=label)
    for label in ('Weight (Wt):', '40yd:', 'Vertical:', 'Bench:',
                  'Broad Jump:', '3Cone:', 'Shuttle:')
)

# Stack the boxes vertically and render them in this cell's output
input_widgets = widgets.VBox([
    wt_input,
    yd40_input,
    vertical_input,
    bench_input,
    broad_jump_input,
    cone3_input,
    shuttle_input,
])
display(input_widgets)

VBox(children=(FloatText(value=0.0, description='Weight (Wt):'), FloatText(value=0.0, description='40yd:'), Fl…

After inputting your player stats, run the following cell to receive your player's performance prediction (in PPR Fantasy points per game):

In [17]:
def make_prediction():
    """Print the predicted PPR fantasy points per game for the stats in the widgets.

    Reads the current values of the notebook-level input widgets
    (``wt_input`` ... ``shuttle_input``), runs them through the notebook-level
    ``imputer`` and ``model``, and prints a single number.

    Returns
    -------
    None
    """
    feature_names = ['Wt', '40yd', 'Vertical', 'Bench', 'Broad Jump', '3Cone', 'Shuttle']
    input_stats = [wt_input.value, yd40_input.value, vertical_input.value,
                   bench_input.value, broad_jump_input.value, cone3_input.value, shuttle_input.value]

    input_df = pd.DataFrame([input_stats], columns=feature_names)

    # Impute missing values.
    # NOTE(review): the widgets default to 0, not NaN, so a field left untouched
    # is passed to the model as a literal 0 and is not imputed - confirm whether
    # 0 should be treated as "not measured".
    input_df_imputed = imputer.transform(input_df)

    # Make predictions
    predicted_points = model.predict(input_df_imputed)

    # predict() may return shape (1,) or (1, 1) depending on how the model was
    # fitted; flatten so a plain number is printed instead of an array like
    # "[19.86641464]".
    points = float(predicted_points.ravel()[0])
    print(f"Predicted PPR Fantasy Points Per Game: {points:.2f}")

# Run the prediction using whatever values are currently entered in the widgets
make_prediction()

Predicted PPR Fantasy Points Per Game: [19.86641464]
