# Website Notebook

In [136]:
# Standard imports
# If any of these don't work, try doing `pip install _____`, or try looking up the error message.
import numpy as np
import pandas as pd
import json
import time
import os.path
from os import path
import math
import datetime
import unidecode
import requests
from bs4 import BeautifulSoup

In [137]:
# Modules from sportsreference.ncaab for college basketball
from sportsreference.ncaab.boxscore import Boxscore as NCAAB_Boxscore
from sportsreference.ncaab.conferences import Conferences as NCAAB_Conferences
from sportsreference.ncaab.rankings import Rankings as NCAAB_Rankings
from sportsreference.ncaab.roster import Player as NCAAB_Player
from sportsreference.ncaab.roster import Roster as NCAAB_Roster
from sportsreference.ncaab.schedule import Schedule as NCAAB_Schedule
from sportsreference.ncaab.teams import Teams as NCAAB_Teams

# Modules from sportsreference.nba for NBA basketball
from sportsreference.nba.boxscore import Boxscore as NBA_Boxscore
from sportsreference.nba.roster import Player as NBA_Player
from sportsreference.nba.roster import Roster as NBA_Roster
from sportsreference.nba.schedule import Schedule as NBA_Schedule
from sportsreference.nba.teams import Teams as NBA_Teams

# Introduction

Hi everyone! In this notebook, we will construct a variety of machine learning models that predict NBA rookie stat lines for college players whose names are entered through the Anvil app!

Let's get started!

# A. College Data

**In order for us to pass up college stats for this player to be viewed in the front-end, we will need to:**
1. Clean the name
2. Find the college player stats for the input name
    - If they don't exist, tell the user that they don't, and provide the correct error message

### 1. Clean the name

In [217]:
# This method should hopefully reduce the number of failure cases.
def convert_nba_ncaa_name(name: str) -> str:
    """
    Converts a player's display name into the NCAA player_id format.

    The id is the ASCII-transliterated, lower-cased name with its words joined by
    hyphens and a trailing "-1", e.g. "Deandre Ayton" -> "deandre-ayton-1".

    To reduce the number of failure cases:
    - leading, trailing and repeated whitespace is ignored, so it can no longer
      produce empty segments such as "deandre--ayton-1";
    - apostrophes and periods are removed rather than carried into the id.

    The "-1" suffix is always appended, so a player whose id ends in a different
    number will still not be found.

    name: The player's name as typed by the user (str)
    """
    # Transliterate before lower-casing so that any upper-case letters produced by
    # the transliteration are lower-cased as well.
    ascii_name = unidecode.unidecode(name).lower()

    # NOTE(review): Sports Reference ids appear to omit apostrophes and periods
    # ("De'Aaron Fox" -> "deaaron-fox-1", "P.J. Washington" -> "pj-washington-1");
    # confirm against the site if lookups for such names still fail.
    for mark in ("'", "."):
        ascii_name = ascii_name.replace(mark, "")

    # split() with no arguments discards leading/trailing whitespace and collapses runs.
    return "-".join(ascii_name.split()) + "-1"

### 2. Find the college player stats for the input name

In [218]:
def get_college_stats(player_name: str) -> dict:
    """
    Looks up a player's college statistics and packages the outcome for the caller.

    Returns a dictionary with the following keys:

    success: Whether the player was found (bool)
    data: One-row pd.DataFrame indexed by player_name (index name 'Name') whose
          columns are prefixed with 'NCAAB_'; None when success is False
    error: Error message when success is False, otherwise None (str)
    """
    def failure(message: str) -> dict:
        # Every failure shares the same response shape; only the message differs.
        return {'success': False, 'data': None, 'error': message}

    # Names containing a hyphen are rejected up front, before any lookup work is done.
    if '-' in player_name:
        return failure('This player has a hyphen in his name, and unfortunately, we cannot handle this type of data. Please re-try with a player without a hyphen in their name!')

    not_found = "This player doesn't exist (or its name is not in the correct format). Passing up empty list."

    # Checking if player does exist with current name
    try:
        player_data = NCAAB_Player(convert_nba_ncaa_name(player_name)).dataframe
    except TypeError:
        return failure(not_found)

    # A lookup that yields no frame (or no rows) is reported as a failure instead of
    # crashing on the DataFrame operations below.
    if player_data is None or player_data.empty:
        return failure(not_found)

    prefixed = player_data.rename(columns=lambda column: 'NCAAB_' + column)

    # Use the second-to-last row (presumably the final row is a career total, making
    # this the last season played - TODO confirm). A single-row frame uses its only row.
    row_position = max(len(prefixed) - 2, 0)
    last_year = prefixed.iloc[[row_position]].set_index(
        pd.Index(data=[player_name], name='Name')
    )

    return {'success': True, 'data': last_year, 'error': None}  # Change for Matt's formatting

In [283]:
# Fetch Deandre Ayton's college stats as the worked example used by the cells below.
ayton = get_college_stats('Deandre Ayton')

In [292]:
ayton['data']

Unnamed: 0_level_0,NCAAB_assist_percentage,NCAAB_assists,NCAAB_block_percentage,NCAAB_blocks,NCAAB_box_plus_minus,NCAAB_conference,NCAAB_defensive_box_plus_minus,NCAAB_defensive_rebound_percentage,NCAAB_defensive_rebounds,NCAAB_defensive_win_shares,...,NCAAB_true_shooting_percentage,NCAAB_turnover_percentage,NCAAB_turnovers,NCAAB_two_point_attempts,NCAAB_two_point_percentage,NCAAB_two_pointers,NCAAB_usage_percentage,NCAAB_weight,NCAAB_win_shares,NCAAB_win_shares_per_40_minutes
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Deandre Ayton,10.2,57,6.1,66,10.9,pac-12,4.0,28.2,287,2.1,...,0.65,11.3,69,416,0.635,264,26.6,250,7.6,0.259


# B. Make Predictions

**In order for us to make predictions from the college data to be rendered in the front-end, we need to:**
1. Drop unneeded columns
2. Clean the data the same way we cleaned our initial data

### 1. Drop unneeded columns

In [284]:
def drop_columns(data: pd.DataFrame) -> pd.DataFrame:
    """
    Returns the college data without the columns the later steps do not use.

    Two groups of columns are removed, in this order:
    1. qualitative columns (conference, player id, team abbreviation);
    2. columns dropped due to NaNs inside the data.

    The input frame is left untouched; a new frame is returned. Every listed column
    must be present, since pandas raises a KeyError for a missing label by default.
    """
    # Columns holding qualitative data.
    qualitative_columns = [
        'NCAAB_conference',
        'NCAAB_player_id',
        'NCAAB_team_abbreviation',
    ]
    # Columns dropped due to NaNs inside the data.
    nan_columns = [
        'NCAAB_box_plus_minus',
        'NCAAB_defensive_box_plus_minus',
        'NCAAB_offensive_box_plus_minus',
        'NCAAB_player_efficiency_rating',
        'NCAAB_three_point_percentage',
    ]

    without_qualitative = data.drop(columns=qualitative_columns)
    return without_qualitative.drop(columns=nan_columns)

### 2. Clean the data the same way we did initially

In [285]:
# Remove the qualitative and NaN-prone columns from the example player's row.
new_ayton = drop_columns(ayton['data'])

In [286]:
def clean_qualitative(data: pd.DataFrame) -> pd.DataFrame:
    """
    Cleans the qualitative columns (height, position).

    - 'NCAAB_height' is converted from a "feet-inches" string to total inches.
    - 'NCAAB_position' is reduced to its first listed position.
    - 'Center', 'Forward' and 'Guard' indicator columns (1/0) are added, one-hot
      encoding each row's own cleaned position.

    The input frame is not modified; a cleaned copy is returned. This keeps the
    function safe to call again on the same input (the height strings are still
    there to parse).

    data: Frame containing 'NCAAB_height' and 'NCAAB_position' columns
    """
    positions = ['Center', 'Forward', 'Guard']

    def convert_height(height: str) -> int:
        """
        Convert height from string to int (6-11 -> 83)
        """
        feet, inches = height.split("-")
        return int(feet) * 12 + int(inches)

    def clean_position(position: str) -> str:
        """
        If the player has a hyphenated position, remove the second one.
        """
        return position.split('-')[0]

    # Work on a copy so the caller's frame keeps its original values.
    cleaned = data.copy()
    cleaned['NCAAB_height'] = cleaned['NCAAB_height'].apply(convert_height)
    cleaned['NCAAB_position'] = cleaned['NCAAB_position'].apply(clean_position)

    # One-hot encode from each row's actual position. Comparing the column itself
    # keeps the result aligned with this frame's index, with no reliance on any
    # variable defined elsewhere in the notebook.
    for position in positions:
        cleaned[position] = (cleaned['NCAAB_position'] == position).astype(int)

    return cleaned

In [287]:
# Convert height to inches, simplify the position and add the position indicator columns.
clean_ayton = clean_qualitative(new_ayton)

In [288]:
def feature_extraction(data: pd.DataFrame) -> pd.DataFrame:
    """
    Selects the columns we have qualitatively chosen to be the features for our model.

    The returned frame contains exactly these columns, in the order listed below,
    for every row of the input. All of them must already exist in `data`.
    """
    model_features = (
        'NCAAB_assists',
        'NCAAB_blocks',
        'NCAAB_field_goal_attempts',
        'NCAAB_field_goal_percentage',
        'NCAAB_field_goals',
        'NCAAB_free_throw_attempt_rate',
        'NCAAB_free_throw_attempts',
        'NCAAB_free_throw_percentage',
        'NCAAB_free_throws',
        'NCAAB_games_played',
        'NCAAB_games_started',
        'NCAAB_height',
        'NCAAB_personal_fouls',
        'NCAAB_points',
        'NCAAB_steal_percentage',
        'NCAAB_steals',
        'NCAAB_three_point_attempt_rate',
        'NCAAB_three_point_attempts',
        'NCAAB_total_rebound_percentage',
        'NCAAB_total_rebounds',
        'NCAAB_turnover_percentage',
        'NCAAB_turnovers',
        'NCAAB_two_point_attempts',
        'NCAAB_two_point_percentage',
        'NCAAB_win_shares',
        # One-hot position indicators.
        'Guard',
        'Forward',
        'Center',
    )
    selected = list(model_features)
    return data[selected]

In [289]:
# Keep only the columns selected as model features.
final_ayton = feature_extraction(clean_ayton)

In [290]:
final_ayton

Unnamed: 0_level_0,NCAAB_assists,NCAAB_blocks,NCAAB_field_goal_attempts,NCAAB_field_goal_percentage,NCAAB_field_goals,NCAAB_free_throw_attempt_rate,NCAAB_free_throw_attempts,NCAAB_free_throw_percentage,NCAAB_free_throws,NCAAB_games_played,...,NCAAB_total_rebound_percentage,NCAAB_total_rebounds,NCAAB_turnover_percentage,NCAAB_turnovers,NCAAB_two_point_attempts,NCAAB_two_point_percentage,NCAAB_win_shares,Guard,Forward,Center
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Deandre Ayton,57,66,451,0.612,276,0.424,191,0.733,140,35,...,21.4,405,11.3,69,416,0.635,7.6,0,1,0


In [293]:
### Make this a hook for the website

def get_college(name):
    """
    Website hook: builds the model-ready feature row for the named college player.

    name: The player's name as entered by the user, e.g. 'Deandre Ayton'

    Returns the one-row pd.DataFrame produced by feature_extraction when the player
    is found. If the lookup fails (the player doesn't exist or the name is
    unsupported), returns an empty list.

    NOTE(review): the original notes for this hook describe returning a one-item list
    with a dictionary in it; the success path still returns the DataFrame - confirm
    which format the front-end expects.
    """
    # Look up the requested player rather than a fixed example name.
    player_data = get_college_stats(name)

    # A failed lookup carries no data, so stop here instead of passing None onwards.
    if not player_data['success']:
        return []

    post_drop = drop_columns(player_data['data'])
    clean_data = clean_qualitative(post_drop)
    final_data = feature_extraction(clean_data)
    return final_data

In [None]:
def get_predicts(name):
    """
    Placeholder for the prediction hook; no prediction logic is implemented yet.

    Intended contract: return a one-item list with the dictionary in it, or an
    empty list if the player doesn't exist. For now `name` is ignored and None
    is returned.
    """
    return None