In [1]:
import glob
import pandas as pd
import numpy as np
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
# Folder that holds the projection workbooks, written relative to the notebook.
data_path = '../data/projections/'
# Workbook file names; they are concatenated onto data_path when a file is read.
# NOTE(review): final_avg_file is not read by any cell shown here — confirm it is still needed.
final_avg_file = 'final_averages_jaylee.xlsx'
hash_bball_file = 'hashtag_bball_projections.xlsx'

In [2]:
# SCRAPING DRAFT RESULTS
# League number for the Yahoo draft-results page used by the scraping helpers.
# NOTE(review): the global name `id` hides Python's built-in id() for every
# later cell; `league_id` is the alias to prefer in new code.
id = 2214353
league_id = id

In [3]:
# METHODS

def calculate_zscores(df):
    """Add a rounded z-score column for every scoring category and rank the rows.

    For each category column a ``<category>_Z`` column is added holding
    ``(value - column mean) / column standard deviation`` rounded to two
    decimals. The ``tog`` z-score is multiplied by -1. The frame is then
    passed to ``_calculate_total_value``, which adds ``Total`` and sorts by it.

    :param df: DataFrame containing the numeric columns ``fg%``, ``ft%``,
        ``3pm``, ``ppg``, ``rpg``, ``apg``, ``spg``, ``bpg`` and ``tog``.
    :return: a new DataFrame; ``df`` itself is not modified.
    :raises KeyError: if one of the category columns is missing.
    """
    df_final = df.copy()
    # Called `categories` so the local does not shadow the punt_cats() function.
    categories = ['fg%','ft%','3pm','ppg','rpg','apg','spg','bpg','tog']

    for col in categories:
        z_score = (df_final[col]-df_final[col].mean()) / df_final[col].std()
        if col == 'tog':
            # Sign is flipped for this category only
            # (presumably turnovers, where fewer is better — confirm).
            df_final[col+'_Z'] = -1 * z_score.round(2)
        else:
            df_final[col + '_Z'] = z_score.round(2)

    df_final = _calculate_total_value(df_final)

    return df_final


def punt_cats(df,punt_list=('tog',)):
    """Remove the punted categories from a z-scored frame and re-rank it.

    For every category in ``punt_list`` the raw column and its ``_Z`` column
    are dropped. Punting ``fg%`` also drops ``fg`` and ``fga``; punting
    ``ft%`` also drops ``ft`` and ``fta``. ``Total`` is then recomputed from
    the ``_Z`` columns that remain via ``_calculate_total_value``.

    :param df: DataFrame that already carries the ``_Z`` columns.
    :param punt_list: iterable of category names to drop. The default is an
        immutable tuple so it cannot be altered between calls.
    :return: a new DataFrame sorted by the recomputed ``Total``; ``df`` is
        left unchanged.
    :raises KeyError: if a column to be dropped is not present.
    """
    # Made/attempted columns that accompany each percentage category.
    companion_cols = {'fg%': ['fg','fga'], 'ft%': ['ft','fta']}

    df_final = df.copy()
    for cat in punt_list:
        to_drop = [cat, cat+'_Z'] + companion_cols.get(cat, [])
        df_final = df_final.drop(columns=to_drop)

    df_final = _calculate_total_value(df_final)

    return df_final


def _calculate_total_value(df):
    value_cols = []

    for col in df.columns:
        if col.endswith('_Z'):
            value_cols.append(col)

    df['Total'] = df[value_cols].sum(axis=1)
    df = df.sort_values('Total', ascending=False).reset_index(drop=True)

    return df


def _scrape_results(league_id=0):
    """Fetch a league's draft-results page and return the linked names in it.

    Every ``<td>`` cell of the page is inspected; when the cell contains an
    ``<a>`` element, the text of the first such link is collected.

    :param league_id: league number substituted into the draft-results URL.
    :return: list of link texts in document order (duplicates are kept).
    :raises urllib.error.URLError: if the page cannot be fetched.
    """
    url = "https://basketball.fantasysports.yahoo.com/nba/{0}/draftresults".format(league_id)

    # Close the HTTP response as soon as the document has been parsed.
    with urlopen(url) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(response, 'html.parser')

    # `is not None` rather than `!= None`: an identity test cannot be affected
    # by the element's own equality rules.
    return [cell.a.get_text() for cell in soup.find_all('td') if cell.a is not None]


def find_best_available(df,league_id=0,top_n=20):
    """
    Return the best-ranked players that have not been drafted yet.

    The league's draft results are scraped on every call, so re-running the
    function reflects the picks made since the previous call. Rows whose
    ``Player`` value appears among the drafted names are removed and the first
    ``top_n`` remaining rows are returned in their existing order.

    :param df: ranked DataFrame with a ``Player`` column
    :param league_id: league number handed to ``_scrape_results``
    :param top_n: maximum number of rows to return (defaults to 20)
    :return: new DataFrame with a fresh 0..n-1 index; ``df`` is not modified
    """

    players_drafted = _scrape_results(league_id)

    # Boolean mask of rows already taken (avoids shadowing the built-in `filter`).
    already_taken = df['Player'].isin(players_drafted)

    return df.loc[~already_taken].head(top_n).reset_index(drop=True)


def _clean_player_names(df):
    player_names = []
    camel_case_last_names = ('LaVine','VanVleet','DeRozan','DiVincenzo','McGee',"O'Neale",'LeVert')
    for player in df.Player:
        
        player_list = player.split()[:3]
        
        if len(player_list) > 2 and (player_list[2].startswith('Jr.') or player_list[2].startswith('II')):
            
            prefix = player_list[2]
            
            if prefix.startswith('Jr.'):
                split_pre =  re.findall('[A-Z][^A-Z]*', prefix)
            elif prefix.startswith('II'):
                split_pre = re.findall('I+', prefix)
            
            player_list[2] = split_pre[0]
            player_string = ' '.join(player_list)
            player_names.append(player_string)
            
        else:
            
            player_list = player_list[:2]
            last_name = player_list[1]
            
            if last_name.startswith(camel_case_last_names) and last_name not in camel_case_last_names:
                split_last = re.findall('[A-Z].[A-Z][^A-Z]*', last_name)
            elif last_name in camel_case_last_names:
                split_last = [last_name]
            else:
                split_last = re.findall('[A-Z][^A-Z]*', last_name)
            
            player_list[1] = split_last[0]
            player_string = ' '.join(player_list)
            player_names.append(player_string)
   
    player_col = pd.Series(player_names)
    return player_col


def _separate_percentages(col):
    
    list_percentages = []
    list_made = []
    list_attempts = []
    
    for percentage in col:
        stat = percentage.split('(')
        perc = stat[0]
        m = stat[1].split('/')[0]
        a = stat[1].split('/')[1].split(')')[0]
        list_percentages.append(perc)
        list_made.append(m)
        list_attempts.append(a)
    
    col_percent = pd.Series(list_percentages)
    col_made = pd.Series(list_made)
    col_attempt = pd.Series(list_attempts)
    
    return col_percent, col_made, col_attempt
    

def clean_hashtag_df(df):
    """Turn the raw projections sheet into a tidy, numeric stats table.

    Steps: drop the ``Pos``, ``Tm``, ``G``, ``Mp`` and ``Total`` columns,
    clean the player names, split ``fg%`` and ``ft%`` into percentage / made /
    attempted columns, keep the columns listed below in that order, and
    convert every column except ``Player`` to a numeric dtype.

    :param df: raw DataFrame as read from the workbook.
    :return: new DataFrame with columns ``Player, fg%, fg, fga, ft%, ft, fta,
        3pm, ppg, rpg, apg, spg, bpg, tog``; ``df`` is not modified.
    :raises KeyError: if an expected column is missing.
    :raises ValueError: if a stat column holds text that is not a number.
    """
    df_final = df.drop(columns=['Pos','Tm','G','Mp','Total'])

    # The helpers return freshly indexed Series; assign the underlying arrays
    # so rows line up by position even when `df` has a non-default index.
    df_final['Player'] = _clean_player_names(df_final).to_numpy()

    fg_pct, fg_made, fg_att = _separate_percentages(df_final['fg%'])
    df_final['fg%'], df_final['fg'], df_final['fga'] = (
        fg_pct.to_numpy(), fg_made.to_numpy(), fg_att.to_numpy())

    ft_pct, ft_made, ft_att = _separate_percentages(df_final['ft%'])
    df_final['ft%'], df_final['ft'], df_final['fta'] = (
        ft_pct.to_numpy(), ft_made.to_numpy(), ft_att.to_numpy())

    # Re-arrange order of columns
    columns = ['Player','fg%','fg','fga','ft%','ft','fta','3pm','ppg',
               'rpg','apg', 'spg','bpg','tog']
    df_final = df_final[columns].copy()

    # Convert the stat columns by name, so the result does not depend on where
    # 'Player' sits in the sheet or on stray extra columns being numeric.
    stat_columns = columns[1:]
    df_final[stat_columns] = df_final[stat_columns].apply(pd.to_numeric)

    # Safety net only: none of the steps above removes rows.
    if len(df_final) < len(df):
        print("Warning! Cleaning data removed player(s)")

    return df_final

In [4]:
# Load the raw projections workbook from data_path; openpyxl is the engine
# pandas is told to use for reading the .xlsx file.
hash_bball = pd.read_excel(data_path + hash_bball_file, engine='openpyxl')

In [8]:
# Clean a copy of the raw sheet so `hash_bball` itself stays as loaded
df = clean_hashtag_df(hash_bball.copy())

In [None]:
# Clean data

In [11]:
# Adjust Top Rankings: first 100 / 150 / 200 rows of the cleaned table
df_100, df_150, df_200 = (df.head(size) for size in (100, 150, 200))

In [17]:
# Spot check: look one player up by exact name in the top-100 slice.
# If this comes back empty, inspect how the cleaning step rendered the
# hyphenated surname before assuming the player is missing.
df_100[df_100['Player'] == "Shai Gilgeous-Alexander"]

Unnamed: 0,Player,fg%,fg,fga,ft%,ft,fta,3pm,ppg,rpg,apg,spg,bpg,tog


In [None]:
# PUNT STRATEGIES

# punt_cats() drops each punted category's `_Z` column, so it needs a frame
# that has been through calculate_zscores(). Build it here from the cleaned
# projections so this cell also works after Restart & Run All.
df_hash = calculate_zscores(df)

#fg% and to
df_fgto = punt_cats(df_hash,['fg%','tog'])
df_fgto

In [14]:
def _scrape_by_team(name,id):
    """Fetch the draft-results page of league ``id`` and return the linked names.

    Every ``<td>`` cell that contains an ``<a>`` element contributes the text
    of its first link.

    :param name: team name. Not used yet — filtering by team is still a TODO,
        so the names of every linked cell on the page are returned.
    :param id: league number for the URL. The parameter name hides the
        built-in ``id`` inside this function; it is kept so existing keyword
        calls continue to work.
    :return: list of link texts in document order.
    :raises urllib.error.URLError: if the page cannot be fetched.
    """
    # Use the argument, not the notebook-level `league_id`, so the caller
    # decides which league is scraped.
    url = "https://basketball.fantasysports.yahoo.com/nba/{0}/draftresults".format(id)

    # Close the HTTP response once parsing is done; name the parser so the
    # result does not depend on which optional parsers are installed.
    with urlopen(url) as response:
        soup = BeautifulSoup(response, 'html.parser')

    players_selected = []
    for cell in soup.find_all('td'):
        if cell.a is None:
            continue
        # TODO: only keep the entry when the row's team name == name
        players_selected.append(cell.a.get_text())

    return players_selected


# def track_team_stats(df_hash,name,id=0000):
#     # scrape results based on a team name
#     players_selected = _scrape_by_team(name,id)
#     my_team_df = df_hash.loc[df_hash['Player'].isin(players_selected)]
    
#     # Add all values by row and create a new TOTAL TEAM row with all summed values per column
#     # Read just the percentages based on fgm/fga, ftm/fta
    
#     return df

def track_team_stats(df,player_list):
    """Return the rows for a roster plus a final ``TOTALS`` row.

    Rows of ``df`` whose ``Player`` value is in ``player_list`` are kept in
    their existing order; names that are not found are silently skipped. The
    appended row holds the column sums of the numeric columns, except that
    ``fg%`` and ``ft%`` are recomputed as summed makes divided by summed
    attempts (a sum of percentages is not a percentage). Its ``Player`` value
    is the label ``'TOTALS'``.

    :param df: stats DataFrame with a ``Player`` column.
    :param player_list: iterable of player names making up the team.
    :return: new DataFrame with a fresh 0..n index; ``df`` is not modified.
    """
    team_df = df.loc[df['Player'].isin(player_list)]

    totals = team_df.sum(numeric_only=True)

    # Readjust the percentages from the summed makes / attempts when the
    # supporting columns are available.
    for pct, made, attempts in (('fg%', 'fg', 'fga'), ('ft%', 'ft', 'fta')):
        if pct in totals.index and made in totals.index and attempts in totals.index:
            if totals[attempts] != 0:
                totals[pct] = totals[made] / totals[attempts]
            else:
                totals[pct] = np.nan

    # Label only the totals row instead of overwriting every NaN in the frame.
    totals_row = totals.to_frame().T
    totals_row['Player'] = 'TOTALS'

    # DataFrame.append no longer exists in pandas 2.x; concat replaces it.
    team_df = pd.concat([team_df, totals_row], ignore_index=True)

    return team_df

In [None]:
# Roster to summarise, one name per line; spellings must match the cleaned
# `Player` column exactly.
selected = [
    "D'Angelo Russell",
    "Fred VanVleet",
    "Collin Sexton",
    "Devin Booker",
    "Lauri Markkanen",
    "Kawhi Leonard",
    "Jaren Jackson Jr.",
    "Al Horford",
    "Josh Richardson",
    "Otto Porter Jr.",
    "Killian Hayes",
    "Terry Rozier",
    "Maxi Kleber",
]

In [None]:
# Summarise the `selected` roster.
# NOTE(review): this needs `df_hash` to exist in the kernel — confirm an
# earlier cell assigns it before running this one on a fresh kernel.
track_team_stats(df_hash,selected)

In [12]:
# Second roster to summarise, one name per line; spellings must match the
# cleaned `Player` column exactly.
ayoo = [
    'Stephen Curry',
    'Shai Gilgeous-Alexander',
    'Ben Simmons',
    'Brandon Ingram',
    'Anthony Davis',
    'Joel Embiid',
    'Domantas Sabonis',
    'Thomas Bryant',
    'Jrue Holiday',
    'Buddy Hield',
    'Jonas Valanciunas',
    'Dejounte Murray',
    'Kelly Oubre Jr.',
]

In [15]:
# Summarise the `ayoo` roster against the top-150 slice. Names that do not
# appear in df_150's `Player` column are left out of the result without any
# warning, so compare the row count with len(ayoo).
track_team_stats(df_150,ayoo)

Unnamed: 0,Player,fg%,fg,fga,ft%,ft,fta,3pm,ppg,rpg,apg,spg,bpg,tog
0,Anthony Davis,0.503,9.0,17.9,0.847,7.3,8.6,1.3,26.6,9.4,3.2,1.5,2.3,2.5
1,Stephen Curry,0.459,8.0,17.5,0.911,6.2,6.8,4.1,26.2,5.4,6.4,1.2,0.5,3.8
2,Joel Embiid,0.478,7.8,16.3,0.811,7.2,8.9,1.1,23.9,12.2,3.1,0.9,1.5,3.2
3,Ben Simmons,0.564,6.8,12.0,0.634,3.3,5.2,0.0,16.9,8.0,8.1,2.1,0.6,3.5
4,Domantas Sabonis,0.542,7.2,13.3,0.73,3.3,4.5,0.5,18.2,11.8,4.4,0.7,0.5,2.6
5,Brandon Ingram,0.464,7.1,15.4,0.805,4.2,5.2,2.6,21.1,4.9,3.5,1.0,0.7,3.0
6,Jrue Holiday,0.463,6.9,14.9,0.749,2.0,2.7,1.8,17.6,4.3,5.1,1.5,0.7,2.9
7,Jonas Valanciunas,0.564,6.1,10.7,0.747,2.1,2.8,0.5,14.7,10.4,2.0,0.4,1.0,1.9
8,Dejounte Murray,0.464,5.0,10.7,0.824,1.6,1.9,0.8,12.3,6.4,4.5,1.9,0.3,2.2
9,Thomas Bryant,0.594,5.5,9.3,0.738,1.8,2.4,0.8,13.6,7.1,1.8,0.5,1.1,1.2
