## MUT20 Player Scraper Usage Example
- Date is only used to track scrape date, url is the first page of the pages you want to scrape on Muthead.

TODO: 
- Map archetype ids (int) to actual archetype name.

In [64]:
from MUTScraper20 import Player, PlayerHandler, JSONParser
import pandas as pd
%load_ext autoreload
%autoreload 2


def parse_json(date):
    jp = JSONParser(date=date)
    jp.load_json()
    jp.parse_json_items()
    jp.jsons_to_dataframe()
    jp.save_to_csv()
    return jp


date = '03-01-2020'
url = 'https://www.muthead.com/20/players/?overall__gte=90&overall__lte=93'

ph = PlayerHandler(date=date, url=url)
ph.handle_players()
jp = parse_json(date)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Number of pages: 22
https://www.muthead.com/20/players/?overall__gte=90&overall__lte=93
429 player links gathered.
Filtering player list, before: 429
Completed filtering, after: 17



## Viewing our data

In [65]:
import os
import pandas as pd


directory = 'data'
drop_duplicates = False
save = False

dfs = {}

for file in os.listdir(directory):
    if '.csv' in file:

        print(directory, file)
        df = pd.read_csv(os.path.join(directory, file))
        
        df = df[df.columns.difference(['Unnamed: 0'])]
        # dropping duplicates because of previous error
        if drop_duplicates:
            df = df.drop_duplicates(subset=df.columns.difference(['date_scraped']))
        if save:
            df.to_csv(os.path.join(directory, file), index=False)
            
        dfs[file.split('.csv')[0]] = df

data DEF.csv
data OFF.csv
data QB.csv
data ST.csv


In [68]:
def filter_df(df, positions, sort_by=None):
    if type(positions) != list: 
        positions = [positions]
    result = df[ df['position'].isin(positions)]
    if sort_by:
        return result.sort_values(sort_by, ascending=False)
    return result

#dfs['QB'].sort_values('OVR', ascending=False).head(10)

#filter_df(dfs['OFF'], 'HB', 'BTK')[['name', 'BTK', 'SPD', 'TRK', 'AGI']]
oline = filter_df(dfs['OFF'], ['LT', 'LG', 'C', 'RG', 'LT', 'TE'])#, 'TAK')[['name', 'TAK', 'POW', 'SPD', 'HT']]

In [69]:
oline[ (oline['WT'] >= 300) & (oline['RBK'] >= 88)].sort_values('RBK', ascending=False) \
    [['name', 'position', 'OVR','RBK', 'SPD', 'IMP', 'LBK', 'PBF', 'STR', 'AGI', 'AWR']].reset_index(drop=True).head(15)

Unnamed: 0,name,position,OVR,RBK,SPD,IMP,LBK,PBF,STR,AGI,AWR
0,Larry Allen,RG,94,94,66,93,89,90,95,62,89
1,Rodger Saffold III,LG,91,93,59,93,87,81,84,66,96
2,Bruce Matthews,LG,94,92,70,92,92,92,91,71,93
3,Travis Frederick,C,92,92,51,91,89,83,89,62,94
4,Alex Mack,C,91,92,60,93,92,81,89,66,95
5,Matt Birk,C,94,91,68,86,87,90,91,68,93
6,Marshal Yanda,RG,92,91,60,90,80,88,91,64,93
7,Matt Birk,C,94,91,68,86,87,90,91,68,93
8,Joe Thomas,LT,93,90,70,90,90,93,88,72,90
9,Brandon Scherff,RG,90,90,68,89,92,82,89,70,86


In [57]:
# Hester - 47: Unkown, Dante Hall 
archetype_map = {4: 'Field General (QB)', 16: 'Improvisor (QB)', 40: 'Scrambler (QB)', 45: 'Strong Arm (QB)',
                 5: 'Physical (WR)', 8: 'Route Running (WR)', 10: 'Deep Threat (WR)', 36: 'Slot (WR)',
                 47: 'Unknown',
                 26: 'Elusive Back (HB)', 30: 'Powerback (HB)', 33: 'Receiving Back (HB)',
                 15: 'Power (OT)', 7: 'Pass Protector (OT)',
                 11: 'Power (OG)', 19: 'Pass Protector (OG)', 46: 'Agile (OG)',
                 18: 'Power (C)', 38: 'Agile (C)', 39: 'Pass Protector (C)',
                 3: 'Vertical Threat (TE)', 14: 'Blocking (TE)', 44: 'Possession (TE)',
                 }

def get_archtype(arch_id):
    if arch_id in archetype_map.keys():
        return archetype_map[arch_id]
    else:
        return 'N/A'
    
    

In [70]:
# from: https://twitter.com/MUT_Leaks20/status/1156002794869911552
(x1, y1), (x2, y2) = (150, 0), (290, 0.8)
m1 = (y2-y1)/(x2-x1)
b1 = -m1 * 150

(x3, y3) = (350, 1.0)

def get_weight_contribution_rbk(weight):
    if weight <= 150: return 0
    elif weight <= 290: return m1*weight + b1
    return (1/300)*weight - (1/6)

def calc_run_block(rbk, weight, strength, vec=True):
    if vec:
        return (0.6 * rbk) + (0.175 * weight.apply(lambda x: get_weight_contribution_rbk(x))) + (0.225 * strength)
    else:
        return (0.6 * rbk) + (0.175 * get_weight_contribution_rbk(weight)) + (0.225 * strength)

In [76]:
#calc_run_block(79, 288, 94, False)

oline['RBK_calc'] = calc_run_block(oline['RBK'],oline['WT'], oline['STR'])

#tes = oline[ oline['position'] == 'TE']
#tes.sort_values('RBK_calc', ascending=False)

stat_cols = ['RBK', 'IMP', 'LBK', 'SPD', 'AGI']
oline['pulling_guard'] = oline[stat_cols].mean(axis=1)
oline.sort_values('pulling_guard', ascending=False)[['name', 'program', 'position', 'pulling_guard'] + stat_cols].head(15)
oline['all_around_guard'] = oline[['RBK_calc', 'PBF', 'PBP', 'pulling_guard']].mean(1)
oline.sort_values('all_around_guard', ascending=False)[['name', 'program', 'position', 'all_around_guard'] + stat_cols + ['PBF', 'PBP']].head(15)
oline[ oline['position'] == 'TE'].sort_values('RBK_calc', ascending=False)[['name', 'program', 'position', 'RBK_calc', 'RBK', 'WT', 'STR', 'HT', 'SPD']]

Unnamed: 0,name,program,position,RBK_calc,RBK,WT,STR,HT,SPD
245,Marcedes Lewis,Flashbacks,TE,70.542,87,267,81,78,74
298,J.J. Watt,Out of Position,TE,68.688,79,288,94,77,83
240,George Kittle,Series Redux,TE,67.822,84,247,77,76,85
73,George Kittle,MUT Heroes,TE,67.822,84,247,77,76,85
108,Delanie Walker,MUT Heroes,TE,67.598,84,248,76,74,82
283,Rob Gronkowski,Zero Chill,TE,67.09,79,265,87,78,86
159,Jason Witten,MUT Heroes,TE,64.688,78,263,79,78,74
155,Rob Gronkowski,Madden Ultimate Team 10,TE,64.015,75,265,84,78,83
258,Rob Gronkowski,Series Redux,TE,64.015,75,265,84,78,83
171,Austin Hooper,Football Outsiders,TE,62.873,78,248,71,76,80
