# Web Scraping MUTHead with BeautifulSoup and Requests 
## + Plotly
- **Matthew Johnson, Oct 6, 2018**

In [1]:
%matplotlib inline
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt 
import seaborn as sns
import urllib
import pandas as pd
import requests
import warnings; warnings.simplefilter('ignore')  
import time
import re

**Function to collect a player's stats from a parsed BeautifulSoup page:**

In [161]:
def get_player_stats(soup, name):
    """Collect one player's ratings from a parsed player page.

    Every ``<section class="player-details">`` that holds at least one
    ``<ul class="player-details-stats">`` is scanned, and each ``<li>`` inside
    the section whose markup starts with ``<li data-stat-abbreviation="`` adds
    one entry to the result.

    Parameters
    ----------
    soup : object exposing ``find_all`` (e.g. a ``BeautifulSoup`` document)
        The parsed page.
    name : hashable
        Label used as the index of the single returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``name``. Column names are the first three
        characters of each stat abbreviation; values are strings made of the
        first two digits found in the ``<li>`` markup (they are not converted
        to numbers here).
    """
    li_prefix = '<li data-stat-abbreviation="'
    # Positions within the section's <li> list that are never read
    # (presumably group headings rather than stats -- confirm against the page).
    skipped_positions = (0, 7, 15, 21)
    # Nothing after this position is read.
    last_position = 29

    player_dict = {}

    for sec_tag in soup.find_all('section', {'class': 'player-details'}):

        # The <li> items are always taken from the whole section, so one pass
        # is enough as soon as the section has a stats list at all.
        if not sec_tag.find_all('ul', {'class': 'player-details-stats'}):
            continue

        for i, li_tag in enumerate(sec_tag.find_all('li')):

            if i in skipped_positions:
                continue

            markup = str(li_tag)

            # Slice the literal prefix off. str.lstrip() would treat the prefix
            # as a *set of characters* and could also eat the beginning of the
            # abbreviation itself.
            if markup.startswith(li_prefix):
                stat = markup[len(li_prefix):].split('"', 1)[0][:3]
                rating = re.sub('[^0-9]', '', markup)[0:2]
                player_dict[stat] = rating

            if i == last_position:
                break

    return pd.DataFrame([player_dict], index=[name])

**Getting player links from the search page, filtered to WRs rated 88+ OVR:**

In [33]:
# Collect the href of every player link found on the first two result pages
# of the filtered search.
links = []

for i in range(1, 3):
    wr_url = f'https://www.muthead.com/19/players?filter-market=3&filter-ovr-min=88&filter-position=8&page={i}'
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() stops the run instead of parsing an error page.
    response = requests.get(wr_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    for link in soup.find_all('a', attrs={'href': re.compile("19/players/")}):
        links.append(link.get('href'))

# Drop links containing 'prices' and remove duplicates. Sorting makes the order
# (and therefore the row order of the scraped table) the same on every run.
new_links = sorted({s for s in links if 'prices' not in s})

**Getting player stats from the links we just gathered:**

In [162]:
# Download the 'upgrades' page of every collected player link and turn each
# page into a one-row table of ratings.
prefix = 'https://www.muthead.com'

player_list = []

for link in new_links:
    new_url = prefix + link + 'upgrades'
    # Fail fast on a hung connection or an HTTP error rather than silently
    # building an empty row from an error page.
    response = requests.get(new_url, timeout=30)
    response.raise_for_status()
    # The row label is everything after the first '-' in the link, with the
    # remaining dashes turned into spaces.
    player_name = ' '.join(link.split('-')[1:])
    soup = BeautifulSoup(response.text, 'html.parser')
    player_list.append(get_player_stats(soup, player_name))
    # Pause between requests.
    time.sleep(2)

# Stack the one-row tables into a single frame (one row per player).
player_df = pd.concat(player_list)

In [202]:
# Preview the first rows of the scraped receiver ratings.
player_df.head()

Unnamed: 0,ACC,AGI,AWR,BCV,BTK,CAR,CIT,CTH,DRR,ELU,...,RBK,RBP,RLS,SFA,SPC,SPD,SPM,SRR,STR,TRK
tim brown,90,91,91,89,81,84,88,89,82,88,...,55,44,85,60,86,88,84,90,61,60
herman moore,89,91,91,84,82,75,89,91,91,80,...,51,43,89,63,91,89,81,89,68,61
julio jones,86,90,88,90,81,73,90,90,85,83,...,62,41,89,78,92,87,78,87,72,69
odell beckham jr,88,91,88,87,82,66,85,84,86,87,...,58,40,85,69,92,88,82,88,43,36
doug baldwin,84,86,90,84,80,70,89,91,83,83,...,60,38,82,46,89,84,75,88,48,28


In [172]:
# The scraped ratings are digit strings; convert every column to int in one
# call. (A loop variable named `_` would shadow IPython's last-output variable.)
player_df = player_df.astype(int)

In [175]:
# Save the receiver ratings to a CSV file (relative path).
player_df.to_csv('mut19_wrs_oct6.csv')

In [167]:
# Bare expression that is not the last line of the cell: it is evaluated but
# nothing is displayed.
player_df.columns
# Rating columns of interest; 'RLS' and 'SPM' are left out of the list.
cols = ['CIT', 'CTH', 'SPC', 'DRR', 'MRR', 'SRR', 'SPD', 'STR'] # 'RLS','SPM',

**RBs:**

In [None]:
# Collect the href of every player link found on the first result page of the
# filtered search.
links2 = []

for i in range(1, 2):
    # The page number is now part of the URL (as in the WR search above), so
    # widening the range really fetches more pages; the stray '&&' is gone.
    rb_url = f'https://www.muthead.com/19/players?filter-ovr-min=88&filter-market=3&filter-position=2&page={i}'
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() stops the run instead of parsing an error page.
    response = requests.get(rb_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    for link in soup.find_all('a', attrs={'href': re.compile("19/players/")}):
        links2.append(link.get('href'))

# Drop links containing 'prices' and remove duplicates. Sorting makes the order
# (and therefore the row order of the scraped table) the same on every run.
rb_links = sorted({s for s in links2 if 'prices' not in s})

In [227]:
def get_rb_stats(soup, name):
    """Collect one player's ratings and keep a fixed set of 19 columns.

    Every ``<section class="player-details">`` that holds at least one
    ``<ul class="player-details-stats">`` is scanned, and each ``<li>`` inside
    the section whose markup starts with ``<li data-stat-abbreviation="`` adds
    one entry to the result.

    Parameters
    ----------
    soup : object exposing ``find_all`` (e.g. a ``BeautifulSoup`` document)
        The parsed page.
    name : hashable
        Label used as the index of the single returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``name`` with the columns listed in
        ``kept_columns`` below, in that order. Values are strings made of the
        first two digits found in the ``<li>`` markup.

    Raises
    ------
    KeyError
        If any of the kept columns was not found on the page.
    """
    li_prefix = '<li data-stat-abbreviation="'
    kept_columns = ['ACC', 'AGI', 'BCV', 'BTK', 'CAR',
                    'CIT', 'CTH', 'DRR', 'ELU', 'JKM', 'JMP', 'MRR', 'SFA', 'SPC', 'SPD',
                    'SPM', 'SRR', 'STR', 'TRK']

    player_dict = {}

    for sec_tag in soup.find_all('section', {'class': 'player-details'}):

        # The <li> items are always taken from the whole section, so one pass
        # is enough as soon as the section has a stats list at all.
        if not sec_tag.find_all('ul', {'class': 'player-details-stats'}):
            continue

        for li_tag in sec_tag.find_all('li'):
            markup = str(li_tag)

            # Slice the literal prefix off. str.lstrip() would treat the prefix
            # as a *set of characters* and could also eat the beginning of the
            # abbreviation itself. Items without the attribute carry no stat.
            if not markup.startswith(li_prefix):
                continue

            stat = markup[len(li_prefix):].split('"', 1)[0][:3]
            rating = re.sub('[^0-9]', '', markup)[0:2]
            player_dict[stat] = rating

    return pd.DataFrame([player_dict], index=[name])[kept_columns]

In [229]:
# Download the 'upgrades' page of every collected player link and turn each
# page into a one-row table of ratings.
prefix = 'https://www.muthead.com'

rb_list = []

for link in rb_links:
    new_url = prefix + link + 'upgrades'
    # Fail fast on a hung connection or an HTTP error rather than silently
    # parsing an error page.
    response = requests.get(new_url, timeout=30)
    response.raise_for_status()
    # The row label is everything after the first '-' in the link, with the
    # remaining dashes turned into spaces.
    player_name = ' '.join(link.split('-')[1:])
    soup = BeautifulSoup(response.text, 'html.parser')
    rb_list.append(get_rb_stats(soup, player_name))
    # Pause between requests.
    time.sleep(2)

# Stack the one-row tables into a single frame (one row per player).
rb_df = pd.concat(rb_list)

In [230]:
# Preview the first rows of the scraped running-back ratings.
rb_df.head()

Unnamed: 0,ACC,AGI,BCV,BTK,CAR,CIT,CTH,DRR,ELU,JKM,JMP,MRR,SFA,SPC,SPD,SPM,SRR,STR,TRK
christian mccaffrey,91,94,84,82,87,81,83,69,90,91,86,77,74,82,90,92,84,73,61
eddie george,88,84,87,88,88,64,62,52,76,77,73,55,88,50,85,79,64,84,87
barry sanders,90,90,90,88,86,67,68,48,89,89,76,61,75,62,89,89,71,65,69
devonta freeman,88,92,90,87,87,67,70,48,92,93,82,56,74,73,88,93,63,72,86
leonard fournette,89,86,90,87,90,54,66,37,78,85,67,45,88,52,89,87,53,82,93


In [232]:
# The scraped ratings are digit strings; convert every column to int in one
# call. (A loop variable named `_` would shadow IPython's last-output variable.)
rb_df = rb_df.astype(int)

In [233]:
# Save the running-back ratings to a CSV file (relative path).
rb_df.to_csv('mut19_rbs_oct6.csv')

### Learning Plotly 

In [190]:
import os

import plotly as py

# Credentials are read from the environment so no secret is stored in the
# notebook. Set PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel;
# a missing variable raises KeyError here rather than failing later.
# NOTE(review): an API key was previously committed in this cell -- it should
# be treated as leaked and regenerated.
py.tools.set_credentials_file(
    username=os.environ['PLOTLY_USERNAME'],
    api_key=os.environ['PLOTLY_API_KEY'],
)

In [215]:
import plotly.plotly as py
import plotly.graph_objs as go

**Plot with hover attributes:**

In [206]:
# Scatter-plot matrix of selected receiver ratings; each point's hover text is
# the player's row label.
df = player_df

trace1 = go.Splom(
    # One dimension per rating column, in display order.
    dimensions=[
        {'label': stat, 'values': df[stat]}
        for stat in ('CIT', 'CTH', 'SPC', 'SPD', 'DRR', 'MRR', 'STR')
    ],
    text=df.index.values,
    marker={
        'size': 7,
        'showscale': False,
        'line': {'width': 0.5, 'color': 'rgb(230,230,230)'},
    },
)

# Styling shared by every axis of the matrix.
axis = {'showline': True, 'zeroline': False, 'gridcolor': '#fff', 'ticklen': 4}

layout = go.Layout(
    title='MUT19 Wide Receivers (88+ OVR)',
    dragmode='select',
    width=1000,
    height=1000,
    autosize=False,
    hovermode='closest',
    plot_bgcolor='rgba(240,240,240, 0.95)',
    # xaxis1..xaxis8 and yaxis1..yaxis8 each receive their own copy of `axis`.
    **{
        f'{side}axis{number}': dict(axis)
        for side in ('x', 'y')
        for number in range(1, 9)
    }
)

# Hide the diagonal cells and the upper triangle of the matrix.
trace1['diagonal'].update(visible=False)
trace1['showupperhalf'] = False

fig1 = {'data': [trace1], 'layout': layout}
py.iplot(fig1)

In [234]:
# Scatter-plot matrix of selected running-back ratings; each point's hover
# text is the player's row label.
df = rb_df

trace1 = go.Splom(
    # One dimension per rating column, in display order.
    dimensions=[
        {'label': stat, 'values': df[stat]}
        for stat in ('SPD', 'SPM', 'AGI', 'ELU', 'TRK', 'BTK', 'STR')
    ],
    text=df.index.values,
    marker={
        'size': 7,
        'showscale': False,
        'line': {'width': 0.5, 'color': 'rgb(230,230,230)'},
    },
)

# Styling shared by every axis of the matrix.
axis = {'showline': True, 'zeroline': False, 'gridcolor': '#fff', 'ticklen': 4}

layout = go.Layout(
    title='MUT19 Running Backs (88+ OVR)',
    dragmode='select',
    width=1000,
    height=1000,
    autosize=False,
    hovermode='closest',
    plot_bgcolor='rgba(240,240,240, 0.95)',
    # xaxis1..xaxis8 and yaxis1..yaxis8 each receive their own copy of `axis`.
    **{
        f'{side}axis{number}': dict(axis)
        for side in ('x', 'y')
        for number in range(1, 9)
    }
)

# Hide the diagonal cells and the upper triangle of the matrix.
trace1['diagonal'].update(visible=False)
trace1['showupperhalf'] = False

fig1 = {'data': [trace1], 'layout': layout}
py.iplot(fig1)