# Scraping Overwatch Data

The purpose of this notebook is to demonstrate the use of BeautifulSoup to scrape data from the web. Here we are scraping the accuracy leaderboards of masteroverwatch.com to find the top 50 most accurate Overwatch players in the world.

In [1]:
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import re
import string as strang


# Leaderboard page to scrape: global PC rankings for the accuracy category.
url = 'https://masteroverwatch.com/leaderboards/pc/global/category/accuracy'

# Upper bound (seconds) on how long to wait for the server. Without a timeout
# requests waits indefinitely, which would hang the notebook on a stalled
# connection.
REQUEST_TIMEOUT_SECONDS = 30

request = get(url, timeout=REQUEST_TIMEOUT_SECONDS)
# Stop here on a 4xx/5xx response instead of parsing an error page, which
# would otherwise surface later as an empty or malformed table.
request.raise_for_status()

soup = BeautifulSoup(request.text, 'html5lib')

In [2]:
# Collect every <div> carrying the 'table-row-content' CSS class.
table = soup.find_all('div', class_='table-row-content')


In [3]:
# Hero identifiers searched for (as substrings) in each row's hero markup.
# Order is significant: matches are emitted in this order.
heroes = [
    # offense
    'doomfist', 'genji', 'mccree', 'pharah', 'reaper', 'soldier-76', 'sombra',
    'tracer',
    # defense
    'bastion', 'hanzo', 'junkrat', 'mei', 'torbjorn', 'widowmaker',
    # tank
    'dva', 'orisa', 'reinhardt', 'roadhog', 'winston', 'zarya',
    # support
    'ana', 'brigitte', 'lucio', 'mercy', 'moira', 'symmetra', 'zenyatta',
]

In [4]:
# One accumulator list per leaderboard field. They are filled in lock-step,
# so position i in every list describes the same leaderboard row. The lists
# are re-created here so re-running the cell does not duplicate rows.
rank = []
battletag = []
rating = []
accuracy = []
wins = []
losses = []
winslosses = []
kdratio = []
killsdeaths = []
timeplayed = []
timeonfire = []
favechar = []

# The first matched element is skipped -- presumably it is the table's
# header row rather than a player; TODO confirm against the page markup.
for row in table[1:]:
    # String form of the first 'heroes-hero' span; the favourite hero is
    # recovered below by substring search against this markup.
    fave_heroes = str(row.find('span', {'class': 'heroes-hero'}))

    # These two cells each supply two fields, so locate them once per row.
    name_cell = row.find('div', {'class': 'table-column-left table-name'})
    kd_cell = row.find('div', {'class': 'table-kd-ratio col-xs-2'})

    rank.append(row.find('div', {'class': 'table-column-left table-rank'}).text)
    # Raw string: '\s' in a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    battletag.append(re.sub(r'\n\s+', '', name_cell.strong.text))
    # Plain literal removals do not need the regex engine.
    rating.append(name_cell.small.text.replace('Rating ', ''))
    accuracy.append(row.find('div', {'class': 'table-main-value col-xs-3'}).strong.text)
    wins.append(row.find('span', {'class': 'bar-wins'}).text.replace('W', ''))
    losses.append(row.find('span', {'class': 'bar-losses'}).text.replace('L', ''))
    winslosses.append(row.find('div', {'class': 'bar-outer'}).text)
    kdratio.append(kd_cell.strong.text)
    killsdeaths.append(kd_cell.small.text)
    timeplayed.append(row.find('div', {'class': 'time-played'}).text)
    timeonfire.append(row.find('div', {'class': 'time-fire'}).text)
    # Every hero name found in the markup is kept; if more than one matches,
    # the names are concatenated with no separator (in `heroes` order).
    favechar.append(''.join(hero for hero in heroes if hero in fave_heroes))

In [5]:
# Column label -> scraped values. Dict insertion order determines the column
# order of the resulting frame. Note that 'Win_Percent' is fed by the
# `winslosses` list.
columns = {
    'Rank': rank,
    'BattleTag': battletag,
    'Rating': rating,
    'Accuracy': accuracy,
    'Wins': wins,
    'Losses': losses,
    'Win_Percent': winslosses,
    'KD_Ratio': kdratio,
    'KillsDeaths': killsdeaths,
    'TimePlayed': timeplayed,
    'TimeonFire': timeonfire,
    'FaveChar': favechar,
}
overwatch = pd.DataFrame(columns)

# Bare last expression so the notebook renders the frame as rich output.
overwatch

Unnamed: 0,Rank,BattleTag,Rating,Accuracy,Wins,Losses,Win_Percent,KD_Ratio,KillsDeaths,TimePlayed,TimeonFire,FaveChar
0,#1,Jhezz93,2167,58.3%,72,63,53.3%,0.58,545 / 947,25 hours,1 hour,mercy
1,#2,CARD,3657,53%,15,4,78.9%,2.55,591 / 232,3 hours,44 minutes,pharah
2,#2,TakeOver,4237,53%,7,9,43.8%,2.58,416 / 161,3 hours,34 minutes,widowmaker
3,#4,Blue,846,52.7%,45,117,27.8%,2.45,"2,334 / 954",22 hours,4 hours,lucio
4,#5,JoEy,2681,52.3%,9,2,81.8%,4.49,328 / 73,2 hours,23 minutes,zarya
5,#5,Tauriel,2224,52.3%,72,64,52.9%,4.07,"3,092 / 760",26 hours,3 hours,widowmaker
6,#7,Pharah,3783,52%,94,97,49.2%,2.17,"5,138 / 2,372",39 hours,5 hours,pharah
7,#8,ĊĿǏŊTŴǾǾD,3091,51%,22,6,78.6%,2.89,765 / 265,5 hours,1 hour,hanzo
8,#8,KorraSpirit,3000,51%,5,5,50%,2.42,281 / 116,2 hours,21 minutes,pharah
9,#10,MYLEAVE,3579,50.6%,12,6,66.7%,3.03,527 / 174,3 hours,45 minutes,tracer
