In [1]:
"""Scrapes a WC3 profile page from battlenet and returns json."""
from bs4 import BeautifulSoup
import requests
import re
import dateparser
import pandas as pd
from config import data_positions

In [4]:
class Profile:
    """Scraper for one Warcraft III player profile on classic.battle.net.

    Creating an instance immediately downloads the profile page for
    ``player`` on gateway ``server`` and splits it into three sections
    keyed 'info', 'individual' and 'team'.  The individual pieces of data
    are exposed as properties; :meth:`parse` bundles them into one dict.
    """

    # Seconds to wait for the server before giving up on the request.
    REQUEST_TIMEOUT = 30
    # Minimum share of games (in percent) a race needs to count as "main".
    MAIN_RACE_THRESHOLD = 75

    def __init__(self, player, server):
        """Fetch and pre-parse the profile page.

        Args:
            player: Player name, sent as the ``PlayerName`` query parameter.
            server: Gateway name, sent as the ``Gateway`` query parameter.
        """
        self.player = player
        self.server = server
        self.params = {'PlayerName': self.player, 'Gateway': self.server}
        self.url = 'http://classic.battle.net/war3/ladder/w3xp-player-profile.aspx?'
        self.soup = self.get_soup()
        self.tables = self._parse_tables()

    def get_soup(self):
        """Download the profile page and return it parsed with lxml.

        Raises:
            requests.exceptions.RequestException: if the HTTP request fails.
        """
        try:
            response = requests.get(self.url, params=self.params,
                                    timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print(e)
            # Re-raise: there is no response to parse, so continuing would
            # only fail a line later with an unrelated UnboundLocalError.
            raise

        return BeautifulSoup(response.content, 'lxml')

    def parse(self):
        """Return all scraped data as a dict.

        Keys: 'info', 'individual', 'team' and 'main_race'.
        """
        return {
            'info': self.information,
            'individual': self.individual_data,
            'team': self.team_data,
            'main_race': self.main_race,
        }

    def _parse_tables(self):
        """Split the page's ``mainTable`` into its section cells.

        The top-aligned, centred ``<td>`` cells are paired, in document
        order, with the names 'info', 'individual' and 'team'.  If the page
        has fewer such cells, the later names are simply absent.

        Raises:
            AttributeError: if the page has no ``mainTable`` element.
        """
        main_table = self.soup.find('table', class_='mainTable')
        cells = main_table.find_all('td', {'align': 'center', 'valign': 'top'})
        return dict(zip(['info', 'individual', 'team'], cells))

    @property
    def information(self):
        """General profile information as a dict."""
        return {
            'clan': self.clan,
            'player': self.player,
            'server': self.server,
            'home_page': self.home_page,
            'additional_info': self.parse_additional_info,
            'last_ladder_game': self.last_ladder_game,
        }

    @property
    def home_page(self):
        """Text of the bold element inside ``div#homePage``, or None if absent."""
        soup = self.tables.get('info')
        home_page = soup.find('div', {'id': 'homePage'})
        if home_page is None or home_page.b is None:
            return None
        return home_page.b.get_text().strip()

    @property
    def parse_additional_info(self):
        """Text written by the script inside ``div#additionalInfo``.

        The text sits between ``document.write("`` and the following ``");``
        in the script source.  Returns None when the div, its script, or
        either marker is missing.
        """
        soup = self.tables.get('info')
        additional_info_div = soup.find('div', {'id': 'additionalInfo'})
        if additional_info_div is None or additional_info_div.script is None:
            return None

        script = additional_info_div.script.get_text()
        text_start = 'document.write("'
        text_end = '");'

        start = script.find(text_start)
        if start == -1:
            return None
        start += len(text_start)

        # Search for the terminator only after the opening marker.
        end = script.find(text_end, start)
        if end == -1:
            return None

        return script[start:end]

    @property
    def last_ladder_game(self):
        """Date of the last ladder game as a 'YYYY-MM-DD' string.

        Returns None when the 'Last Ladder Game:' label is not on the page
        or the date text cannot be parsed.
        """
        soup = self.tables.get('info')
        label = soup.find(text='Last Ladder Game:')
        if label is None:
            return None

        raw_date = label.parent.parent.b.get_text()
        parsed = dateparser.parse(raw_date)
        if parsed is None:
            return None
        return str(parsed.date())

    @property
    def main_race(self):
        """Race played in at least ``MAIN_RACE_THRESHOLD`` percent of games.

        Returns the string 'No main race' when no race reaches that share.
        """
        soup = self.tables.get('info')
        overall_stats_table = soup.find('td', class_='rankingHeader').parent.parent
        # Drop the first and last rows (presumably header and totals -- confirm).
        rows = overall_stats_table.find_all('tr')[1:-1]
        df = self.parse_stats_table(rows)

        dominant = df[df['percentage_games'] >= self.MAIN_RACE_THRESHOLD]
        if len(dominant) != 0:
            return dominant['race'].values[0]
        return 'No main race'

    @staticmethod
    def parse_stats_table(rows):
        """Turn per-race stats rows into a DataFrame.

        Args:
            rows: Iterable of row elements; the text of each row's ``<td>``
                cells is read as race, wins, losses, win_percentage.

        Returns:
            DataFrame with those four columns plus 'num_games',
            'percentage_games' and 'total_games'.  'race' is lower-cased
            with colons removed; 'wins' and 'losses' are ints.  An empty
            ``rows`` yields an empty frame with the same columns.
        """
        keys = ['race', 'wins', 'losses', 'win_percentage']
        records = []

        for row in rows:
            cells = [x.get_text().strip() for x in row.find_all('td')]
            record = dict(zip(keys, cells))
            record['num_games'] = int(record['losses']) + int(record['wins'])
            records.append(record)

        if not records:
            # Nothing to aggregate; keep the column contract for callers.
            return pd.DataFrame(
                columns=keys + ['num_games', 'percentage_games', 'total_games'])

        df = pd.DataFrame(records)
        df['percentage_games'] = df['num_games'] * 100 / df['num_games'].sum()
        df['race'] = df['race'].apply(lambda x: x.lower().replace(':', ''))

        for col in ['wins', 'losses']:
            df[col] = df[col].astype(int)

        df['total_games'] = df['losses'] + df['wins']

        return df

    @property
    def individual_data(self):
        """Stats per individual game type, keyed by a normalised name.

        Only game types whose heading text is found on the page are included.
        Returns an empty dict when the page has no 'individual' section.
        """
        soup = self.tables.get('individual')
        if soup is None:
            return {}

        type_ = 'individual'
        vocab = {'Team Games': 'random_team', 'Solo Games': 'solo', 'FFA Games': 'free_for_all'}
        data = {}

        for game_type, new_key in vocab.items():
            container = soup.find(text=game_type)
            if container is None:
                continue
            # Climb from the heading text to the element enclosing its stats.
            table = container.parent.parent.parent.parent.parent
            values = [x.get_text() for x in table.find_all('b')]
            d = self.format_values(type_, values)
            d['win_percentage'] = self.calc_win_percentage(d['wins'], d['losses'])
            data[new_key] = d

        return data

    @property
    def team_data(self):
        """List of stats dicts, one per arranged team on the page.

        Returns an empty list when the page has no 'team' section.
        """
        soup = self.tables.get('team')
        if soup is None:
            return []

        type_ = 'teams'
        data = []

        for team in soup.find_all(text='Partner(s):'):
            # Climb from the label text to the element enclosing the team's stats.
            table = team.parent.parent.parent.parent.parent.parent.parent.parent
            values = self.extract_values(table)
            d = self.format_values(type_, values)
            d['win_percentage'] = self.calc_win_percentage(d['wins'], d['losses'])
            data.append(d)

        return data

    @property
    def clan(self):
        """Text of the first link whose href contains 'ClanTag=', or None."""
        # Look in the parsed 'info' section, not in an HTML attribute of the
        # page's first <table>.
        soup = self.tables.get('info')
        if soup is None:
            return None

        clan_url = soup.find(href=re.compile('ClanTag='))
        if clan_url is None:
            return None
        return clan_url.get_text()

    @staticmethod
    def format_values(type_, values):
        """Pick and format known fields out of a flat list of raw values.

        For each field, ``data_positions[type_][field]`` supplies the index
        into ``values`` ('position') and an optional callable ('function')
        applied to the raw value.  Fields with a falsy config entry or a
        falsy raw value are left out of the result.

        Args:
            type_: Key into ``data_positions`` ('individual' or 'teams' here).
            values: Raw values in page order.

        Returns:
            Dict mapping field name to formatted value.
        """
        fields = ['wins', 'losses', 'partners', 'level', 'rank', 'experience']
        data = {}

        for field in fields:
            meta_data = data_positions[type_][field]
            if not meta_data:
                continue
            raw = values[meta_data['position']]
            if not raw:
                continue
            formatter = meta_data['function']
            data[field] = formatter(raw) if formatter else raw

        return data

    @staticmethod
    def calc_win_percentage(wins, losses):
        """Return wins as a percentage of all games, rounded to 2 decimals.

        Returns 0.0 when no games were played instead of dividing by zero.
        """
        wins = int(wins)
        total = wins + int(losses)
        if total == 0:
            return 0.0
        return round((100 * wins) / total, 2)

    @staticmethod
    def extract_values(table):
        """Collect the text of every ``<b>`` element in ``table``.

        The fourth ``<b>`` (index 3) holds the partners: the text of each of
        its children is gathered into a list, with empty strings from
        separator elements dropped.  All other entries are plain strings.
        """
        values = []

        for i, value in enumerate(table.find_all('b')):
            if i == 3:
                partners = [x.get_text() for x in value]
                # Filter rather than remove(''): works with zero, one or
                # many separators between partner names.
                values.append([name for name in partners if name != ''])
            else:
                values.append(value.get_text())

        return values

In [5]:
# Example: scrape one player's profile and display everything that was parsed.
profile = Profile(player='romantichuman', server='northrend')
profile.parse()

-- getting soup --


{'individual': {'random_team': {'experience': 2115,
   'level': 10,
   'losses': 18,
   'rank': None,
   'win_percentage': 64.0,
   'wins': 32},
  'solo': {'experience': 20427,
   'level': 45,
   'losses': 231,
   'rank': 8,
   'win_percentage': 88.2,
   'wins': 1727}},
 'info': {'additional_info': 'twitter.com/YoanMerlo',
  'clan': None,
  'home_page': 'twitch.tv/ToD',
  'last_ladder_game': '2018-11-15',
  'player': 'romantichuman',
  'server': 'northrend'},
 'main_race': 'human',
 'team': [{'level': 6,
   'losses': 2,
   'partners': ['Lado'],
   'rank': None,
   'win_percentage': 50.0,
   'wins': 2},
  {'level': 5,
   'losses': 1,
   'partners': ['123456789012345', 'LadoBlanco'],
   'rank': 355,
   'win_percentage': 50.0,
   'wins': 1},
  {'level': 11,
   'losses': 0,
   'partners': ['LUL'],
   'rank': None,
   'win_percentage': 100.0,
   'wins': 6}]}

In [None]:
# The standalone parse_main_race() helper no longer exists; the value is a
# property of the Profile instance created above.
profile.main_race

In [None]:
# The standalone parse_last_ladder_game() helper no longer exists; the value
# is a property of the Profile instance created above.
profile.last_ladder_game

In [None]:
# parse_additional_info is a property of the Profile instance created above,
# not a module-level function taking a table.
additional_info = profile.parse_additional_info

In [None]:
# The standalone parse_home_page() helper no longer exists; the value is a
# property of the Profile instance created above.
profile.home_page

In [None]:
# The standalone parse_clan() helper no longer exists; the value is a
# property of the Profile instance created above.
profile.clan