In [152]:
import requests
import json
import pandas
import csv
from data.calculatedgg_api.api_interfacer import CalculatedApiInterfacer


class ReplayScraper:
    """Fetch replay data from the calculated.gg API and flatten it per player.

    ``get_replay_data`` is the main entry point: it downloads a replay's
    metadata and its basic player stats, then returns one flat dict per
    player.
    """

    BASE_URL = 'https://calculated.gg/api/'
    # Seconds to wait on the API; requests never times out unless told to.
    REQUEST_TIMEOUT = 30

    def _get_json(self, path):
        """GET ``BASE_URL + path`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        response = requests.get(self.BASE_URL + path,
                                timeout=self.REQUEST_TIMEOUT)
        # Fail on the HTTP status instead of trying to parse an error page.
        response.raise_for_status()
        return json.loads(response.text)

    def get_replay_meta(self, replay_id):
        """Return the decoded JSON from the ``replay/<replay_id>`` endpoint.

        ``replay_id`` must be a string; it is concatenated into the URL.
        """
        return self._get_json('replay/' + replay_id)

    def get_basic_player_stats(self, replay_id):
        """Return the decoded JSON from ``replay/<replay_id>/basic_player_stats``.

        ``replay_id`` must be a string; it is concatenated into the URL.
        """
        return self._get_json('replay/' + replay_id + '/basic_player_stats')

    def get_replay_data(self, replay_id):
        """Download one replay and return a list of per-player row dicts.

        The basic player stats are attached to the replay metadata under the
        ``'player_stats'`` key before the result is passed to ``format_data``.
        """
        # These are instance methods, so they must be reached through self;
        # calling them as bare names raised NameError.
        rp_meta = self.get_replay_meta(replay_id)
        rp_meta['player_stats'] = self.get_basic_player_stats(replay_id)
        return self.format_data(rp_meta)

    @staticmethod
    def format_data(r):
        """Flatten a replay dict into one dict per player.

        Args:
            r: mapping with the keys read below: ``'players'`` (each entry
                having ``'name'`` and ``'isOrange'``), ``'gameScore'``,
                ``'id'``, ``'gameMode'`` and ``'player_stats'`` (each entry
                having ``'title'`` and ``'chartDataPoints'``, whose items
                carry ``'name'`` and ``'value'``).

        Returns:
            A list of dicts, one per distinct player name, in the order the
            players appear in ``r['players']``. Each dict has ``name``,
            ``isOrange``, ``won``, ``game_id`` and ``game_type``, plus one
            entry per stat keyed by the stat's title.

        Raises:
            KeyError: if a stat data point names a player that is not listed
                in ``r['players']``, or an expected key is missing.
        """
        # Static methods are not in scope as bare names inside the class body,
        # so they are referenced through the class.
        winner = ReplayScraper.winning_team(r['gameScore'])

        # Rows are keyed by player name so stat values can be attached below;
        # two players sharing a name would collapse into a single row.
        player_data = {}
        for player in r['players']:
            player_data[player['name']] = {
                'name': player['name'],
                'isOrange': player['isOrange'],
                'won': ReplayScraper.player_team(player['isOrange']) == winner,
                'game_id': r['id'],
                'game_type': r['gameMode'],
            }

        for stat in r['player_stats']:
            for player_stat in stat['chartDataPoints']:
                player_data[player_stat['name']][stat['title']] = player_stat['value']

        return list(player_data.values())

    @staticmethod
    def winning_team(game_score):
        """Return ``'Blue'`` if ``team0Score`` beats ``team1Score``, else ``'Orange'``.

        Equal scores also yield ``'Orange'``.
        """
        if game_score['team0Score'] > game_score['team1Score']:
            return 'Blue'
        return 'Orange'

    @staticmethod
    def player_team(is_orange):
        """Return ``'Orange'`` when ``is_orange`` is truthy, otherwise ``'Blue'``."""
        return 'Orange' if is_orange else 'Blue'


In [153]:
# Ordered CSV header, passed to csv.DictWriter as `fieldnames` further down.
# The first five names are the keys ReplayScraper.format_data sets on every
# row; the rest are presumably the `title` values returned by the
# basic_player_stats endpoint (confirm against the API).
columns = [
    'name', 'game_id', 'game_type', 'isOrange', 'won',
    'average hit distance', 'ball hit forward', 'dribbles', 'passes',
    'time close to ball', 'time closest to ball', 'time furthest from ball',
    'time behind ball', 'time in front ball',
    'time high in air', 'time low in air', 'time on ground',
    'time in defending third', 'time in neutral third',
    'time in attacking third',
    'time in defending half', 'time in attacking half',
    'time in corner',
    'time most forward player', 'time between players',
    'time most back player',
    'time near wall',
    'time in front of center of mass', 'time behind center of mass',
    'speed', 'possession time',
    'turnovers', 'turnovers on my half', 'turnovers on their half',
    'wasted collection',
    'num small boosts', 'num large boosts', 'boost ratio',
    'used boost efficiency', 'turnover efficiency',
    'shot %', 'useful/hits',
    'time at boost speed', 'time at slow speed', 'time at super sonic',
    'boost usage', 'num stolen boosts',
    'time low boost', 'time no boost',
    'collection boost efficiency', 'total boost efficiency',
    'aerial efficiency', 'aerials',
]

In [None]:
# Fetch up to 10 replay ids, scrape each one, and write one CSV row per player.
cai = CalculatedApiInterfacer()
replay_list = cai.get_all_replay_ids(limit=10)
rs = ReplayScraper()

# newline='' lets the csv module control line endings itself (otherwise extra
# blank rows can appear on platforms that translate '\n'); the explicit
# encoding keeps non-ASCII player names from depending on the locale default.
with open('/Users/Ben/Desktop/test.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=columns)
    writer.writeheader()
    for replay_id in replay_list:
        replay_data = rs.get_replay_data(replay_id)
        # NOTE: DictWriter raises ValueError if a row has a key that is not
        # listed in `columns` (the default extrasaction is 'raise').
        for player_data in replay_data:
            writer.writerow(player_data)
