In [28]:
import requests
import importlib
import blizzard_api
import mysql
import mplusdb
import blizzard_credentials
import pandas as pd
import utils
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import datetime

In [29]:
# Open the project's database wrapper, configured through '.db_config'.
mdb = mplusdb.MplusDatabase('.db_config')
# Load the 'realm' and 'dungeon' utility tables; later cells index and iterate
# over them like pandas DataFrames.
realms = mdb.get_utility_table('realm')
dungeons = mdb.get_utility_table('dungeon')

In [30]:
# Keep one row per distinct (cluster_id, region) pair.
realm_clusters = realms[['cluster_id', 'region']].drop_duplicates()

In [31]:
# Build the credentials helper from '.api_tokens' and keep its access token;
# the token is passed to the URL factory when the leaderboard URLs are built.
auth = blizzard_credentials.Credentials('.api_tokens')
access_token = auth.access_token

In [32]:
# Confirm that a token was obtained without echoing the secret itself into
# the notebook's saved output.
print('access token loaded:', bool(access_token))

In [6]:
# Generate the leaderboard URL for every valid combination of
# region / realm cluster / dungeon / period up front.
# This is a bit of an anti-pattern, but it is the fastest way.

# S4 starts with period 734
region_encoder = {1:'us', 2:'kr', 3:'eu', 4:'tw'}

# Inclusive range of periods to fetch (the same for every dungeon).
# NOTE(review): the original carried a commented-out `row[2]` next to the
# start period - presumably a per-dungeon start; confirm before reviving it.
period_start = 734
period_end = 762

all_urls = []
# Maps (region, period, dungeon_id) -> list of URLs, one per realm cluster.
urls_for_period_region_dungeon = {}
for _, row in dungeons.iterrows():
    # Use .iloc for positional access: integer keys passed to Series[...] are
    # treated as labels, and the positional fallback is deprecated in pandas.
    dungeon_id = row.iloc[0]
    if dungeon_id < 244:
        # NOTE(review): presumably ids below 244 are out of scope - confirm.
        continue
    for _, realm in realm_clusters.iterrows():
        # realm_clusters columns are ['cluster_id', 'region'], in that order.
        cluster_id = realm.iloc[0]
        region = region_encoder[realm.iloc[1]]
        url_factory = blizzard_api.UrlFactory(
            access_token = access_token, region=region)
        for period in range(period_start, period_end+1):
            url = url_factory.get_mythic_plus_leaderboard_url(
                dungeon_id = dungeon_id, realm_id = cluster_id,
                period = period)
            all_urls.append(url)
            # Group the url into its region/period/dungeon segment.
            key = (region, period, dungeon_id)
            urls_for_period_region_dungeon.setdefault(key, []).append(url)

In [7]:
# Number of distinct (region, period, dungeon_id) segments.
len(urls_for_period_region_dungeon)

1392

In [9]:
# check math by hand
# NOTE(review): the constants are not explained anywhere in the notebook.
# (763-662) looks like a count of periods 662..762, which differs from the
# start period 734 used when the URLs are generated - confirm which is current.
calls_per_realm = (10 * (763-662)) + (33 * 2)
print('calls per realm', calls_per_realm)
print('total calls', calls_per_realm * len(realm_clusters))

calls per realm 1076
total calls 273304


In [10]:
# Back-of-the-envelope cost of fetching everything, using the per-call figures
# quoted in the notes that follow: 0.25 s per call, 1.2 MB of raw JSON per
# call, and 0.02 MB per call once reduced to a Python list.
# The 1/1024 factor converts MB to GB.
print('total time (hrs):', len(all_urls) * 0.25 / 3600)
print('total space raw json (Gbs):', 1/1024 * 1.2 * len(all_urls))
print('total space python list (Gbs):', 1/1024 * 0.02 * len(all_urls))

total time (hrs): 6.138333333333334
total space raw json (Gbs): 103.584375
total space python list (Gbs): 1.7264062500000001


#### How are we going to do this?

So we have about 273,000 URL calls to make. Each call takes ~0.25 seconds on average (if we use 10 threads) and generates 1.2 MB of data.

```
Total data size = 273,304 * 1.2Mb = 320 Gb
```

The capacity of my DB is just 20 GB. Uh-oh. I don't even have the storage for this.

#### Is it really 1.2Mb per call? I don't think so:

The raw JSON is 1.2 MB per call, but once we extract the data, the resulting list is only about 20 KB. So the total is:

```
Total data size = 273,304 * 0.02Mb = 5 Gb
```

I have plenty of space for this. Yay. Let's proceed.

#### This is how we are going to proceed:
* Break up the API calls into segments based on region and time period. Each segment is a time period within a region, and there are 404 total segments.
* Query each segment, one at a time.
* Aggregate data for each segment, and push to DB
* Keep track of which segment is done using some form of logging

In [11]:
import datetime


class MyLogger():
    """Append-only progress log of segments that have been processed.

    Every line is ``<timestamp><TAB><segment_id>``. ``get_logged_keys``
    expects the segment id to consist of underscore-separated fields whose
    second and third fields are integers (e.g. ``us_662_244``).

    Args:
        fp: optional path of the log file; defaults to
            'logs/mdb_segments.log'.
    """
    __fp = 'logs/mdb_segments.log'

    def __init__(self, fp=None):
        self._path = self.__fp if fp is None else fp

    def log(self, segment_id):
        """Append one timestamped line for ``segment_id`` to the log file.

        The parent directory is created when it does not exist yet, so the
        first call works on a fresh checkout.
        """
        import os

        stamp = datetime.datetime.now().strftime('%c')
        folder = os.path.dirname(self._path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(self._path, 'a') as file:
            file.write('%s\t%s\n' % (stamp, segment_id))

    def get_logged_keys(self):
        """Return the logged segment ids as ``(str, int, int)`` tuples.

        Returns an empty list when the log file does not exist yet. Blank
        lines are ignored.

        Raises:
            ValueError / IndexError: if a non-blank line does not end in a
                token of the expected ``a_<int>_<int>`` shape.
        """
        keys = []
        try:
            with open(self._path, 'r') as file:
                for line in file:
                    tokens = line.split()
                    if not tokens:
                        continue
                    # The timestamp contains spaces; the id is the last token.
                    parts = tokens[-1].split('_')
                    keys.append((parts[0], int(parts[1]), int(parts[2])))
        except FileNotFoundError:
            # Nothing has been logged yet.
            return []
        return keys

In [13]:
# Read back the segment keys already recorded in the log and show how many
# there are.
logger = MyLogger()
logged_keys = logger.get_logged_keys()
print(len(logged_keys))

0


In [56]:
# Re-execute the local modules so that edits made to them on disk take effect
# without restarting the kernel.
importlib.reload(blizzard_api)
importlib.reload(mplusdb)

def api_call(url, timeout=30):
    """Fetch ``url`` after a short fixed delay.

    Args:
        url: address to request.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block its worker indefinitely.

    Returns:
        The ``requests`` response object.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            including ``Timeout`` when the server does not answer in time.
    """
    # Small pause before every request to space the calls out.
    time.sleep(0.1)
    return requests.get(url, timeout=timeout)
    

def multi_threaded_call(urls):
    """Request every address in ``urls`` through a pool of ten workers.

    Returns:
        A list with one result of ``api_call`` per url. The order is whatever
        ``as_completed`` yields and is not tied to the order of ``urls``.
    """
    with ThreadPoolExecutor(max_workers = 10) as pool:
        pending = [pool.submit(api_call, address) for address in urls]
    # Leaving the ``with`` block waits until every submitted call has finished.
    return [finished.result() for finished in as_completed(pending)]


def agg_leaderboards(responses):
    """Join the runs of several leaderboard responses into a single list.

    Args:
        responses: iterable of response objects exposing ``.json()``.

    Returns:
        A flat list with the run tuples of every response that could be
        parsed. Responses that fail to decode or parse are reported on stdout
        and skipped (best effort).
    """
    agg = []
    parser = blizzard_api.ResponseParser()
    for resp in responses:
        try:
            leaderboard = parser.parse_keyrun_leaderboard_json(resp.json())
            agg.extend(leaderboard.get_runs_as_tuple_list())
        except Exception as err:
            # Catch Exception rather than everything, so that Ctrl-C
            # (KeyboardInterrupt) can still stop a long-running download.
            print('json parsing error', type(err).__name__)
    return agg

# Re-create the database handle for this run.
mdb = mplusdb.MplusDatabase('.db_config')

segment = []
t0 = time.time()  # start of the whole run; used only for the final total
i = 0  # segments handled so far, including those skipped as already logged

logger = MyLogger()
logged_keys = logger.get_logged_keys() # segments already done
done_keys = set(logged_keys)  # constant-time membership test in the loop

for key, urls in urls_for_period_region_dungeon.items():
    if key in done_keys:
        i += 1
        print(key, i)
        continue
    print(key)
    print(len(urls))
    print(datetime.datetime.now())
    # Time each segment on its own; measuring from t0 would report the
    # cumulative run time as this segment's API time.
    t_start = time.time()
    responses = multi_threaded_call(urls)
    t1 = time.time()
    print('api calls', t1 - t_start)
    segment = agg_leaderboards(responses)
    t2 = time.time()
    print('parsing jsons', t2 - t1)
    # Drop duplicate run tuples within the segment.
    segment = list(set(segment))
    t3 = time.time()
    print('set(data)', t3 - t2)
    mdb.insert(table = 'run', data = segment)
    t4 = time.time()
    print('inserting', t4 - t3)
    print(len(segment))
    i += 1
    # Logged only after the insert returned, so a segment whose insert raised
    # is not marked as done and will be retried on the next run.
    logger.log('%s_%s_%s' % key)
    print('-------------------')
    if i % 10 == 0:
        # Pause for 20 s after every 10th segment.
        # NOTE(review): presumably to stay under an API rate limit - confirm.
        time.sleep(20)
    if i == 1000:
        # Hard cap on the number of segments handled in one run.
        break
print('total', time.time() - t0)

('us', 662, 244) 1
('us', 663, 244) 2
('us', 664, 244) 3
('us', 665, 244) 4
('us', 666, 244) 5
('us', 667, 244) 6
('us', 668, 244) 7
('us', 669, 244) 8
('us', 670, 244) 9
('us', 671, 244) 10
('us', 672, 244) 11
('us', 673, 244) 12
('us', 674, 244) 13
('us', 675, 244) 14
('us', 676, 244) 15
('us', 677, 244) 16
('us', 678, 244) 17
('us', 679, 244) 18
('us', 680, 244) 19
('us', 681, 244) 20
('us', 682, 244) 21
('us', 683, 244) 22
('us', 684, 244) 23
('us', 685, 244) 24
('us', 686, 244) 25
('us', 687, 244) 26
('us', 688, 244) 27
('us', 689, 244) 28
('us', 690, 244) 29
('us', 691, 244) 30
('us', 692, 244) 31
('us', 693, 244) 32
('us', 694, 244) 33
('us', 695, 244) 34
('us', 696, 244) 35
('us', 697, 244) 36
('us', 698, 244) 37
('us', 699, 244) 38
('us', 700, 244) 39
('us', 701, 244) 40
('us', 702, 244) 41
('us', 703, 244) 42
('us', 704, 244) 43
('us', 705, 244) 44
('us', 706, 244) 45
('us', 707, 244) 46
('us', 708, 244) 47
('us', 709, 244) 48
('us', 710, 244) 49
('us', 711, 244) 50
('us', 71

('kr', 706, 245) 550
('kr', 707, 245) 551
('kr', 708, 245) 552
('kr', 709, 245) 553
('kr', 710, 245) 554
('kr', 711, 245) 555
('kr', 712, 245) 556
('kr', 713, 245) 557
('kr', 714, 245) 558
('kr', 715, 245) 559
('kr', 716, 245) 560
('kr', 717, 245) 561
('kr', 718, 245) 562
('kr', 719, 245) 563
('kr', 720, 245) 564
('kr', 721, 245) 565
('kr', 722, 245) 566
('kr', 723, 245) 567
('kr', 724, 245) 568
('kr', 725, 245) 569
('kr', 726, 245) 570
('kr', 727, 245) 571
('kr', 728, 245) 572
('kr', 729, 245) 573
('kr', 730, 245) 574
('kr', 731, 245) 575
('kr', 732, 245) 576
('kr', 733, 245) 577
('kr', 734, 245) 578
('kr', 735, 245) 579
('kr', 736, 245) 580
('kr', 737, 245) 581
('kr', 738, 245) 582
('kr', 739, 245) 583
('kr', 740, 245) 584
('kr', 741, 245) 585
('kr', 742, 245) 586
('kr', 743, 245) 587
('kr', 744, 245) 588
('kr', 745, 245) 589
('kr', 746, 245) 590
('kr', 747, 245) 591
('kr', 748, 245) 592
('kr', 749, 245) 593
('kr', 750, 245) 594
('kr', 751, 245) 595
('kr', 752, 245) 596
('kr', 753, 2

Exception: Problem with inserting data into MDB.

In [37]:
# NOTE(review): undocumented scratch arithmetic - the meaning of the operands
# is not stated anywhere in the notebook; annotate or remove.
13238/500/115

0.23022608695652172

In [None]:
100
api calls 19.148154258728027
paring jsons 4.854432821273804
set(data) 0.01677107810974121
inserting 18.750869750976562

In [24]:
# NOTE(review): undocumented scratch estimate; the trailing 1/3600.0 suggests
# a seconds-to-hours conversion, but the other operands are unexplained.
4000 * 408/10 * 1/3600.0

45.333333333333336

In [159]:
# Number of distinct entries currently held in `segment`.
len(set(segment))

32918

In [14]:
# Preview only the first rows; the full list holds tens of thousands of tuples.
segment[:15]

[(1536511719192118214, 244, 1, 0, 5, 1536511719000, 662, 1271153),
 (15363555721139526075, 244, 1, 0, 6, 1536355572000, 662, 1428567),
 (15362544651147066244, 244, 1, 0, 5, 1536254465000, 662, 2030500),
 (15364850981153360016, 244, 1, 1, 2, 1536485098000, 662, 1157806),
 (15364379711153161970, 244, 1, 0, 2, 1536437971000, 662, 1584483),
 (15360990911130511730, 244, 1, 0, 5, 1536099091000, 662, 1777141),
 (15362911661132392937, 244, 1, 1, 4, 1536291166000, 662, 1841771),
 (1536079102195991238, 244, 1, 1, 2, 1536079102000, 662, 1344289),
 (15364342591118518649, 244, 1, 1, 5, 1536434259000, 662, 1433933),
 (1536126595197077729, 244, 1, 1, 2, 1536126595000, 662, 1058287),
 (15365442571114327381, 244, 1, 1, 4, 1536544257000, 662, 1461856),
 (15364838621171264163, 244, 1, 1, 4, 1536483862000, 662, 1783496),
 (15362584861103233254, 244, 1, 0, 3, 1536258486000, 662, 1387741),
 (15364616341133334803, 244, 1, 0, 2, 1536461634000, 662, 1681345),
 (15366274101162539687, 244, 1, 1, 5, 1536627410000

In [71]:
# Time a single leaderboard request and, separately, the parsing of its JSON.
# NOTE(review): `leaderboard_urls` is not defined in any cell visible here -
# confirm where it comes from before re-running this cell on a fresh kernel.
t0 = time.time()
response = requests.get(leaderboard_urls[1])
t1 = time.time()
parser = blizzard_api.ResponseParser()
leaderboard = parser.parse_keyrun_leaderboard_json(response.json())

print(t1 - t0)  # seconds spent on the HTTP request
print(time.time() - t1)  # seconds spent creating the parser and parsing

1.0572872161865234
0.04085087776184082


In [79]:
import pickle

# Save the parsed runs as a test fixture. The `with` block guarantees the
# file is flushed and closed even if pickling fails part-way.
with open('test_objs.pkl', 'wb') as pkl_file:
    pickle.dump(leaderboard.get_runs_as_tuple_list(), pkl_file)
print(leaderboard.keyruns)

[<blizzard_api.KeyRun object at 0x7f4bc377a128>, <blizzard_api.KeyRun object at 0x7f4bc377a160>, <blizzard_api.KeyRun object at 0x7f4bc377a2b0>, <blizzard_api.KeyRun object at 0x7f4bc377a400>, <blizzard_api.KeyRun object at 0x7f4bc377a550>, <blizzard_api.KeyRun object at 0x7f4bc377a6a0>, <blizzard_api.KeyRun object at 0x7f4bc377a7f0>, <blizzard_api.KeyRun object at 0x7f4bc377a940>, <blizzard_api.KeyRun object at 0x7f4bc377aa90>, <blizzard_api.KeyRun object at 0x7f4bc377abe0>, <blizzard_api.KeyRun object at 0x7f4bc377ad30>, <blizzard_api.KeyRun object at 0x7f4bc377ae80>, <blizzard_api.KeyRun object at 0x7f4bc377afd0>, <blizzard_api.KeyRun object at 0x7f4bc377d160>, <blizzard_api.KeyRun object at 0x7f4bc377d2b0>, <blizzard_api.KeyRun object at 0x7f4bc377d400>, <blizzard_api.KeyRun object at 0x7f4bc377d550>, <blizzard_api.KeyRun object at 0x7f4bc377d6a0>, <blizzard_api.KeyRun object at 0x7f4bc377d7f0>, <blizzard_api.KeyRun object at 0x7f4bc377d940>, <blizzard_api.KeyRun object at 0x7f4bc3

In [61]:
# Inspect the decoded JSON payload of `response`.
response.json()

{'_links': {'self': {'href': 'https://us.api.blizzard.com/data/wow/connected-realm/3694/mythic-leaderboard/244/period/663?namespace=dynamic-us'}},
 'map': {'name': "Atal'Dazar", 'id': 1763},
 'period': 663,
 'period_start_timestamp': 1536678000000,
 'period_end_timestamp': 1537282799000,
 'connected_realm': {'href': 'https://us.api.blizzard.com/data/wow/connected-realm/3694?namespace=dynamic-us'},
 'leading_groups': [{'ranking': 1,
   'duration': 2352729,
   'completed_timestamp': 1537235666000,
   'keystone_level': 12,
   'members': [{'profile': {'name': 'Leathi',
      'id': 142614719,
      'realm': {'key': {'href': 'https://us.api.blizzard.com/data/wow/realm/1?namespace=dynamic-us'},
       'id': 1,
       'slug': 'lightbringer'}},
     'faction': {'type': 'ALLIANCE'},
     'specialization': {'key': {'href': 'https://us.api.blizzard.com/data/wow/playable-specialization/102?namespace=static-8.0.1_27026-us'},
      'id': 102}},
    {'profile': {'name': 'Orvilled',
      'id': 1416884