In [33]:
import requests
import importlib
import blizzard_api
import mysql
import mplusdb
import blizzard_credentials
import pandas as pd
import utils
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import datetime

In [112]:
importlib.reload(mplusdb)
mdb = mplusdb.MplusDatabase('.db_config')
realms = mdb.get_utility_table('realm')
dungeons = mdb.get_utility_table('dungeon')
specs = mdb.get_utility_table('spec')

In [113]:
realm_clusters = realms[['cluster_id', 'region']].drop_duplicates()

In [114]:
auth = blizzard_credentials.Credentials('.api_tokens')
access_token = auth.access_token

In [200]:
# this is a bit anti-pattern, but the fastest way

# generate url calls for every valid combination of
# region/realm_cluster/dungeon/period

# S4 starts with period 734
region_encoder = {1:'us', 2:'kr', 3:'eu', 4:'tw'}

all_urls = []
urls_for_period_region_dungeon = {}
for _, row in dungeons.iterrows():
    dungeon_id = row[0]
    if dungeon_id < 244:
        continue;
    period_start = 734 #row[2]
    period_end = 763
    for _, realm in realm_clusters.iterrows():
        cluster_id = realm[0]
        region = region_encoder[realm[1]]
        url_factory = blizzard_api.UrlFactory(
            access_token = access_token, region=region)
        for period in range(period_start, period_end+1):
            url = url_factory.get_mythic_plus_leaderboard_url(
                dungeon_id = dungeon_id, realm_id = cluster_id,
                period = period)
            all_urls.append(url)
            #assign url to a sub-list by region-period
            key = (region, period, dungeon_id)
            if key in urls_for_period_region_dungeon:
                urls_for_period_region_dungeon[key].append(url)
            else:
                urls_for_period_region_dungeon[key] = []
                urls_for_period_region_dungeon[key].append(url)

In [202]:
len(urls_for_period_region_dungeon)

1440

In [119]:
# check math by hand
calls_per_realm = (10 * (764-662)) + (33 * 2)
print('calls per realm', calls_per_realm)
print('total calls', calls_per_realm * len(realm_clusters))

calls per realm 1086
total calls 275844


In [120]:
print('total time (hrs):', len(all_urls) * 0.25 / 3600)
print('total space raw json (Gbs):', 1/1024 * 1.2 * len(all_urls))
print('total space python list (Gbs):', 1/1024 * 0.02 * len(all_urls))

total time (hrs): 6.35
total space raw json (Gbs): 107.15625
total space python list (Gbs): 1.7859375


#### How are we going to do this?

So we got 284,000 url calls we need to make. Each call takes on ~0.25 seconds on average (if we use 10 threads), and generates 1.2Mb of data.

```
Total data size = 273,304 * 1.2Mb = 320 Gb
```

The capacity of my DB is just 20Gb. Oh-oh. I don't even have the storage for this.

#### Is it really 1.2Mb per call? I don't think so:

The raw json is 1.2Mb. Once we extract the data, the list is only 20kb. So total is:

```
Total data size = 273,304 * 0.02Mb = 5 Gb
```

I have plenty of space for this. Yay. Let's proceed.

#### This is how we are going to proceed:
* Break up the API calls into segments based on region and time period. Each segment is a time period within a region, and there are 404 total segments.
* Query each segment, one at a time.
* Aggregate data for each segment, and push to DB
* Keep track of which segment is done using some form of logging

In [121]:
import datetime


class MyLogger():
    """wrapper for a simple logger"""
    __fp = 'logs/mdb_segments.log'
    def __init__(self):
        pass
    
    def log(self, segment_id):
        ts = time.time()
        ts = datetime.datetime.fromtimestamp(ts).strftime('%c')
        with open(self.__fp, 'a') as file:
            #file.write('%s\t%s\t%s\t%s\n' % (ts, segment_id))
            file.write('%s\t%s\n' % (ts, segment_id))
            
    def get_logged_keys(self):
        keys = []
        with open(self.__fp, 'r') as file:
            for line in file:
                key_token = line.split()[-1]
                key = key_token.split('_')
                keys.append((key[0], int(key[1]), int(key[2])))
        return keys

In [197]:
logger = MyLogger()
logged_keys = logger.get_logged_keys()
print(len(logged_keys))

34


In [None]:
importlib.reload(blizzard_api)
importlib.reload(mplusdb)

def api_call(url):
    time.sleep(0.1)
    response = requests.get(url)
    return response
    

def multi_threaded_call(urls):
    """Sends multiple calls to the API at once."""
    threads = []
    with ThreadPoolExecutor(max_workers = 10) as executor:
        for url in urls:
            threads.append(executor.submit(api_call, url))
    agg_result = []
    for task in as_completed(threads):
        agg_result.append(task.result())
    return agg_result


def agg_leaderboards(responses):
    """Joins output of several leaderboards into single list."""
    parser = blizzard_api.ResponseParser()
    
    runs = []
    comps = []
    rosters = []
            
    for resp in responses:
        try:
            leaderboard = parser.parse_keyrun_leaderboard_json(resp.json())
            runs.extend(leaderboard.get_runs_as_tuple_list())
            rosters.extend(leaderboard.get_rosters_as_tuple_list())
            comps.extend(leaderboard.get_run_comps_as_vector_list())
            #runs_comps.extend()
        except:
            print('json parsing error')
    
    
    return runs, comps, rosters

mdb = mplusdb.MplusDatabase('.db_config')

segment = []
t0 = time.time()
i = 0

logger = MyLogger()
logged_keys = logger.get_logged_keys() # segments already done

for key, urls in urls_for_period_region_dungeon.items():
    if key in logged_keys:
        i += 1
        print(key, i)
        continue
    print(key)
    print(len(urls))
    print(datetime.datetime.now())
    t00 = time.time()
    responses = multi_threaded_call(urls)
    t1 = time.time()
    print('api calls', t1 - t00)
    runs, comps, rosters = agg_leaderboards(responses)
    t2 = time.time()
    print('parsing jsons', t2 - t1)
    runs = list(set(runs))
    rosters = list(set(rosters))
    comps = list(set(comps))
    t3 = time.time()
    print('set(data)', t3 - t2)
    
    
    mdb.insert(table = 'run', data = runs)
    t4 = time.time()
    print('inserting runs', t4 - t3)
    
    mdb.insert(table = 'roster', data = rosters)
    t5 = time.time()
    print('inserting rosters', t5 - t4)
    
    mdb.insert(table = 'run_composition', data = comps)
    t5 = time.time()
    print('inserting comps', t5 - t4)
    
    print(len(runs))
    print(len(comps))
    print(len(rosters))
    i += 1
    logger.log('%s_%s_%s' % key)
    print('-------------------')
    if i % 10 == 0:
        time.sleep(20)
    if i == 300:
        break
print('total', time.time() - t0)

('us', 734, 244) 1
('us', 735, 244) 2
('us', 736, 244) 3
('us', 737, 244) 4
('us', 738, 244) 5
('us', 739, 244) 6
('us', 740, 244) 7
('us', 741, 244) 8
('us', 742, 244) 9
('us', 743, 244) 10
('us', 744, 244) 11
('us', 745, 244) 12
('us', 746, 244) 13
('us', 747, 244) 14
('us', 748, 244) 15
('us', 749, 244) 16
('us', 750, 244) 17
('us', 751, 244) 18
('us', 752, 244) 19
('us', 753, 244) 20
('us', 754, 244) 21
('us', 755, 244) 22
('us', 756, 244) 23
('us', 757, 244) 24
('us', 758, 244) 25
('us', 759, 244) 26
('us', 760, 244) 27
('us', 761, 244) 28
('us', 762, 244) 29
('us', 763, 244) 30
('kr', 734, 244) 31
('kr', 735, 244) 32
('kr', 736, 244) 33
('kr', 737, 244) 34
('kr', 738, 244) 35
('kr', 739, 244) 36
('kr', 740, 244) 37
('kr', 741, 244) 38
('kr', 742, 244) 39
('kr', 743, 244) 40
('kr', 744, 244) 41
('kr', 745, 244) 42
('kr', 746, 244) 43
('kr', 747, 244) 44
('kr', 748, 244) 45
('kr', 749, 244) 46
('kr', 750, 244) 47
('kr', 751, 244) 48
('kr', 752, 244) 49
('kr', 753, 244) 50
('kr', 75

In [191]:
19006 + 18821

37827

In [195]:
144 * 1025 / 3600

41.0

In [189]:
for i in runs:
    if i[0] == 5797755210158851746:
        print(i)

(5797755210158851746, 244, 11, 734, 1579775521000, 2541334, 1, 1)
(5797755210158851746, 244, 11, 734, 1579775521000, 2541334, 0, 1)


In [188]:
for i in runs:
    c = 0
    for j in runs:
        if i[0] == j[0]:
            c += 1
            if c > 1:
                print(i[0])

5797755210158851746
5797755210158851746


In [170]:
importlib.reload(utils)
importlib.reload(blizzard_api)
all_urls[0]

test_rq = requests.get(all_urls[0])
test_klb = blizzard_api.KeyRunLeaderboard(test_rq.json())

In [171]:
test_klb.keyruns[0].get_composition_vector()
t0 = time.time()
comps = test_klb.get_run_comps_as_vector_list()
print(time.time()-t0)
print(comps)

0.003947734832763672
[(5801552110138173541, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (5799271910074983455, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (5797755310074983455, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (5800754410138173541, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (5800847510136831535, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (5801142210119588359, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (5802001510137488182, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (5801841210139658014, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [169]:
tuple([1,2])

(1, 2)

In [173]:
print(len(set(comps)))

500


In [79]:
#import pickle
#pickle.dump(leaderboard.get_runs_as_tuple_list(), open('test_objs.pkl', 'wb'))
#print(leaderboard.keyruns)

[<blizzard_api.KeyRun object at 0x7f4bc377a128>, <blizzard_api.KeyRun object at 0x7f4bc377a160>, <blizzard_api.KeyRun object at 0x7f4bc377a2b0>, <blizzard_api.KeyRun object at 0x7f4bc377a400>, <blizzard_api.KeyRun object at 0x7f4bc377a550>, <blizzard_api.KeyRun object at 0x7f4bc377a6a0>, <blizzard_api.KeyRun object at 0x7f4bc377a7f0>, <blizzard_api.KeyRun object at 0x7f4bc377a940>, <blizzard_api.KeyRun object at 0x7f4bc377aa90>, <blizzard_api.KeyRun object at 0x7f4bc377abe0>, <blizzard_api.KeyRun object at 0x7f4bc377ad30>, <blizzard_api.KeyRun object at 0x7f4bc377ae80>, <blizzard_api.KeyRun object at 0x7f4bc377afd0>, <blizzard_api.KeyRun object at 0x7f4bc377d160>, <blizzard_api.KeyRun object at 0x7f4bc377d2b0>, <blizzard_api.KeyRun object at 0x7f4bc377d400>, <blizzard_api.KeyRun object at 0x7f4bc377d550>, <blizzard_api.KeyRun object at 0x7f4bc377d6a0>, <blizzard_api.KeyRun object at 0x7f4bc377d7f0>, <blizzard_api.KeyRun object at 0x7f4bc377d940>, <blizzard_api.KeyRun object at 0x7f4bc3