In [88]:
import os
import json
from google.cloud import bigquery
import pandas as pd

from history import HistoryPage
from profile import Profile

In [3]:
def create_dataset(dataset_ref, location='US'):
    """Create the BigQuery dataset identified by ``dataset_ref`` if it is missing.

    Uses the module-level ``client`` to issue the request and prints a status
    line either way.

    Args:
        dataset_ref: Reference passed straight to ``bigquery.Dataset``.
        location: Geographic location assigned to the dataset before it is
            created (defaults to ``'US'``).

    Returns:
        None.

    Raises:
        Any API error other than "already exists" (e.g. permission or network
        failures) is propagated instead of being reported as an existing
        dataset.
    """
    # Imported locally so this helper does not depend on the notebook's import cell.
    from google.cloud.exceptions import Conflict

    dataset = bigquery.Dataset(dataset_ref)
    dataset.location = location
    # Report the id of the dataset actually being created rather than whatever
    # the notebook-level ``dataset_id`` variable happens to hold.
    name = dataset.dataset_id
    try:
        client.create_dataset(dataset)
    except Conflict:
        # Conflict is the API's "resource already exists" error.
        print('\n-- Dataset {} already exists --'.format(name))
    else:
        print('\n-- Dataset {} created --'.format(name))
        
def check_table_exists(table_ref):
    """Return True when ``table_ref`` resolves to an existing BigQuery table.

    The lookup goes through the module-level ``client``.

    Args:
        table_ref: Table reference handed to ``client.get_table``.

    Returns:
        bool: True if the lookup succeeds, False if the API reports the table
        as not found.

    Raises:
        Any other API error (authentication, network, quota) is propagated so
        that a failed lookup is not mistaken for a missing table.
    """
    # Imported locally so this helper does not depend on the notebook's import cell.
    from google.cloud.exceptions import NotFound

    try:
        client.get_table(table_ref)
    except NotFound:
        return False
    return True


In [4]:
def get_schema_from_json(input_json, schema=None):
    """Convert a JSON-style field list into ``bigquery.SchemaField`` objects.

    Args:
        input_json: List of dicts, each with ``'name'`` and ``'type'`` keys,
            an optional ``'mode'`` (defaults to ``'NULLABLE'``) and an optional
            nested ``'fields'`` list in the same format. The list is only read,
            never modified.
        schema: Optional list to append the converted fields to. A fresh list
            is created when omitted.

    Returns:
        list: ``schema`` with one ``SchemaField`` appended per input entry.
        Entries are appended from the LAST input element to the first, which
        matches the column order this helper has always produced.
    """
    # A ``None`` sentinel avoids sharing one default list between calls.
    if schema is None:
        schema = []
    if not input_json:
        return schema

    # Walk a reversed view instead of popping, so the caller's configuration
    # can be converted again later.
    for cur in reversed(input_json):
        if 'fields' in cur:
            nested = get_schema_from_json(cur['fields'], [])
        else:
            nested = []
        schema.append(
            bigquery.SchemaField(
                name=cur['name'],
                field_type=cur['type'],
                mode=cur.get('mode', 'NULLABLE'),
                fields=nested,
            )
        )
    return schema

In [5]:
def divide_into_batches(l, n):
    """Lazily split the sequence ``l`` into consecutive slices of at most ``n`` items.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    total = len(l)
    offsets = range(0, total, n)
    for start in offsets:
        end = start + n
        yield l[start:end]

In [6]:
# Dataset that every table in this notebook lives in.
dataset_id = 'wc3'

# Authenticate with the service-account key file stored next to the notebook.
# NOTE(review): the key file name is hardcoded; consider reading the path from
# configuration so the notebook is portable.
bigquery_credpath = os.path.abspath('./BigQuery Reader Project-88493810ca62.json')
client = bigquery.Client.from_service_account_json(bigquery_credpath)
dataset_ref = client.dataset(dataset_id)

# Load-job settings: detect the schema automatically and skip one leading row.
# NOTE(review): job_config is not referenced by any other cell visible here.
job_config = bigquery.LoadJobConfig()
job_config.autodetect = True
job_config.skip_leading_rows = 1

In [7]:
# Make sure the target dataset exists before any table work starts.
create_dataset(dataset_ref, 'US')


-- Dataset wc3 already exists --


In [8]:
print('STARTING')

# Table definitions (name + field list per table) are kept in a JSON file
# next to the notebook.
with open('./table_config.json', 'r') as f:
    configurations = json.load(f)

# Re-resolve the dataset so this cell can run on its own, then ensure it exists.
dataset_id = 'wc3'
dataset_ref = client.dataset(dataset_id)
create_dataset(dataset_ref)

STARTING

-- Dataset wc3 already exists --


In [9]:
# Create every configured table that does not exist yet and backfill it from
# the matching JSON file under ./data_backfill/.
for config in configurations:
    table_name = config.get('name')
    fields = config.get('fields')
    print(table_name)
    schema = get_schema_from_json(fields, [])
    print(schema)

    table_ref = dataset_ref.table(table_name)
    table = bigquery.Table(table_ref, schema=schema)
    if check_table_exists(table_ref):
        # Existing tables are left untouched; no backfill is attempted.
        print('\n-- Table {} already exists --\n\n'.format(table_ref))

    else:
        table = client.create_table(table)
        file_path = './data_backfill/{}.json'.format(table_name)
        with open(file_path, 'r') as f:
            data = json.load(f)
        print(len(data))

        # Rows are streamed in chunks of 10000 rather than in one request.
        batches = divide_into_batches(data, 10000)
        for batch in batches:
            # insert_rows returns a list of per-row error mappings; an empty
            # list means the whole batch was accepted.
            errors = client.insert_rows(table, batch)
            if errors:
                print('Insert errors for table {} ({} rows rejected), first: {}'.format(
                    table_name, len(errors), errors[0]))
        print('\n-- Table {} created --\n\n'.format(table_name))

history2
[SchemaField('winner', 'STRING', 'REPEATED', None, ()), SchemaField('team_two', 'STRING', 'REPEATED', None, ()), SchemaField('team_one', 'STRING', 'REPEATED', None, ()), SchemaField('map', 'STRING', 'NULLABLE', None, ()), SchemaField('game_id', 'INTEGER', 'NULLABLE', None, ()), SchemaField('game_type', 'STRING', 'NULLABLE', None, ()), SchemaField('game_length', 'INTEGER', 'NULLABLE', None, ()), SchemaField('date', 'DATETIME', 'NULLABLE', None, ())]

-- Table TableReference(DatasetReference('bigquery-reader-project', 'wc3'), 'history2') already exists --




In [None]:
# Example chat command: !record followgrubby romantichuman @northrend
# Parsed inputs: {'player_one': 'followgrubby', 'player_two': 'romantichuman', 'server': 'northrend'}

In [113]:
# Example lookup: head-to-head record between two players on one server.
data_input = dict(
    player_one='followgrubby',
    player_two='Fall3n',
    server='northrend',
)

In [114]:
# Fetch every stored game in which the two players faced each other, newest first.
# The server name selects the table and cannot be sent as a query parameter,
# so it is validated before being placed into the SQL text.
server = data_input['server']
if not server.replace('_', '').isalnum():
    raise ValueError('Invalid server name: {!r}'.format(server))

query = ('''SELECT * FROM  `bigquery-reader-project.wc3.{server}`,
              UNNEST(team_one) AS first,
              UNNEST(team_two) AS second
            WHERE
                first IN (@player_one, @player_two)
            AND
                second IN (@player_one, @player_two)
            ORDER BY date DESC
        '''.format(server=server))

# Player names are sent as query parameters instead of being formatted into
# the SQL string, so quotes in a name cannot change the query.
query_config = bigquery.QueryJobConfig()
query_config.query_parameters = [
    bigquery.ScalarQueryParameter('player_one', 'STRING', data_input['player_one']),
    bigquery.ScalarQueryParameter('player_two', 'STRING', data_input['player_two']),
]

df = client.query(query, job_config=query_config).to_dataframe()
print(df.shape)
df.head(3)

(1, 10)


Unnamed: 0,winner,team_two,team_one,map,game_id,game_type,game_length,date,first,second
0,[followgrubby],[Fall3n],[followgrubby],Last Refuge-1,79432131,Solo,18,2018-11-01 23:00:00,followgrubby,Fall3n


### Case 1: No data for record

In [115]:
if df.empty:
    # The query returned no games for this pairing, so try to open each
    # player's history page on the requested server.
    # NOTE(review): presumably HistoryPage raises for an unknown player —
    # confirm against history.py.
    try:
        history_page_player_one = HistoryPage(data_input.get('player_one'), data_input.get('server'))
        history_page_player_two = HistoryPage(data_input.get('player_two'), data_input.get('server'))
    except Exception as e:
        # Print the failure rather than stopping the notebook.
        print(e)
        print('Invalid player')

### Case 2: Have data for record
Make sure it's up to date.

In [116]:
# The query sorts by date descending, so the row labelled 0 holds the most
# recent stored game.
last_date = df['date'].loc[0]
last_date

Timestamp('2018-11-01 23:00:00')

In [None]:
# Start scraping history until this date.
# Update database.
# Join old and new.
# Calculate record.

In [117]:
# Build a HistoryPage for player_one on the selected server.
# NOTE(review): only player_one's history is opened here — presumably enough
# because shared games appear in both players' histories; confirm.
history_page = HistoryPage(data_input.get('player_one'), data_input.get('server'))

In [118]:
# Collect everything history_page.games() produces into a list.
# NOTE(review): `data` is also used for the backfill rows in an earlier cell;
# this rebinding replaces that value.
data = list(history_page.games())

In [122]:
# Turn the scraped games into a frame and keep only those played after the
# newest game already stored in BigQuery (last_date).
df_new = pd.DataFrame(data)
if not df_new.empty:
    # Skipped when nothing was scraped: the frame then has no 'date' column.
    df_new['date'] = pd.to_datetime(df_new['date'])
    # Boolean indexing returns a new frame, so the result must be assigned
    # back for the filter to take effect.
    df_new = df_new[df_new['date'] > last_date].reset_index(drop=True)
df_new.head(3)

Unnamed: 0,date,game_id,game_length,game_type,map,team_one,team_two,winner
0,2018-12-01 19:25:00,79811389,5,Solo,Last Refuge,[followgrubby],[Kuoro-Eetu],[followgrubby]
1,2018-12-01 19:18:00,79811310,9,Solo,Northern Isles,[followgrubby],[Fall3n],[followgrubby]
2,2018-11-28 02:46:00,79780015,8,Solo,Concealed Hill,[followgrubby],[4222222222222],[followgrubby]


In [None]:
# Placeholder for handling newly scraped games; nothing is executed yet.
if not df_new.empty:
    # TODO: Upload to BQ
    # TODO: Combine
    pass

In [129]:
# Display the team_one column of the scraped games for inspection.
df_new['team_one']

0     [followgrubby]
1     [followgrubby]
2     [followgrubby]
3     [followgrubby]
4     [followgrubby]
5     [followgrubby]
6     [followgrubby]
7     [followgrubby]
8     [followgrubby]
9     [followgrubby]
10    [followgrubby]
11    [followgrubby]
12    [followgrubby]
13    [followgrubby]
14    [followgrubby]
15    [followgrubby]
16    [followgrubby]
17    [followgrubby]
18    [followgrubby]
Name: team_one, dtype: object

Unnamed: 0,date,game_id,game_length,game_type,map,team_one,team_two,winner


In [None]:
# 2 cases:
# No new games.
# New games. Loop through game history until we have everything.