# Use the BoardGameGeek XML API to get board game features

In [40]:
import csv
import requests
import xml.etree.ElementTree as ET
from tqdm import tqdm
import time

def get_id_list(page_num):
    """Read the game ids and links saved for one listing page.

    The file ``games/page_<page_num>`` is parsed as CSV. Column 0 of each row
    is taken as the game id and column 2 as the game link.

    Args:
        page_num: Page number used to build the input file name.

    Returns:
        A tuple ``(api_id_list, ids, glinks)``: ``api_id_list`` is the ids
        joined with commas, ``ids`` and ``glinks`` are parallel lists of
        strings in file order.

    Raises:
        OSError: If the page file cannot be opened.
        IndexError: If a non-empty row has fewer than three columns.
    """
    ids = []
    glinks = []

    # The mode is an argument of open(), not of str.format() (where it was
    # silently ignored). newline='' lets the csv module handle line endings
    # itself, including newlines embedded in quoted fields.
    with open('games/page_{}'.format(page_num), 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to read.
                continue
            ids.append(row[0])
            glinks.append(row[2])

    return ','.join(ids), ids, glinks

def get_game_tree(api_id_list, timeout=30):
    """Fetch several games from the BoardGameGeek XML API and parse the reply.

    Args:
        api_id_list: Comma-separated game ids, inserted verbatim into the
            request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The root ``xml.etree.ElementTree.Element`` of the response document.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status.
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
    """
    # stats=1 presumably asks the API to include the <statistics> block that
    # game_info reads -- confirm against the API documentation.
    api_q = 'http://www.boardgamegeek.com/xmlapi/boardgame/{}?stats=1'.format(api_id_list)
    r = requests.get(api_q, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the parser.
    r.raise_for_status()

    # Parse the raw bytes so the parser honours the encoding declared in the
    # XML itself rather than the encoding requests guesses for r.text.
    return ET.fromstring(r.content)

class game_info():
    """Flat record of one game's fields taken from a parsed API response.

    Attributes set by ``__init__``:
        gid, glink: Stored exactly as passed in.
        gname: Text of the ``name`` element marked ``primary='true'``.
        subdomain, bgfamily, mechanic, category: Lists of
            ``(objectid, text)`` tuples, one per matching child element.
        yearpublished, minplaytime, maxplaytime, age, minplayers,
        maxplayers, numratings: ``int`` values.
        avgrating, bayesrating, complexity, ratingstd: ``float`` values.

    Any scalar field that is missing, empty, or unparsable is stored as the
    empty string ``''``. If the response holds no ``boardgame`` element for
    ``gid`` at all, every scalar is ``''`` and every list is empty.
    """

    def __init__(self, gid, glink, tree):
        """Extract the fields of game ``gid`` from ``tree``.

        Args:
            gid: Game id, matched against the ``objectid`` attribute of the
                ``boardgame`` children of ``tree``.
            glink: Link for the game; stored unchanged.
            tree: Root element of the parsed API response.
        """
        # None when the response contains no entry for this id; the helpers
        # below treat that the same as "every field is missing".
        root = tree.find("boardgame[@objectid='{}']".format(gid))

        self.gid = gid
        self.glink = glink

        self.gname = self._parse(root, "name[@primary='true']", str)

        self.subdomain = self._pairs(root, 'boardgamesubdomain')
        self.bgfamily = self._pairs(root, 'boardgamefamily')
        self.mechanic = self._pairs(root, 'boardgamemechanic')
        self.category = self._pairs(root, 'boardgamecategory')

        self.yearpublished = self._parse(root, 'yearpublished', int)
        self.minplaytime = self._parse(root, 'minplaytime', int)
        self.maxplaytime = self._parse(root, 'maxplaytime', int)
        self.age = self._parse(root, 'age', int)
        self.minplayers = self._parse(root, 'minplayers', int)
        self.maxplayers = self._parse(root, 'maxplayers', int)

        self.avgrating = self._parse(root, 'statistics/ratings/average', float)
        self.bayesrating = self._parse(root, 'statistics/ratings/bayesaverage', float)
        self.complexity = self._parse(root, 'statistics/ratings/averageweight', float)
        self.numratings = self._parse(root, 'statistics/ratings/usersrated', int)
        self.ratingstd = self._parse(root, 'statistics/ratings/stddev', float)

    @staticmethod
    def _parse(root, path, cast):
        """Return ``cast(text)`` of the element at ``path``, or ``''`` if unavailable."""
        if root is None:
            return ''
        node = root.find(path)
        if node is None or node.text is None:
            return ''
        try:
            return cast(node.text)
        except ValueError:
            # Text is present but is not a valid number for this field.
            return ''

    @staticmethod
    def _pairs(root, tag):
        """Return ``[(objectid, text), ...]`` for every ``tag`` child of ``root``."""
        if root is None:
            return []
        return [(item.attrib['objectid'], item.text) for item in root.findall(tag)]

def write_bg_csv(game_info):
    """Append one game's scalar fields to ``csv/bg_info`` as a single CSV row.

    Column order: gid, gname, glink, yearpublished, minplaytime, maxplaytime,
    age, minplayers, maxplayers, avgrating, bayesrating, complexity,
    numratings, ratingstd. The name is always wrapped in double quotes; every
    other column is written with ``str()``.

    Args:
        game_info: Object exposing the attributes listed above.

    Raises:
        OSError: If ``csv/bg_info`` cannot be opened for appending.
    """
    name = '' if game_info.gname is None else str(game_info.gname)
    # A double quote inside a quoted CSV field must be doubled; otherwise a
    # title containing '"' ends the field early and corrupts the row.
    quoted_name = '"{}"'.format(name.replace('"', '""'))

    columns = [
        str(game_info.gid),
        quoted_name,
        str(game_info.glink),
        str(game_info.yearpublished),
        str(game_info.minplaytime),
        str(game_info.maxplaytime),
        str(game_info.age),
        str(game_info.minplayers),
        str(game_info.maxplayers),
        str(game_info.avgrating),
        str(game_info.bayesrating),
        str(game_info.complexity),
        str(game_info.numratings),
        str(game_info.ratingstd),
    ]
    with open('csv/bg_info', 'a') as f:
        f.write(','.join(columns) + '\n')

In [14]:
def check_feat_dict(game_info, feat_dict=None):
    """Register every feature of a game in the feature lookup table.

    ``feat_dict`` maps a feature id to ``[id, name, kind]`` where ``kind`` is
    ``'sub'``, ``'fam'``, ``'mec'`` or ``'cat'`` depending on which list of
    ``game_info`` the feature came from. An id that is already present is
    left untouched, so the first entry recorded for an id wins.

    Args:
        game_info: Object with ``subdomain``, ``bgfamily``, ``mechanic`` and
            ``category`` attributes, each an iterable of ``(id, name)`` pairs.
        feat_dict: Dict to update in place. When omitted, the module-level
            ``feat_dict`` is used (and created if it does not exist yet). It
            is looked up at call time, so rebinding the global to a fresh
            dict is honoured.

    Returns:
        The updated ``feat_dict`` (the same object that was updated).
    """
    if feat_dict is None:
        # A definition-time default (``feat_dict=feat_dict``) would require the
        # global to exist before this function is defined and would keep
        # pointing at the old dict after the global is reassigned.
        feat_dict = globals().setdefault('feat_dict', {})

    groups = (
        ('sub', game_info.subdomain),
        ('fam', game_info.bgfamily),
        ('mec', game_info.mechanic),
        ('cat', game_info.category),
    )
    for kind, items in groups:
        for item in items:
            # Keys are feature ids, so membership must be tested on the id;
            # testing the whole (id, name) tuple never matches a key.
            if item[0] not in feat_dict:
                feat_dict[item[0]] = [item[0], item[1], kind]

    return feat_dict

In [15]:
def write_feat_csv(game_info):
    """Append ``gid,feature_id`` rows for one game to ``csv/feats``.

    One line is written per entry of the game's ``subdomain``, ``bgfamily``,
    ``mechanic`` and ``category`` lists, in that order. Only the first
    element of each entry (the feature id) is written.

    Args:
        game_info: Object with a ``gid`` attribute and the four feature
            lists named above.
    """
    row_template = '{},{}\n'
    with open('csv/feats', 'a') as out:
        for group_name in ('subdomain', 'bgfamily', 'mechanic', 'category'):
            for feature in getattr(game_info, group_name):
                out.write(row_template.format(game_info.gid, feature[0]))

In [26]:
# Scrape pages 1..100: for each page, load the saved ids, fetch all of its
# games in one API request, then append every game to the CSV outputs.
feat_dict = {}
for page in tqdm(range(1, 101)):
    api_id_list, ids, glinks = get_id_list(page)
    tree = get_game_tree(api_id_list)
    # ids and glinks are parallel lists, so walk them in lockstep.
    for gid, glink in zip(ids, glinks):
        gi = game_info(gid, glink, tree)
        write_bg_csv(gi)
        # Pass the accumulator explicitly: relying on the function's default
        # would update whatever dict was bound when the function was defined,
        # not the fresh one created above.
        feat_dict = check_feat_dict(gi, feat_dict)
        write_feat_csv(gi)
    # Pause between requests to avoid hammering the API.
    time.sleep(1)

100%|██████████| 100/100 [20:58<00:00, 12.59s/it]
