# Preamble

In [2]:
import glob
import os
import pandas as pd
import numpy as np
import seaborn as sns
import wikitextparser as wtp
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm, Normalize
from datetime import datetime
from ast import literal_eval

# Lists

In [3]:
# API variables
api_url = 'https://yugipedia.com/api.php'
sets_query_url = '?action=ask&query=[[Category:Set%20Card%20Lists]]|limit%3D5000|order%3Dasc&format=json'
lists_query_url = '?action=query&prop=revisions&rvprop=content&format=json&titles='

rarity_dict = {'c': 'Common', 'r': 'Rare', 'sr': 'Super Rare', 'ur': 'Ultra Rare', 'utr': 'Ultimate Rare', 'n': 'Normal', 'nr': 'Normal Rare', 'sp': 'Short Print', 'ssp': 'Super Short Print', 'hfr': 'Holofoil Rare', 'scr': 'Secret Rare', 'uscr': 'Ultra Secret Rare', 'scur': 'Secret Ultra Rare', 'pscr': 'Prismatic Secret Rare', 'hgr': 'Holographic Rare', ' gr': 'Ghost Rare', 'pr': 'Parallel Rare', 'npr': 'Normal Parallel Rare', 'pc': 'Parallel Common', 'spr': 'Super Parallel Rare', 'upr': 'Ultra Parallel Rare', 'dnpr': 'Duel Terminal Normal Parallel Rare', 'dpc': 'Duel Terminal Parallel Common', 'drpr': 'Duel Terminal Rare Parallel Rare', 'dspr': 'Duel Terminal Super Parallel Rare', 'dupr': 'Duel Terminal Ultra Parallel Rare', 'DScPR': 'Duel Terminal Secret Parallel Rare', 'gur': 'Gold Rare', 'escr': 'Extra Secret Rare', 'ggr': 'Ghost/Gold Rare', 'shr': 'Shatterfoil Rare', 'cr': 'Collector\'s Rare', 'str': 'Starlight Rare', 'gr': 'Ghost Rare', 'gscr': 'Gold Secret Rare', 'sfr': 'Starfoil Rare'}
regions_dict = {'EN':'English','E':'English', 'PT': 'Portuguese', 'P': 'Portuguese', 'DE': 'German', 'G': 'German', 'FR': 'French', 'F': 'French', 'IT': 'Italian', 'I': 'Italian', 'SP': 'Spanish', 'S': 'Spanish', 'JP': 'Japanese', 'AE': 'Asian English', 'KR': 'Korean', 'K': 'Korean', 'TC': 'Traditional Chinese', 'SC': 'Simplified Chinese'}

In [4]:
test = pd.read_json('https://yugipedia.com/api.php?action=askargs&conditions=Legend%20of%20Blue%20Eyes%20White%20Dragon&printouts=German%20release%20date&format=json')
test

Unnamed: 0,query
meta,"{'hash': '7848540055229a5d6df85faadfa4b911', '..."
printrequests,"[{'label': '', 'key': '', 'redi': '', 'typeid'..."
results,{'Legend of Blue Eyes White Dragon': {'printou...
serializer,SMW\Serializers\QueryResultSerializer
version,2


In [5]:
test2 = pd.DataFrame(test['query']['results']).T
test3 = pd.DataFrame(test2['printouts'].values.tolist())
timestamp = test3['German release date'][0][0]['timestamp']
pd.Timestamp(int(timestamp), unit='s').ctime()

'Fri Apr 25 00:00:00 2003'

In [6]:
df = pd.read_json(f'{api_url}{sets_query_url}')
keys = list(df['query']['results'].keys())
print(len(keys))

3949


In [7]:
all_set_lists = pd.DataFrame(columns = ['Set','Card number','Name','Rarity','Print','Quantity'])

for i in range(40):
    first = i*100
    last = (i+1)*100
    titles = '|'.join(keys[first:last]).replace(' ','%20').encode('utf-8')
    df = pd.read_json(f'{api_url}{lists_query_url}{titles}')
    contents = df['query']['pages'].values()
    
    for content in contents:
        if 'revisions' in  content.keys():
            temp = content['revisions'][0]['*']
            parsed = wtp.parse(temp)
            
            for template in parsed.templates:
                if template.name == 'Set list':
                    title = content['title'].split('Lists:')[1]
                    set_df = pd.DataFrame(columns = ['Set', 'Card number', 'Name', 'Rarity', 'Print', 'Quantity'])


                    region = None
                    rarity = None
                    card_print = None
                    qty = None
                    desc = None
                    opt = None
                    list_df = None
                    
                    for argument in template.arguments:
                        if 'region=' in argument:
                            region = argument.string[argument.string.index('=')+1:]
                        elif 'rarities=' in argument:
                            rarity = tuple(rarity_dict.get(i.strip().lower(), i.strip()) for i in argument.string[argument.string.index('=')+1:].split(','))
                        elif 'print=' in argument:
                            card_print = argument.string[argument.string.index('=')+1:]
                        elif 'qty=' in argument:
                            qty = argument.string[argument.string.index('=')+1:]
                        elif 'description=' in argument:
                            desc = argument.string[argument.string.index('=')+1:]
                        elif 'options=' in argument:
                            opt = argument.string[argument.string.index('=')+1:]
                        else:
                            set_list = argument.string[2:-1]
                            lines = set_list.split('\n')

                            list_df = pd.DataFrame([x.split(';') for x in lines])
                            list_df = list_df[~list_df[0].str.contains('!:')]
                            list_df = list_df.applymap(lambda x: x.split('//')[0] if x is not None else x)
                            list_df = list_df.applymap(lambda x: x.strip() if x is not None else x)
                            list_df.replace(r'^\s*$', None, regex = True, inplace = True)

                    if opt != 'noabbr':
                        set_df['Card number'] = list_df[0]
                        set_df['Name'] = list_df[1]
                    else: 
                        set_df['Name'] = list_df[0]

                    if len(list_df.columns)>2: # and rare in str
                        set_df['Rarity'] = list_df[2].apply(lambda x: tuple([rarity_dict.get(y.strip().lower(), y.strip()) for y in x.split(',')]) if x is not None else rarity)
                    else:
                        set_df['Rarity'] = [rarity for _ in set_df.index]

                    if len(list_df.columns)>3 :
                        if card_print is not None: # and new/reprint in str
                            set_df['Print'] = list_df[3].apply(lambda x: x if x is not None else card_print)
                            if len(list_df.columns)>4 and qty is not None:
                                set_df['Quantity'] = list_df[4].apply(lambda x: x if x is not None else qty)
                        elif qty is not None:
                            set_df['Quantity'] = list_df[3].apply(lambda x: x if x is not None else qty)
                    
                    set_df['Set'] = title
                    all_set_lists = pd.concat([all_set_lists, set_df], ignore_index=True)
                    
        else:
            print('Error:', content['title'])

Error: B"Set Card Lists:The Lost Millennium: Special Edition (TCG-PT)
Error: Set Card Lists:2-Player Starter Deck: Yuya
Error: B"Set Card Lists:Yu-Gi-Oh! R Volume 5 promotional card (TCG-EN)
Error: Set Card Lists:Expert Edition Volume.3 (OCG-KR)
Error: Set Card Lists:Invasion: Vengeance: Special Edition (TCG-FR)
Error: B"Set Card Lists:Weekly Shonen Jump March 2014 membership promotional card (TCG-EN)
Error: Set Card Lists:Yu-Gi-Oh: Super Fusion! Bonds That Transcend Time Movie Pack (OCG-KR)
Error: Set Card Lists:Sneak Peek Participation Cards: Series 5 (TCG-IT)
Error: B"Set Card Lists:Onslaught of the Fire Kings Structure Deck (TCG-SP)
Error: Set Card Lists:Starter Deck: Joey (TCG-EN)
Error: B"Set Card Lists:Crimson Crisis: Special Edition (TCG-EN)
Error: Set Card Lists:Structure Deck: Warrior's Strike (OCG-KR)
Error: Set Card Lists:Yu-Gi-Oh! Duel Monsters VI: Expert 2 Game Guide 1 Promos (OCG-JP)
Error: Set Card Lists:Invasion: Vengeance: Special Edition (TCG-SP)
Error: Set Card List

In [8]:
all_set_lists

Unnamed: 0,Set,Card number,Name,Rarity,Print,Quantity
0,Absolute Powerforce (TCG-SP),ABPF-SP000,Gravekeeper's Priestess,"(Super Rare,)",,
1,Absolute Powerforce (TCG-SP),ABPF-SP001,Unicycular,"(Common,)",,
2,Absolute Powerforce (TCG-SP),ABPF-SP002,Bicular,"(Common,)",,
3,Absolute Powerforce (TCG-SP),ABPF-SP003,Tricular,"(Common,)",,
4,Absolute Powerforce (TCG-SP),ABPF-SP004,Drill Synchron,"(Rare,)",,
...,...,...,...,...,...,...
71056,Jump Festa Limited Pack 2020 (OCG-JP),JF20-JP001,Seleglare the Luminous Lunar Dragon,"(Super Rare,)",,
71057,Jump Festa Limited Pack 2020 (OCG-JP),JF20-JP002,Ret-time Reviver Emit-ter,"(Super Rare,)",,
71058,Jump Festa Limited Pack 2020 (OCG-JP),JF20-JP003,Rampaging Smashtank Rhynosaber,"(Super Rare,)",,
71059,Jump Festa Limited Pack 2020 (OCG-JP),JF20-JP004,Yaminabe Party,"(Super Rare,)",,


In [9]:
all_set_lists.groupby('Card number', dropna=False).nunique()

Unnamed: 0_level_0,Set,Name,Rarity,Print,Quantity
Card number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
15AX-JP000,1,1,1,1,0
15AX-JPM01,1,1,1,1,0
15AX-JPM02,1,1,1,1,0
15AX-JPM03,1,1,1,1,0
15AX-JPM04,1,1,1,1,0
...,...,...,...,...,...
ZTIN-FR021,1,1,1,0,0
ZTIN-FRV01,1,1,1,0,0
ZTIN-FRV02,1,1,1,0,0
ZTIN-FRV03,1,1,1,0,0


In [10]:
all_set_lists.groupby('Name', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Rarity,Print,Quantity
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"""A"" Cell Breeding Device",4,4,1,1,0
"""A"" Cell Incubator",5,5,1,1,0
"""A"" Cell Recombination Device",4,4,1,0,0
"""A"" Cell Scatter Burst",3,3,1,1,0
"""Infernoble Arms - Durendal""",6,6,1,1,0
...,...,...,...,...,...
Zubaba Knight,30,30,5,0,1
Zubababancho Gagagacoat,2,2,1,1,0
"Zure, Knight of Dark World",13,13,1,0,1
Zushin the Sleeping Giant,9,9,3,1,0


In [11]:
all_set_lists.explode('Rarity').groupby('Rarity').nunique()

Unnamed: 0_level_0,Set,Card number,Name,Print,Quantity
Rarity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10000 Secret Rare,7,7,1,1,0
20ScR,1,16,16,0,0
20th Secret Rare,12,84,83,2,0
20th Secret rare,1,2,2,0,0
Collector's Rare,4,60,15,2,0
Collectors Rare,2,20,20,2,0
Common,844,35711,6897,3,3
Duel Terminal Normal Parallel Rare,17,611,424,0,0
Duel Terminal Normal Rare Parallel Rare,1,1,1,0,0
Duel Terminal Rare Parallel Rare,14,161,104,0,0


In [12]:
all_set_lists.groupby('Print', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Name,Rarity,Quantity
Print,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
European & Oceanian debut,1,6,6,1,0
European debut,7,10,10,2,0
New,154,4579,1857,35,3
New art,2,4,2,1,0
New artwork,6,15,7,4,0
North American debut,1,2,2,1,0
Oceanian debut,1,1,1,1,0
Reprint,149,6279,2594,33,3
Reprint (New Art),1,1,1,1,0
Reprint (functional errata),13,19,14,7,0


In [13]:
all_set_lists.groupby('Quantity', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Name,Rarity,Print
Quantity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.0,115,4292,1050,7,2
2.0,115,410,123,3,2
3.0,16,26,10,1,2
,1666,65568,10580,112,10


In [14]:
all_set_lists[all_set_lists['Card number']=='!: header::Kaiba Deck']

Unnamed: 0,Set,Card number,Name,Rarity,Print,Quantity
