# Preamble

In [1]:
import glob
import os
import pandas as pd
import numpy as np
import seaborn as sns
import wikitextparser as wtp
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm, Normalize
from datetime import datetime
from ast import literal_eval

# Lists

In [2]:
# API variables
api_url = 'https://yugipedia.com/api.php'
sets_query_url = '?action=ask&query=[[Category:Set%20Card%20Lists]]|limit%3D5000|order%3Dasc&format=json'
lists_query_url = '?action=query&prop=revisions&rvprop=content&format=json&titles='

rarity_dict = {'c': 'Common', 'r': 'Rare', 'sr': 'Super Rare', 'ur': 'Ultra Rare', 'utr': 'Ultimate Rare', 'n': 'Normal', 'nr': 'Normal Rare', 'sp': 'Short Print', 'ssp': 'Super Short Print', 'hfr': 'Holofoil Rare', 'scr': 'Secret Rare', 'uscr': 'Ultra Secret Rare', 'scur': 'Secret Ultra Rare', 'pscr': 'Prismatic Secret Rare', 'hgr': 'Holographic Rare', ' gr': 'Ghost Rare', 'pr': 'Parallel Rare', 'npr': 'Normal Parallel Rare', 'pc': 'Parallel Common', 'spr': 'Super Parallel Rare', 'upr': 'Ultra Parallel Rare', 'dnpr': 'Duel Terminal Normal Parallel Rare', 'dpc': 'Duel Terminal Parallel Common', 'drpr': 'Duel Terminal Rare Parallel Rare', 'dspr': 'Duel Terminal Super Parallel Rare', 'dupr': 'Duel Terminal Ultra Parallel Rare', 'DScPR': 'Duel Terminal Secret Parallel Rare', 'gur': 'Gold Rare', 'escr': 'Extra Secret Rare', 'ggr': 'Ghost/Gold Rare', 'shr': 'Shatterfoil Rare', 'cr': 'Collector\'s Rare', 'str': 'Starlight Rare', 'gr': 'Ghost Rare', 'gscr': 'Gold Secret Rare', 'sfr': 'Starfoil Rare'}
regions_dict = {'EN':'English','E':'English', 'PT': 'Portuguese', 'P': 'Portuguese', 'DE': 'German', 'G': 'German', 'FR': 'French', 'F': 'French', 'IT': 'Italian', 'I': 'Italian', 'SP': 'Spanish', 'S': 'Spanish', 'JP': 'Japanese', 'AE': 'Asian English', 'KR': 'Korean', 'K': 'Korean', 'TC': 'Traditional Chinese', 'SC': 'Simplified Chinese'}

In [3]:
test = pd.read_json('https://yugipedia.com/api.php?action=askargs&conditions=Legend%20of%20Blue%20Eyes%20White%20Dragon&printouts=German%20release%20date&format=json')
test

Unnamed: 0,query
meta,"{'hash': '7848540055229a5d6df85faadfa4b911', '..."
printrequests,"[{'label': '', 'key': '', 'redi': '', 'typeid'..."
results,{'Legend of Blue Eyes White Dragon': {'printou...
serializer,SMW\Serializers\QueryResultSerializer
version,2


In [4]:
test2 = pd.DataFrame(test['query']['results']).T
test3 = pd.DataFrame(test2['printouts'].values.tolist())
timestamp = test3['German release date'][0][0]['timestamp']
pd.Timestamp(int(timestamp), unit='s').ctime()

'Fri Apr 25 00:00:00 2003'

In [5]:
df = pd.read_json(f'{api_url}{sets_query_url}')
keys = list(df['query']['results'].keys())
print(len(keys))

3943


In [6]:
all_set_lists = pd.DataFrame(columns = ['Set','Card number','Name','Rarity','Print','Quantity'])

for i in range(40):
    first = i*100
    last = (i+1)*100
    titles = '|'.join(keys[first:last]).replace(' ','%20').encode('utf-8')
    df = pd.read_json(f'{api_url}{lists_query_url}{titles}')
    contents = df['query']['pages'].values()
    
    for content in contents:
        if 'revisions' in  content.keys():
            temp = content['revisions'][0]['*']
            parsed = wtp.parse(temp)
            
            for template in parsed.templates:
                if template.name == 'Set list':
                    title = content['title'].split('Lists:')[1]
                    set_df = pd.DataFrame(columns = ['Set', 'Card number', 'Name', 'Rarity', 'Print', 'Quantity'])


                    region = None
                    rarity = None
                    card_print = None
                    qty = None
                    desc = None
                    opt = None
                    list_df = None
                    
                    for argument in template.arguments:
                        if 'region=' in argument:
                            region = argument.string[argument.string.index('=')+1:]
                        elif 'rarities=' in argument:
                            rarity = tuple(rarity_dict.get(i.strip().lower(), i.strip()) for i in argument.string[argument.string.index('=')+1:].split(','))
                        elif 'print=' in argument:
                            card_print = argument.string[argument.string.index('=')+1:]
                        elif 'qty=' in argument:
                            qty = argument.string[argument.string.index('=')+1:]
                        elif 'description=' in argument:
                            desc = argument.string[argument.string.index('=')+1:]
                        elif 'options=' in argument:
                            opt = argument.string[argument.string.index('=')+1:]
                        else:
                            set_list = argument.string[2:-1]
                            lines = set_list.split('\n')

                            list_df = pd.DataFrame([x.split(';') for x in lines])
                            list_df = list_df[~list_df[0].str.contains('!:')]
                            list_df = list_df.applymap(lambda x: x.split('//')[0] if x is not None else x)
                            list_df = list_df.applymap(lambda x: x.strip() if x is not None else x)
                            list_df.replace(r'^\s*$', None, regex = True, inplace = True)

                    if opt != 'noabbr':
                        set_df['Card number'] = list_df[0]
                        set_df['Name'] = list_df[1]
                    else: 
                        set_df['Name'] = list_df[0]

                    if len(list_df.columns)>2: # and rare in str
                        set_df['Rarity'] = list_df[2].apply(lambda x: tuple([rarity_dict.get(y.strip().lower(), y.strip()) for y in x.split(',')]) if x is not None else rarity)
                    else:
                        set_df['Rarity'] = [rarity for _ in set_df.index]

                    if len(list_df.columns)>3 :
                        if card_print is not None: # and new/reprint in str
                            set_df['Print'] = list_df[3].apply(lambda x: x if x is not None else card_print)
                            if len(list_df.columns)>4 and qty is not None:
                                set_df['Quantity'] = list_df[4].apply(lambda x: x if x is not None else qty)
                        elif qty is not None:
                            set_df['Quantity'] = list_df[3].apply(lambda x: x if x is not None else qty)
                    
                    set_df['Set'] = title
                    all_set_lists = pd.concat([all_set_lists, set_df], ignore_index=True)
                    
        else:
            print('Error:', content['title'])

Error: B"Set Card Lists:Duel Terminal 4 (TCG-EN)
Error: B"Set Card Lists:V Jump Spring 2012 subscription bonus (OCG-JP)
Error: B"Set Card Lists:Duelist Entry Deck VS: Saber Force (OCG-KR)
Error: Set Card Lists:Yu-Gi-Oh! The Dark Side of Dimensions Blu-ray
Error: B"Set Card Lists:Destiny Soldiers (TCG-FR)
Error: Set Card Lists:Yu-Gi-Oh! World Championship 2007 Game Guide promotional card (OCG-JP)
Error: B"Set Card Lists:Starter Deck 2012 (OCG-JP)
Error: Set Card Lists:Starter Deck: Joey (TCG-EN)
Error: Set Card Lists:Yu-Gi-Oh! Duel Monsters VI: Expert 2 Game Guide 1 Promos (OCG-JP)
Error: Set Card Lists:Expert Edition Volume.3 (OCG-JP)
Error: Set Card Lists:Sneak Peek Participation Cards: Series 1 (TCG-EN)
Error: B"Set Card Lists:Advanced Tournament Pack 2015 Vol.4 (OCG-JP)
Error: Set Card Lists:Yu-Gi-Oh! Duel Monsters 8: Reshef of Destruction Game Guide 1 Promos (OCG-JP)
Error: Set Card Lists:The Dark Illusion: Special Edition (TCG-DE)
Error: B"Set Card Lists:Abyss Rising: Special Edit

In [7]:
all_set_lists

Unnamed: 0,Set,Card number,Name,Rarity,Print,Quantity
0,2015 Mega-Tin Mega Pack (TCG-FR),MP15-FR001,Artifact Scythe,"(Super Rare,)",,
1,2015 Mega-Tin Mega Pack (TCG-FR),MP15-FR002,Galaxy Mirror Sage,"(Common,)",,
2,2015 Mega-Tin Mega Pack (TCG-FR),MP15-FR003,Galaxy Tyranno,"(Rare,)",,
3,2015 Mega-Tin Mega Pack (TCG-FR),MP15-FR004,Heliosphere Dragon,"(Common,)",,
4,2015 Mega-Tin Mega Pack (TCG-FR),MP15-FR005,Blizzard Thunderbird,"(Common,)",,
...,...,...,...,...,...,...
70295,KC Grand Tournament 2021 prize card (TCG-EN),2021-EN001,Dark Magician,"(Extra Secret Rare,)",,
70296,KC Grand Tournament 2022 prize card (TCG-EN),2022-EN001,Red-Eyes Black Dragon,"(Extra Secret Rare,)",,
70297,KC Grand Tournament prize card (TCG-EN),2020-EN001,Blue-Eyes White Dragon,"(Extra Secret Rare,)",,
70298,Kaiba's Collector Box (TCG-EN),KACB-EN001,Blue-Eyes White Dragon,"(Ultra Rare,)",,


In [8]:
all_set_lists.groupby('Card number', dropna=False).nunique()

Unnamed: 0_level_0,Set,Name,Rarity,Print,Quantity
Card number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
15AX-JP000,1,1,1,1,0
15AX-JPM01,1,1,1,1,0
15AX-JPM02,1,1,1,1,0
15AX-JPM03,1,1,1,1,0
15AX-JPM04,1,1,1,1,0
...,...,...,...,...,...
ZTIN-SP021,1,1,1,0,0
ZTIN-SPV01,1,1,1,0,0
ZTIN-SPV02,1,1,1,0,0
ZTIN-SPV03,1,1,1,0,0


In [9]:
all_set_lists.groupby('Name', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Rarity,Print,Quantity
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"""A Legendary Ocean""",1,1,0,0,0
"""A"" Cell Breeding Device",4,4,1,0,0
"""A"" Cell Incubator",5,5,1,2,0
"""A"" Cell Recombination Device",6,6,1,0,0
"""A"" Cell Scatter Burst",4,4,1,0,0
...,...,...,...,...,...
Zubaba Knight,26,26,5,1,1
Zubababancho Gagagacoat,2,2,1,1,0
"Zure, Knight of Dark World",12,12,1,1,1
Zushin the Sleeping Giant,12,12,3,1,0


In [10]:
all_set_lists.explode('Rarity').groupby('Rarity').nunique()

Unnamed: 0_level_0,Set,Card number,Name,Print,Quantity
Rarity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10000 Secret Rare,5,5,1,1,0
20th Secret Rare,11,68,67,2,0
20th Secret rare,1,2,2,0,0
Collectors Rare,3,63,63,2,0
Common,806,35296,6867,4,3
Duel Terminal Normal Parallel Rare,15,585,408,0,0
Duel Terminal Rare Parallel Rare,13,150,102,0,0
Duel Terminal Secret Parallel Rare,2,20,16,0,0
Duel Terminal Super Parallel Rare,13,96,65,0,0
Duel Terminal Ultra Parallel Rare,16,102,72,0,0


In [11]:
all_set_lists.groupby('Print', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Name,Rarity,Quantity
Print,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
European debut,3,132,132,5,0
Functional errata,1,1,1,1,0
New,135,4379,2075,32,3
New art,2,4,2,1,0
New artwork,4,17,9,1,0
New artwork (renamed),1,3,3,1,0
North American debut,1,2,2,1,0
Reprint,131,6490,2888,23,3
Reprint (New Art),1,1,1,1,0
Reprint (renamed),1,75,75,5,0


In [12]:
all_set_lists.groupby('Quantity', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Name,Rarity,Print
Quantity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.0,101,3858,1033,10,2
2.0,101,364,131,4,2
3.0,11,18,9,1,2
,1580,65062,10604,106,10


In [13]:
all_set_lists[all_set_lists['Card number']=='!: header::Kaiba Deck']

Unnamed: 0,Set,Card number,Name,Rarity,Print,Quantity
