# Preamble

In [1]:
import glob
import os
import pandas as pd
import numpy as np
import seaborn as sns
import wikitextparser as wtp
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm, Normalize
from datetime import datetime
from ast import literal_eval

# Lists

In [2]:
# API variables
api_url = 'https://yugipedia.com/api.php'
sets_query_url = '?action=ask&query=[[Category:Set%20Card%20Lists]]|limit%3D5000|order%3Dasc&format=json'
lists_query_url = '?action=query&prop=revisions&rvprop=content&format=json&titles='

In [3]:
df = pd.read_json(f'{api_url}{sets_query_url}')
keys = list(df['query']['results'].keys())
print(len(keys))

3943


In [4]:
all_set_lists = pd.DataFrame(columns = ['Set','Card number','Name','Rarity','Print','Quantity'])

for i in range(40):
    first = i*100
    last = (i+1)*100
    print(len(keys[first:last]))
    titles = '|'.join(keys[first:last]).replace(' ','%20').encode('utf-8')
    df = pd.read_json(f'{api_url}{lists_query_url}{titles}')
    contents = df['query']['pages'].values()
    
    for content in contents:
        if 'revisions' in  content.keys():
            temp = content['revisions'][0]['*']
            parsed = wtp.parse(temp)
            
            for template in parsed.templates:
                if template.name == 'Set list':
                    title = content['title'].split('Lists:')[1]
                    set_df = pd.DataFrame(columns = ['Set', 'Card number', 'Name', 'Rarity', 'Print', 'Quantity'])


                    region = None
                    rarity = None
                    card_print = None
                    qty = None
                    desc = None
                    opt = None
                    list_df = None
                    
                    for argument in template.arguments:
                        if 'region=' in argument:
                            region = argument.string[argument.string.index('=')+1:]
                        elif 'rarities=' in argument:
                            rarity = tuple(argument.string[argument.string.index('=')+1:].split(','))
                        elif 'print=' in argument:
                            card_print = argument.string[argument.string.index('=')+1:]
                        elif 'qty=' in argument:
                            qty = argument.string[argument.string.index('=')+1:]
                        elif 'description=' in argument:
                            desc = argument.string[argument.string.index('=')+1:]
                        elif 'options=' in argument:
                            opt = argument.string[argument.string.index('=')+1:]
                        else:
                            set_list = argument.string[2:-1]
                            lines = set_list.split('\n')

                            list_df = pd.DataFrame([x.split(';') for x in lines])
                            list_df = list_df[~list_df[0].str.contains('!:')]
                            list_df = list_df.applymap(lambda x: x.split('//')[0] if x is not None else x)
                            list_df = list_df.applymap(lambda x: x.strip() if x is not None else x)
                            list_df.replace(r'^\s*$', None, regex = True, inplace = True)

                    if opt != 'noabbr':
                        set_df['Card number'] = list_df[0]
                        set_df['Name'] = list_df[1]
                    else: 
                        set_df['Name'] = list_df[0]

                    if len(list_df.columns)>2: # and rare in str
                        set_df['Rarity'] = list_df[2].apply(lambda x: tuple([y.strip() for y in x.split(',')]) if x is not None else rarity)
                    else:
                        set_df['Rarity'] = [rarity for _ in set_df.index]

                    if len(list_df.columns)>3 :
                        if card_print is not None: # and new/reprint in str
                            set_df['Print'] = list_df[3].apply(lambda x: x if x is not None else card_print)
                            if len(list_df.columns)>4 and qty is not None:
                                set_df['Quantity'] = list_df[4].apply(lambda x: x if x is not None else qty)
                        elif qty is not None:
                            set_df['Quantity'] = list_df[3].apply(lambda x: x if x is not None else qty)
                    
                    set_df['Set'] = title
                    all_set_lists = pd.concat([all_set_lists, set_df], ignore_index=True)

100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
100
43


In [5]:
all_set_lists

Unnamed: 0,Set,Card number,Name,Rarity,Print,Quantity
0,Absolute Powerforce: Special Edition (TCG-EN),ABPF-ENSE1,Dandylion,"(Super rare,)",,
1,Absolute Powerforce: Special Edition (TCG-EN),ABPF-ENSE2,Red-Eyes Darkness Metal Dragon,"(Super rare,)",,
2,Abyss Rising: Special Edition (TCG-DE),ABYR-DESE1,Gagaga Girl,"(Super rare,)",,
3,Abyss Rising: Special Edition (TCG-DE),ABYR-DESE2,Dark Smog,"(Super rare,)",,
4,Academy Duel Disk (OCG-KR),DSK1-KR001,Elemental HERO Steam Healer,"(Secret Rare,)",,
...,...,...,...,...,...,...
69854,King's Court (TCG-EN),KICO-EN061,The Wicked Avatar,"(R,)",Reprint,
69855,King's Court (TCG-EN),KICO-EN062,The Wicked Eraser,"(R,)",Reprint,
69856,King's Court (TCG-EN),KICO-EN063,Slifer the Sky Dragon,"(UR,)",Reprint,
69857,King's Court (TCG-EN),KICO-EN064,Obelisk the Tormentor,"(UR,)",Reprint,


In [508]:
all_set_lists.groupby('Card number', dropna=False).nunique()

Unnamed: 0_level_0,Set,Name,Rarity,Print,Quantity
Card number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
15AX-KR000,2,1,1,0,0
15AX-KRM01,1,1,1,0,0
15AX-KRM02,1,1,1,0,0
15AX-KRM03,1,1,1,0,0
15AX-KRM04,1,1,1,0,0
...,...,...,...,...,...
ZTIN-SP021,1,1,1,0,0
ZTIN-SPV01,1,1,1,0,0
ZTIN-SPV02,1,1,1,0,0
ZTIN-SPV03,1,1,1,0,0


In [505]:
all_set_lists.groupby('Name', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Rarity,Print,Quantity
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"""A"" Cell Breeding Device",5,5,2,0,0
"""A"" Cell Incubator",7,7,2,2,0
"""A"" Cell Recombination Device",4,4,1,0,0
"""A"" Cell Scatter Burst",5,5,2,0,0
"""Infernoble Arms - Durendal""",6,6,1,1,0
...,...,...,...,...,...
Zubaba Knight,22,22,7,1,1
Zubababancho Gagagacoat,2,2,1,1,0
"Zure, Knight of Dark World",11,11,2,0,1
Zushin the Sleeping Giant,4,4,2,1,0


In [6]:
all_set_lists.explode('Rarity').groupby('Rarity', dropna = False).first()

Unnamed: 0_level_0,Set,Card number,Name,Print,Quantity
Rarity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
20th Secret Rare,20th Secret Rare Final Challenge Pack (OCG-JP),20CP-JPF01,Blue-Eyes Alternative White Dragon,,
DNPR,Hidden Arsenal: Chapter 1 (TCG-EN),HAC1-EN004,Elemental HERO Neos,,
Gold Rare,Gold Series 2012 (OCG-JP),GS04-JP001,Morphing Jar #2,,
Gold Secret Rare,Gold Series 2013 (OCG-JP),GS05-JP001,Reflect Bounder,,
Millennium Rare,Duelist Road -Piece of Memory- Side: Yami Yugi...,15AX-JPY04,Dark Renewal,Reprint,
NPR,Deck Build Pack: Ancient Guardians (OCG-JP),DBAG-JP003,"Flogos, the Ogdoadic Boundless",New,
Normal Parallel Rare,Booster SP: Tribe Force (OCG-KR),SPTR-KR003,Yosenju Kama 1,,
Secret Rare,Premium Pack 17 (OCG-JP),PP17-JP001,Junk Giant,,
Super Parallel Rare,Premium Pack 3 (OCG-JP),P3-01,Blue-Eyes Ultimate Dragon,,
10000 Secret Rare,Battles of Legend: Armageddon (TCG-EN),BLAR-EN10K,Ten Thousand Dragon,New,


In [507]:
all_set_lists.groupby('Print', dropna=False).nunique()

Unnamed: 0_level_0,Set,Card number,Name,Rarity,Quantity
Print,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
European debut,1,2,2,1,0
New,129,3640,1835,44,3
New art,2,4,2,1,0
New artwork,5,16,2,3,0
Reprint,117,4742,2125,38,3
Reprint (New Art),1,1,1,1,0
,1567,55170,9628,150,3


In [448]:
all_set_lists.groupby('Quantity', dropna=False).nunique()

Unnamed: 0_level_0,Card number,Name,Rarity,Print
Quantity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,3446,967,12,2
2.0,377,133,4,2
3.0,22,10,2,2
,59593,10366,166,6


In [454]:
all_set_lists[all_set_lists['Card number']=='!: header::Kaiba Deck']

Unnamed: 0,Card number,Name,Rarity,Print,Quantity
6653,!: header::Kaiba Deck,,C,,
