In [1]:
# We import the libraries that we will need for the lab.
import pandas as pd # data handling and dataframes
import numpy as np # array handling and mathematical operations

# Libraries for web scraping with Selenium
from selenium import webdriver # webdriver allows browser handling
from webdriver_manager.chrome import ChromeDriverManager # allows installation and keeping Chrome driver updated
from selenium.webdriver.common.keys import Keys # allows simulating keyboard keys
from selenium.webdriver.chrome.options import Options # allows configuring Chrome driver options such as incognito mode or maximizing the window
from time import sleep # pauses between code execution
from tqdm import tqdm # Is a library for adding progress bars to your loops and iterations.

import warnings # allows ignoring Python warnings
warnings.filterwarnings('ignore')

In [2]:
opt= Options() # Creates an instance of the ChromeOptions class, which allows you to configure the Chrome driver options.

opt.add_experimental_option('excludeSwitches', ['enable-automation']) # Excludes the 'enable-automation' switch, which helps in bypassing the browser automation detection by websites.

opt.add_experimental_option('useAutomationExtension', False) # Disables the use of the Automation Extension, which further helps in avoiding detection by websites that check for automation.
opt.add_argument('--start-maximized') # Maximizes the browser window when it is launched.
opt.add_argument('user.data-dir=selenium') #  Specifies the user data directory for the Chrome browser. This can be useful for maintaining separate profiles or session data for different instances of the browser.
opt.add_argument('--incognito') # Opens the browser in incognito mode, which ensures that no browsing history or cookies are stored during the session.



In [3]:
# We create an empty dictionary with the keys that we do want.
top_cards = {
    'name':[],
    'decks':[],
    'porcentage':[]
}

In [4]:
driver = webdriver.Chrome() # Creates an instance of the Chrome WebDriver in Python using the Selenium library.

driver.get("https://ygoprodeck.com/top/") # Url 
driver.implicitly_wait(5) # In this case, sets an implicit wait time of 5 seconds. This means that if the WebDriver tries to find an element.


driver.find_element("css selector","#ncmp__tool > div > div > div.ncmp__banner-actions > div.ncmp__banner-btns > button:nth-child(2)").click() 
sleep(2)
# Find an element by CSS selector and click it and wait.

driver.find_element("css selector","body > main > div > div > div > div > div.border-bottom.d-flex.nav-wrapper > button:nth-child(2)").click()
sleep(2)

# We iterate to extract the name, the number of decks it appears in, and the percentage of each top card in tournament decks.
for i in tqdm(range(1,201)):
    name_selector = driver.find_element('css selector',f"#cardList > div:nth-child({i}) > figure > figcaption > div.mt-2" )
    name = name_selector.text
    top_cards["name"].append(name)
    
for i in tqdm(range(1,201)):
    try:
        deck_selector = driver.find_element('css selector',f"#cardList > div:nth-child({i}) > figure > figcaption > div:nth-child(2) > a > u" )
        decks = deck_selector.text
        top_cards["decks"].append(decks)
    except:
        pass
    
for i in tqdm(range(1,201)):
    porcen_selector = driver.find_element('css selector',f"#cardList > div:nth-child({i}) > figure > figcaption > div.badge.custom-badge" )
    porcen = porcen_selector.text
    top_cards["porcentage"].append(porcen)
   
driver.quit() # Close window

100%|██████████| 200/200 [00:02<00:00, 76.30it/s]
100%|██████████| 200/200 [00:02<00:00, 79.60it/s]
100%|██████████| 200/200 [00:02<00:00, 75.63it/s]


In [5]:
# Function that returns the length or number of elements
len(top_cards['name'])

200

In [6]:
len(top_cards['decks'])

200

In [7]:
len(top_cards['porcentage'])

200

In [8]:
# Display the contents of top_cards["name"], top_cards["decks"] and top_cards["porcentage"]
top_cards["name"]


['Ash Blossom & Joyous Spring',
 'Divine Arsenal AA-ZEUS - Sky Thunder',
 'Infinite Impermanence',
 'Called by the Grave',
 'Pot of Prosperity',
 'Terraforming',
 'Triple Tactics Talent',
 'Nibiru, the Primal Being',
 'Accesscode Talker',
 'Baronne de Fleur',
 'Dharc the Dark Charmer, Gloomy',
 'Garura, Wings of Resonant Life',
 'Effect Veiler',
 'Knightmare Unicorn',
 'PSY-Framegear Gamma',
 'PSY-Frame Driver',
 'Droll & Lock Bird',
 'Knightmare Phoenix',
 'Kashtira Fenrir',
 'Predaplant Dragostapelia',
 'Downerd Magician',
 'Pot of Desires',
 'Foolish Burial',
 'Number 41: Bagooska the Terribly Tired Tapir',
 'Spright Elf',
 'I:P Masquerena',
 'Spright Sprind',
 'Apollousa, Bow of the Goddess',
 'Forbidden Droplet',
 "Elder Entity N'tss",
 'Mudragon of the Swamp',
 'Bystial Druiswurm',
 'Instant Fusion',
 'Bystial Magnamhut',
 'Salamangreat Almiraj',
 'Abyss Dweller',
 'Bystial Saronir',
 'Dark Ruler No More',
 'Gigantic Spright',
 'Kashtira Arise-Heart',
 'Donner, Dagger Fur Hire',


In [9]:
top_cards["decks"]


['In 2167 Decks',
 'In 1779 Decks',
 'In 1772 Decks',
 'In 1241 Decks',
 'In 1078 Decks',
 'In 1075 Decks',
 'In 1043 Decks',
 'In 903 Decks',
 'In 872 Decks',
 'In 851 Decks',
 'In 831 Decks',
 'In 731 Decks',
 'In 731 Decks',
 'In 702 Decks',
 'In 607 Decks',
 'In 606 Decks',
 'In 605 Decks',
 'In 585 Decks',
 'In 582 Decks',
 'In 579 Decks',
 'In 576 Decks',
 'In 556 Decks',
 'In 544 Decks',
 'In 542 Decks',
 'In 521 Decks',
 'In 516 Decks',
 'In 507 Decks',
 'In 498 Decks',
 'In 491 Decks',
 'In 482 Decks',
 'In 467 Decks',
 'In 454 Decks',
 'In 448 Decks',
 'In 447 Decks',
 'In 426 Decks',
 'In 418 Decks',
 'In 413 Decks',
 'In 413 Decks',
 'In 411 Decks',
 'In 411 Decks',
 'In 410 Decks',
 'In 409 Decks',
 'In 409 Decks',
 'In 398 Decks',
 'In 395 Decks',
 'In 395 Decks',
 'In 394 Decks',
 'In 389 Decks',
 'In 389 Decks',
 'In 384 Decks',
 'In 384 Decks',
 'In 382 Decks',
 'In 378 Decks',
 'In 375 Decks',
 'In 370 Decks',
 'In 365 Decks',
 'In 364 Decks',
 'In 357 Decks',
 'In 35

In [10]:
top_cards["porcentage"]


['66.1276%',
 '54.2875%',
 '54.0738%',
 '37.8700%',
 '32.8959%',
 '32.8044%',
 '31.8279%',
 '27.5557%',
 '26.6097%',
 '25.9689%',
 '25.3586%',
 '22.3070%',
 '22.3070%',
 '21.4220%',
 '18.5230%',
 '18.4925%',
 '18.4620%',
 '17.8517%',
 '17.7601%',
 '17.6686%',
 '17.5771%',
 '16.9667%',
 '16.6005%',
 '16.5395%',
 '15.8987%',
 '15.7461%',
 '15.4715%',
 '15.1968%',
 '14.9832%',
 '14.7086%',
 '14.2508%',
 '13.8541%',
 '13.6710%',
 '13.6405%',
 '12.9997%',
 '12.7556%',
 '12.6030%',
 '12.6030%',
 '12.5420%',
 '12.5420%',
 '12.5114%',
 '12.4809%',
 '12.4809%',
 '12.1453%',
 '12.0537%',
 '12.0537%',
 '12.0232%',
 '11.8706%',
 '11.8706%',
 '11.7180%',
 '11.7180%',
 '11.6570%',
 '11.5349%',
 '11.4434%',
 '11.2908%',
 '11.1382%',
 '11.1077%',
 '10.8941%',
 '10.8636%',
 '10.8636%',
 '10.8026%',
 '10.7720%',
 '10.7720%',
 '10.7415%',
 '10.7110%',
 '10.6500%',
 '10.4974%',
 '10.4059%',
 '10.1312%',
 '10.0702%',
 '10.0397%',
 '9.6735%',
 '9.5514%',
 '9.2768%',
 '9.1547%',
 '9.0937%',
 '8.9716%',
 '8.8

In [11]:
# We create a DataFrame with the dictionary "top_cards" that contains the information we need.
df_nocleantc = pd.DataFrame(top_cards)

In [12]:
# Display DataFrame.
df_nocleantc

Unnamed: 0,name,decks,porcentage
0,Ash Blossom & Joyous Spring,In 2167 Decks,66.1276%
1,Divine Arsenal AA-ZEUS - Sky Thunder,In 1779 Decks,54.2875%
2,Infinite Impermanence,In 1772 Decks,54.0738%
3,Called by the Grave,In 1241 Decks,37.8700%
4,Pot of Prosperity,In 1078 Decks,32.8959%
...,...,...,...
195,Floowandereeze & Empen,In 136 Decks,4.1501%
196,Floowandereeze & Toccan,In 136 Decks,4.1501%
197,Floowandereeze & Stri,In 136 Decks,4.1501%
198,Shaman of the Tenyi,In 136 Decks,4.1501%


In [13]:
# Open CSV all_cards.
df_toadd = pd.read_csv('../data/all_cards.csv')

In [17]:
df_toadd

Unnamed: 0,id_,name,type,frametype,desc,race,archetype,atk,def,level,attribute,url_,cardmarket_price,tcgplayer_price,ebay_price,amazon_price,coolstuffinc_price,mean_price
0,34541863,"""A"" Cell Breeding Device",Spell Card,spell,"During each of your Standby Phases, put 1 A-Co...",Continuous,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Breedi...",4.55,0.18,0.99,24.45,0.25,6.084
1,64163367,"""A"" Cell Incubator",Spell Card,spell,Each time an A-Counter(s) is removed from play...,Continuous,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Incubator",0.16,0.21,1.25,0.50,0.25,0.474
2,91231901,"""A"" Cell Recombination Device",Spell Card,spell,Target 1 face-up monster on the field; send 1 ...,Quick-Play,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Recomb...",0.14,0.19,0.99,0.50,0.49,0.462
3,73262676,"""A"" Cell Scatter Burst",Spell Card,spell,"Select 1 face-up ""Alien"" monster you control. ...",Quick-Play,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Scatte...",0.02,0.13,2.00,9.76,0.25,2.432
4,98319530,"""Infernoble Arms - Almace""",Spell Card,spell,While this card is equipped to a monster: You ...,Equip,Noble Knight,,,,,"https://yugioh.fandom.com/wiki/""Infernoble_Arm...",0.00,0.00,0.00,0.00,0.00,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12661,2648201,ZW - Sleipnir Mail,Effect Monster,effect,"You can target 1 ""Utopia"" monster you control;...",Beast,Utopia,1000.0,1000.0,4.0,LIGHT,https://yugioh.fandom.com/wiki/ZW_-_Sleipnir_Mail,0.13,0.22,0.99,0.49,0.99,0.564
12662,95886782,ZW - Sylphid Wing,Effect Monster,effect,"You can only control 1 ""ZW - Sylphid Wing"". Yo...",Beast,Utopia,800.0,1600.0,4.0,LIGHT,https://yugioh.fandom.com/wiki/ZW_-_Sylphid_Wing,0.05,0.06,1.50,0.30,0.39,0.460
12663,81471108,ZW - Tornado Bringer,Effect Monster,effect,"You can target 1 ""Utopia"" monster you control;...",Dragon,Utopia,1300.0,1800.0,5.0,WIND,https://yugioh.fandom.com/wiki/ZW_-_Tornado_Br...,0.12,0.06,0.99,1.23,0.49,0.578
12664,18865703,ZW - Ultimate Shield,Effect Monster,effect,When this card is Normal or Special Summoned: ...,Aqua,Utopia,0.0,2000.0,4.0,EARTH,https://yugioh.fandom.com/wiki/ZW_-_Ultimate_S...,0.08,0.05,0.99,0.20,0.49,0.362


In [18]:
# Perform the merge using the common key column.
df_topcards = pd.merge(df_toadd, df_nocleantc, on='name', how='inner')


In [20]:
# we set the "id_" column as the index.
df_topcards.set_index('id_', inplace=True)

In [22]:
# We remove the columns that we don't need.
colums_clean = ['name','decks','porcentage']
df_topcards = df_topcards[colums_clean]

In [23]:
df_topcards

Unnamed: 0_level_0,name,decks,porcentage
id_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
21044178,Abyss Dweller,In 418 Decks,12.7556%
86066372,Accesscode Talker,In 872 Decks,26.6097%
81555617,Ad Libitum of Despia,In 163 Decks,4.9741%
62320425,Agido the Ancient Sentinel,In 203 Decks,6.1947%
87746184,Albion the Branded Dragon,In 243 Decks,7.4153%
...,...,...,...
2563463,Wandering Gryphon Rider,In 256 Decks,7.8120%
30680659,Water Enchantress of the Temple,In 264 Decks,8.0561%
99234526,White Dragon Wyverburster,In 143 Decks,4.3637%
45935145,"Wollow, Founder of the Drudge Dragons",In 158 Decks,4.8215%


In [24]:
# Save the DataFrame to a CSV.
df_topcards.to_csv('../data/top_cards.csv', index=True)