In [1]:
# We import the libraries that we will need for the lab.
import pandas as pd # data handling and dataframes
import numpy as np # array handling and mathematical operations
import re # regex 
# Libraries for web scraping with Selenium
from selenium import webdriver # webdriver allows browser handling
from webdriver_manager.chrome import ChromeDriverManager # allows installation and keeping Chrome driver updated
from selenium.webdriver.common.keys import Keys # allows simulating keyboard keys
from selenium.webdriver.chrome.options import Options # allows configuring Chrome driver options such as incognito mode or maximizing the window
from time import sleep # pauses between code execution
from tqdm import tqdm # Is a library for adding progress bars to your loops and iterations.

import warnings # allows ignoring Python warnings
warnings.filterwarnings('ignore')

In [2]:
opt= Options() # Creates an instance of the ChromeOptions class, which allows you to configure the Chrome driver options.

opt.add_experimental_option('excludeSwitches', ['enable-automation']) # Excludes the 'enable-automation' switch, which helps in bypassing the browser automation detection by websites.

opt.add_experimental_option('useAutomationExtension', False) # Disables the use of the Automation Extension, which further helps in avoiding detection by websites that check for automation.
opt.add_argument('--start-maximized') # Maximizes the browser window when it is launched.
opt.add_argument('user.data-dir=selenium') #  Specifies the user data directory for the Chrome browser. This can be useful for maintaining separate profiles or session data for different instances of the browser.
opt.add_argument('--incognito') # Opens the browser in incognito mode, which ensures that no browsing history or cookies are stored during the session.



In [3]:
# We create an empty dictionary with the keys that we do want.
top_cards = {
    'name':[],
    'decks':[],
    'porcentage':[]
}

In [4]:
driver = webdriver.Chrome() # Creates an instance of the Chrome WebDriver in Python using the Selenium library.

driver.get("https://ygoprodeck.com/top/") # Url 
driver.implicitly_wait(5) # In this case, sets an implicit wait time of 5 seconds. This means that if the WebDriver tries to find an element.


driver.find_element("css selector","#ncmp__tool > div > div > div.ncmp__banner-actions > div.ncmp__banner-btns > button:nth-child(2)").click() 
sleep(2)
# Find an element by CSS selector and click it and wait.

driver.find_element("css selector","body > main > div > div > div > div > div.border-bottom.d-flex.nav-wrapper > button:nth-child(2)").click()
sleep(2)

# We iterate to extract the name, the number of decks it appears in, and the percentage of each top card in tournament decks.
for i in tqdm(range(1,201)):
    name_selector = driver.find_element("css selector",f"#cardList > div:nth-child({i}) > figure > figcaption > div.card-name-grid")
    name = name_selector.text
   
    top_cards["name"].append(name)
   
for i in tqdm(range(1,201)):
    
    deck_selector = driver.find_element('css selector',f"#cardList > div:nth-child({i}) > figure > figcaption > div:nth-child(2) > a > u" )
    decks = deck_selector.text
    numbers = re.findall(r'\d+', decks)
    number =  int(numbers[0])
    top_cards["decks"].append(number)
   
for i in tqdm(range(1,201)):
    porcen_selector = driver.find_element('css selector',f"#cardList > div:nth-child({i}) > figure > figcaption > div.badge.custom-badge.mt-1" )
    porcen = porcen_selector.text
    numbers = re.findall(r'\d+\.\d+', porcen)
    number =  float(numbers[0])
    top_cards["porcentage"].append(number)
    
driver.quit() # Close window

100%|██████████| 200/200 [00:02<00:00, 74.65it/s]
100%|██████████| 200/200 [00:02<00:00, 71.08it/s]
100%|██████████| 200/200 [00:02<00:00, 78.64it/s]


In [5]:
# Function that returns the length or number of elements
len(top_cards['name'])

200

In [6]:
len(top_cards['decks'])

200

In [7]:
len(top_cards['porcentage'])

200

In [8]:
# Display the contents of top_cards["name"], top_cards["decks"] and top_cards["porcentage"]
top_cards["name"]


['Ash Blossom & Joyous Spring',
 'Divine Arsenal AA-ZEUS - Sky Thunder',
 'Infinite Impermanence',
 'Called by the Grave',
 'Pot of Prosperity',
 'Terraforming',
 'Triple Tactics Talent',
 'Nibiru, the Primal Being',
 'Baronne de Fleur',
 'Accesscode Talker',
 'Dharc the Dark Charmer, Gloomy',
 'Garura, Wings of Resonant Life',
 'Effect Veiler',
 'Knightmare Unicorn',
 'Kashtira Fenrir',
 'Droll & Lock Bird',
 'PSY-Framegear Gamma',
 'PSY-Frame Driver',
 'Knightmare Phoenix',
 'Predaplant Dragostapelia',
 'Downerd Magician',
 'Pot of Desires',
 'Foolish Burial',
 'Number 41: Bagooska the Terribly Tired Tapir',
 'I:P Masquerena',
 'Spright Elf',
 'Spright Sprind',
 'Apollousa, Bow of the Goddess',
 "Elder Entity N'tss",
 'Forbidden Droplet',
 'Mudragon of the Swamp',
 'Bystial Druiswurm',
 'Bystial Magnamhut',
 'Instant Fusion',
 'Dark Ruler No More',
 'Donner, Dagger Fur Hire',
 'Kashtira Arise-Heart',
 'Salamangreat Almiraj',
 'Gigantic Spright',
 'Pressured Planet Wraitsoth',
 'Bysti

In [9]:
top_cards["decks"]


[2206,
 1818,
 1803,
 1249,
 1111,
 1102,
 1077,
 921,
 881,
 878,
 841,
 749,
 735,
 713,
 613,
 613,
 608,
 607,
 594,
 586,
 585,
 563,
 549,
 548,
 526,
 521,
 517,
 499,
 497,
 492,
 474,
 460,
 453,
 449,
 439,
 438,
 434,
 426,
 423,
 423,
 420,
 419,
 418,
 417,
 416,
 412,
 411,
 410,
 408,
 407,
 407,
 405,
 400,
 385,
 383,
 382,
 376,
 373,
 363,
 359,
 358,
 358,
 356,
 355,
 353,
 351,
 351,
 346,
 345,
 331,
 330,
 328,
 317,
 308,
 300,
 299,
 294,
 292,
 291,
 286,
 282,
 281,
 280,
 280,
 275,
 273,
 265,
 264,
 264,
 264,
 263,
 262,
 261,
 256,
 256,
 254,
 252,
 250,
 247,
 247,
 247,
 242,
 241,
 240,
 239,
 239,
 238,
 234,
 234,
 233,
 231,
 220,
 216,
 215,
 213,
 213,
 212,
 208,
 208,
 207,
 205,
 201,
 201,
 199,
 195,
 194,
 193,
 192,
 191,
 191,
 188,
 180,
 180,
 176,
 175,
 173,
 170,
 168,
 168,
 167,
 166,
 166,
 166,
 165,
 165,
 165,
 165,
 164,
 163,
 163,
 163,
 163,
 163,
 163,
 162,
 162,
 161,
 161,
 160,
 160,
 159,
 159,
 158,
 158,
 158,
 15

In [10]:
top_cards["porcentage"]


[66.1668,
 54.5291,
 54.0792,
 37.4625,
 33.3233,
 33.0534,
 32.3035,
 27.6245,
 26.4247,
 26.3347,
 25.225,
 22.4655,
 22.0456,
 21.3857,
 18.3863,
 18.3863,
 18.2364,
 18.2064,
 17.8164,
 17.5765,
 17.5465,
 16.8866,
 16.4667,
 16.4367,
 15.7768,
 15.6269,
 15.5069,
 14.967,
 14.907,
 14.757,
 14.2172,
 13.7972,
 13.5873,
 13.4673,
 13.1674,
 13.1374,
 13.0174,
 12.7774,
 12.6875,
 12.6875,
 12.5975,
 12.5675,
 12.5375,
 12.5075,
 12.4775,
 12.3575,
 12.3275,
 12.2975,
 12.2376,
 12.2076,
 12.2076,
 12.1476,
 11.9976,
 11.5477,
 11.4877,
 11.4577,
 11.2777,
 11.1878,
 10.8878,
 10.7678,
 10.7379,
 10.7379,
 10.6779,
 10.6479,
 10.5879,
 10.5279,
 10.5279,
 10.3779,
 10.3479,
 9.928,
 9.898,
 9.838,
 9.5081,
 9.2382,
 8.9982,
 8.9682,
 8.8182,
 8.7582,
 8.7283,
 8.5783,
 8.4583,
 8.4283,
 8.3983,
 8.3983,
 8.2484,
 8.1884,
 7.9484,
 7.9184,
 7.9184,
 7.9184,
 7.8884,
 7.8584,
 7.8284,
 7.6785,
 7.6785,
 7.6185,
 7.5585,
 7.4985,
 7.4085,
 7.4085,
 7.4085,
 7.2585,
 7.2286,
 7.1986,
 7

In [11]:
# We create a DataFrame with the dictionary "top_cards" that contains the information we need.
df_nocleantc = pd.DataFrame(top_cards)

In [12]:
# Display DataFrame.
df_nocleantc

Unnamed: 0,name,decks,porcentage
0,Ash Blossom & Joyous Spring,2206,66.1668
1,Divine Arsenal AA-ZEUS - Sky Thunder,1818,54.5291
2,Infinite Impermanence,1803,54.0792
3,Called by the Grave,1249,37.4625
4,Pot of Prosperity,1111,33.3233
...,...,...,...
195,Raiza the Mega Monarch,137,4.1092
196,Muckraker From the Underworld,137,4.1092
197,Floowandereeze & Stri,137,4.1092
198,Floowandereeze & Empen,137,4.1092


In [13]:
# Open CSV all_cards.
df_toadd = pd.read_csv('../data/all_cards.csv')

In [14]:
df_toadd

Unnamed: 0,id_,name,type,frametype,desc,race,archetype,atk,def,level,attribute,url_,cardmarket_price,tcgplayer_price,ebay_price,amazon_price,coolstuffinc_price,mean_price
0,34541863,"""A"" Cell Breeding Device",Spell Card,spell,"During each of your Standby Phases, put 1 A-Co...",Continuous,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Breedi...",4.55,0.18,0.99,24.45,0.25,6.084
1,64163367,"""A"" Cell Incubator",Spell Card,spell,Each time an A-Counter(s) is removed from play...,Continuous,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Incubator",0.16,0.21,1.25,0.50,0.25,0.474
2,91231901,"""A"" Cell Recombination Device",Spell Card,spell,Target 1 face-up monster on the field; send 1 ...,Quick-Play,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Recomb...",0.14,0.19,0.99,0.50,0.49,0.462
3,73262676,"""A"" Cell Scatter Burst",Spell Card,spell,"Select 1 face-up ""Alien"" monster you control. ...",Quick-Play,Alien,,,,,"https://yugioh.fandom.com/wiki/""A""_Cell_Scatte...",0.02,0.13,2.00,9.76,0.25,2.432
4,98319530,"""Infernoble Arms - Almace""",Spell Card,spell,While this card is equipped to a monster: You ...,Equip,Noble Knight,,,,,"https://yugioh.fandom.com/wiki/""Infernoble_Arm...",0.00,0.00,0.00,0.00,0.00,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12661,2648201,ZW - Sleipnir Mail,Effect Monster,effect,"You can target 1 ""Utopia"" monster you control;...",Beast,Utopia,1000.0,1000.0,4.0,LIGHT,https://yugioh.fandom.com/wiki/ZW_-_Sleipnir_Mail,0.13,0.22,0.99,0.49,0.99,0.564
12662,95886782,ZW - Sylphid Wing,Effect Monster,effect,"You can only control 1 ""ZW - Sylphid Wing"". Yo...",Beast,Utopia,800.0,1600.0,4.0,LIGHT,https://yugioh.fandom.com/wiki/ZW_-_Sylphid_Wing,0.05,0.06,1.50,0.30,0.39,0.460
12663,81471108,ZW - Tornado Bringer,Effect Monster,effect,"You can target 1 ""Utopia"" monster you control;...",Dragon,Utopia,1300.0,1800.0,5.0,WIND,https://yugioh.fandom.com/wiki/ZW_-_Tornado_Br...,0.12,0.06,0.99,1.23,0.49,0.578
12664,18865703,ZW - Ultimate Shield,Effect Monster,effect,When this card is Normal or Special Summoned: ...,Aqua,Utopia,0.0,2000.0,4.0,EARTH,https://yugioh.fandom.com/wiki/ZW_-_Ultimate_S...,0.08,0.05,0.99,0.20,0.49,0.362


In [15]:
# Perform the merge using the common key column.
df_topcards = pd.merge(df_toadd, df_nocleantc, on='name', how='inner')


In [16]:
# we set the "id_" column as the index.
df_topcards.set_index('id_', inplace=True)

In [17]:
# We remove the columns that we don't need.
colums_clean = ['name','decks','porcentage']
df_topcards = df_topcards[colums_clean]

In [20]:
df_topcards = df_topcards.sort_values(by='porcentage', ascending=False)

In [21]:
df_topcards

Unnamed: 0_level_0,name,decks,porcentage
id_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
14558127,Ash Blossom & Joyous Spring,2206,66.1668
90448279,Divine Arsenal AA-ZEUS - Sky Thunder,1818,54.5291
10045474,Infinite Impermanence,1803,54.0792
24224830,Called by the Grave,1249,37.4625
84211599,Pot of Prosperity,1111,33.3233
...,...,...,...
71607202,Muckraker From the Underworld,137,4.1092
69327790,Raiza the Mega Monarch,137,4.1092
80611581,Floowandereeze & Empen,137,4.1092
88093706,Update Jammer,137,4.1092


In [22]:
# Save the DataFrame to a CSV.
df_topcards.to_csv('../data/top_cards.csv', index=True)