In [1]:
import requests
from bs4 import BeautifulSoup
import tqdm

In [2]:
allowed_chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-.,?()! '

In [3]:
prefix = 'https://www.serebii.net'
page = requests.get(prefix + '/attackdex-swsh/')

In [4]:
soup = BeautifulSoup(page.content)
all_attack_links = [row['value'] for row in soup.findAll('option', value=True)]
all_attack_links = sorted(list(set(all_attack_links)))

In [5]:
class Attack:
    def __init__(self, name, atk_type, atk_class, power, accuracy, pkmn):
        self.name = name
        self.atk_type = atk_type
        self.atk_class = atk_class
        self.power = power
        self.accuracy = accuracy
        self.pkmn = pkmn

In [6]:
def get_attack_info(suffix):
    page = requests.get(prefix + suffix)
    soup = BeautifulSoup(page.content)
    name = ''.join(x for x in soup.findAll('table', {'class': 'dextable'})[0].findAll('td')[3].text if x in allowed_chars).lstrip().rstrip()
    atk_type = soup.findAll('table', {'class': 'dextable'})[0].findAll('td')[4].a['href'].replace('/attackdex-swsh/', '').replace('.shtml', '')
    atk_class = soup.findAll('table', {'class': 'dextable'})[0].findAll('td')[5].a['href'].replace('/attackdex-swsh/', '').replace('.shtml', '')
    power = int(soup.findAll('table', {'class': 'dextable'})[0].findAll('td')[10].text.replace('\r', '').replace('\t', '').replace('\n', ''))
    accuracy = int(soup.findAll('table', {'class': 'dextable'})[0].findAll('td')[11].text.replace('\r', '').replace('\t', '').replace('\n', ''))
    pkm = []
    k = len(soup.findAll('table', {'class': 'dextable'}))
    for i in range(2, k):
        pkm += [row.find('img')['src'].replace('/pokedex-swsh/icon/', '').replace('.png', '') for row in soup.findAll('table', {'class': 'dextable'})[i].findAll('table', {'class': 'pkmn'})]
    pkm = list(set(pkm))
    return [Attack(name, atk_type, atk_class, power, accuracy, pkm)]

In [7]:
all_attacks = []
for suffix in tqdm.tqdm_notebook(all_attack_links):
    all_attacks += get_attack_info(suffix)

HBox(children=(IntProgress(value=0, max=841), HTML(value='')))




In [8]:
import pandas as pd
data = pd.DataFrame({
    'Name': [a.name for a in all_attacks],
    'Type': [a.atk_type.capitalize() for a in all_attacks],
    'Class': [a.atk_class.capitalize() for a in all_attacks],
    'Power': [a.power for a in all_attacks],
    'Accuracy': [a.accuracy for a in all_attacks]
})

In [9]:
data.to_csv('attack_swsh_data.csv', index=False)

In [10]:
pkmn_data = pd.read_csv('pokemon_swsh_data.csv')

In [11]:
pkmn_data.loc[4]['Alt Number']

nan

In [12]:
all_attacks[1].pkmn

['407',
 '290',
 '114',
 '797',
 '592',
 '715',
 '187',
 '617',
 '267',
 '252',
 '291',
 '272',
 '547',
 '465',
 '743',
 '189',
 '636',
 '548',
 '593',
 '103-a',
 '402',
 '546',
 '045',
 '046',
 '168',
 '273',
 '271',
 '140',
 '781',
 '286',
 '182',
 '047',
 '706',
 '596',
 '167',
 '770',
 '315',
 '388',
 '042',
 '292',
 '704',
 '756',
 '191',
 '103',
 '714',
 '769',
 '141',
 '270',
 '549',
 '616',
 '331',
 '332',
 '637',
 '556',
 '169',
 '275',
 '406',
 '590',
 '595',
 '188',
 '254',
 '705',
 '274',
 '043',
 '389',
 '192',
 '742',
 '044',
 '755',
 '285',
 '591',
 '253',
 '041',
 '102']

In [13]:
def can_learn(pkm_num, attack_num):
    test1 = pkmn_data.loc[pkm_num]['Number'] in all_attacks[attack_num].pkmn
    test2 = pkmn_data.loc[pkm_num]['Alt Number'] in all_attacks[attack_num].pkmn
    if test1 or test2:
        return 1
    return 0

In [14]:
pkmn_nums = range(len(pkmn_data))
attack_nums = range(len(data))

In [15]:
learnset_matrix = []
for p in tqdm.tqdm_notebook(pkmn_nums):
    learnset_matrix.append([])
    for a in attack_nums:
        learnset_matrix[-1].append(can_learn(p, a))

HBox(children=(IntProgress(value=0, max=739), HTML(value='')))




In [16]:
len(learnset_matrix)

739

In [17]:
len(learnset_matrix[0])

841

In [18]:
pd.DataFrame(learnset_matrix).to_csv('learnset.csv', index=False)