In [1]:
import requests
from bs4 import BeautifulSoup
import warnings
import pandas as pd
warnings.filterwarnings('ignore', message='A NumPy version.*"')

# Set \# and Name Extraction

In [19]:
# base variables, set to site
base_url = 'https://jp.pokellector.com/'
url = 'https://jp.pokellector.com/Pokemon-151-Expansion/'
html = requests.get(url)
soup = BeautifulSoup(html.text, 'html.parser')

In [20]:
# get all card numbers and names
soup_elements = soup.find_all(class_='plaque')

In [21]:
# append info into a list
pokemon_list = []
for soup_element in soup_elements:
    curr_list = soup_element.text.split('-')
    pokemon_list.append([curr_list[0][:-1], curr_list[1][1:]])

# Rarity Extraction

In [22]:
rarity_url = 'https://www.tcgcollector.com/cards/jp/pokemon-card-151?releaseDateOrder=newToOld&cardsPerPage=30&displayAs=images'
rarity_html = requests.get(rarity_url)
rarity_soup = BeautifulSoup(rarity_html.text, 'html.parser')
rarity_soup_elements = rarity_soup.find_all('img', class_='card-rarity-symbol')

In [23]:
rarity_list = []
for rarity_soup_element in rarity_soup_elements:
    curr_rarity = rarity_soup_element['alt']
    filtered_rarity = curr_rarity.split()[-1].strip('()')
    rarity_list.append(filtered_rarity)

In [24]:
# append rarity_list elements to pokemon list
for i in range(len(pokemon_list)):
    pokemon_list[i].append(rarity_list[i])

# Price Extraction

In [17]:
price_url = 'https://www.pricecharting.com/search-products?q=%5BMaster+Ball%5D+151&type=prices'
price_html = requests.get(price_url)
price_soup = BeautifulSoup(price_html.text, 'html.parser')

In [15]:
# create emtpy list
num_and_price_list = []

# get all masterball cards set # and price
# all lists contain 153 elements (total number of masterballs)
num_soup_elements = price_soup.find_all('td', class_='title')
price_soup_elements = price_soup.find_all('td', class_='price numeric used_price')

# populate empty list
for num_element, price_element in zip(num_soup_elements, price_soup_elements):
    filtered_num_element = num_element.text.strip().split('#')[-1]
    filtered_price_element = price_element.text.strip()
    num_and_price_list.append(['#' + filtered_num_element, filtered_price_element[1:]])

In [18]:
# put data into data frame and output
prices_df = pd.DataFrame(num_and_price_list, columns=['Set #', 'Price'])
prices_df.head()

Unnamed: 0,Set #,Price
0,#63,9.55
1,#142,18.8
2,#59,21.5
3,#144,13.5
4,#15,11.16


# Output into .xlsx file

In [33]:
# put data into data frame and output
df = pd.DataFrame(pokemon_list, columns=['Set #', 'Name', 'Rarity'])
df.head()

Unnamed: 0,Set #,Name,Rarity
0,#1,Bulbasaur,C
1,#2,Ivysaur,U
2,#3,Venusaur ex,RR
3,#4,Charmander,C
4,#5,Charmeleon,U


In [34]:
# Left join prices_df onto df using Set # column to get corresponding
# master ball prices
combined_df = df.merge(prices_df, how='left', left_on='Set #', right_on='Set #')
combined_df.fillna(0) # fill nulls with 0s (meaning no master ball for that card)
combined_df.head()

Unnamed: 0,Set #,Name,Rarity,Price
0,#1,Bulbasaur,C,22.64
1,#2,Ivysaur,U,15.0
2,#3,Venusaur ex,RR,
3,#4,Charmander,C,33.0
4,#5,Charmeleon,U,14.25


In [35]:
# output into csv file
combined_df.to_excel('sv2a.xlsx',index=False)