In [None]:
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from urllib.request import urlopen
import re 

In [None]:
# Download the Civ6 technology list page and collect every <table> element on it.
url = 'https://civilization.fandom.com/wiki/List_of_technologies_in_Civ6'
# The context manager closes the HTTP response as soon as the markup is parsed.
with urlopen(url) as html:
  soup = BeautifulSoup(html, 'html.parser')
tables = soup.find_all('table')

In [None]:
# Column titles that identify a technology table: a table is used only when
# the stripped texts of all of its <th> cells equal this list exactly.
header = ['Technology', 'Prerequisites', 'Eureka', 'Infrastructure', 'Units', 'Effects']

# One accumulator per column; each gets one stripped string per data row.
tech = []
pre = []
eureka = []
inf = []
unit = []
effect = []

for table in tables:
  headings = [th.text.strip() for th in table.find_all('th')]
  # The heading comparison is the same for every row, so test it once per table.
  if headings != header:
    continue
  for row in table.find_all('tr'):
    cells = row.find_all('td')
    # Every column of `header` is read below, so a row needs at least that many
    # <td> cells. Shorter rows (e.g. heading rows with no <td>) are skipped
    # instead of raising IndexError.
    if len(cells) < len(header):
      continue
    techs, pres, eurekas, infs, units, effects = cells[:len(header)]
    tech.append(techs.text.strip())
    pre.append(pres.text.strip())
    eureka.append(eurekas.text.strip())
    inf.append(infs.text.strip())
    unit.append(units.text.strip())
    effect.append(effects.text.strip())

# Two-column string array: a title row followed by (technology, prerequisites) rows.
# np.vstack replaces np.row_stack, which is a deprecated alias of it.
data = np.vstack([header[:2], np.column_stack([tech, pre])])

In [None]:
# Science-cost column; the first element is the column title so the list lines
# up with the title row of `data`.
sci_costs = ['Science Cost']

# Row 0 of `data` is the title row; the trailing row is excluded here as well.
for tech_name in data[1:-1,0]:
  # Build the wiki page title: keep letters only, then put "_" at every
  # lower-case/upper-case boundary. A separate name is used so the `tech`
  # list built while parsing the tables is not overwritten.
  page_name = re.sub('[^a-zA-Z]+', '', tech_name)
  page_name = re.sub("([a-z])([A-Z])", "\\1_\\2", page_name)
  url = 'https://civilization.fandom.com/wiki/' + page_name + '_(Civ6)'
  # Close each response once its page has been parsed.
  with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')
  # All <div data-source="cost"> elements; when there are several the last one
  # is used, matching a full scan that keeps the final match.
  cost_divs = soup.find_all('div', attrs={'data-source': 'cost'})
  if not cost_divs:
    sci_cost = 'NA'
  else:
    sci_cost = cost_divs[-1].text.strip()
    # Keep what follows the first newline (the whole text when there is none).
    sci_cost = sci_cost[sci_cost.find('\n') + 1:]
  sci_costs.append(sci_cost)

In [None]:
# Append the science-cost column to every row of `data` except the last one,
# then save the result as a CSV file with no header line and no index column.
tech_rows = data[:-1, :]
tech_data = np.column_stack((tech_rows, sci_costs))
tech_frame = pd.DataFrame(tech_data)
tech_frame.to_csv("Civ6_Techs.csv", header=False, index=False)

In [None]:
# Download the Civ6 building list page and collect every <table> element on it.
url = 'https://civilization.fandom.com/wiki/List_of_buildings_in_Civ6'
# The context manager closes the HTTP response as soon as the markup is parsed.
with urlopen(url) as html:
  soup = BeautifulSoup(html, 'html.parser')
tables = soup.find_all('table')

In [None]:
# header1: <th> texts a table must have, exactly, to be read as a building table.
# header2: title row written above the two columns kept in `data`.
header1 = ['Building', 'District', 'Unlocked with', 'Era']
header2 = ['Building', 'Prerequisites']

# Per-column accumulators. `district` is filled but not used in `data`;
# `era` is declared and never filled by this loop.
building = []
district = []
pre = []
era = []

for table in tables:
  headings = [th.text.strip() for th in table.find_all('th')]
  # The heading comparison is the same for every row, so test it once per table.
  if headings != header1:
    continue
  for row in table.find_all('tr'):
    cells = row.find_all('td')
    # Only the first three cells are read; rows with fewer are skipped.
    if len(cells) > 2:
      buildings, districts, pres = cells[:3]
      building.append(buildings.text.strip())
      district.append(districts.text.strip())
      pre.append(pres.text.strip())

# Two-column string array: a title row followed by (building, prerequisite) rows.
# np.vstack replaces np.row_stack, which is a deprecated alias of it.
data = np.vstack([header2, np.column_stack([building, pre])])

In [None]:
# Production-cost column; the first element is the column title so the list
# lines up with the title row of `data`.
pro_costs = ['Production Cost']

for buildings in data[1:,0]:
  # Build the wiki page title: drop any parenthesised part, keep letters only,
  # then put "_" at every lower-case/upper-case boundary. The pattern is a raw
  # string because "\(" is not a valid escape in a normal string literal. A
  # separate name is used so the `building` list built while parsing the
  # tables is not overwritten.
  page_name = re.sub(r"\(.*?\)", "", buildings)
  page_name = re.sub('[^a-zA-Z]+', '', page_name)
  page_name = re.sub("([a-z])([A-Z])", "\\1_\\2", page_name)
  url = 'https://civilization.fandom.com/wiki/' + page_name + '_(Civ6)'
  # Close each response once its page has been parsed.
  with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')
  # All <div data-source="cost"> elements; when there are several the last one
  # is used, matching a full scan that keeps the final match.
  cost_divs = soup.find_all('div', attrs={'data-source': 'cost'})
  if not cost_divs:
    pro_cost = 'NA'
  else:
    # Keep only the text before the first space.
    pro_cost = cost_divs[-1].text.strip().split(' ', 1)[0]
  pro_costs.append(pro_cost)

In [None]:
# Science-boost column; the first element is the column title so the list
# lines up with the title row of `data`.
sci_boosts = ['Science Boost']

for buildings in data[1:,0]:
  # Build the wiki page title: drop any parenthesised part, keep letters only,
  # then put "_" at every lower-case/upper-case boundary. The pattern is a raw
  # string because "\(" is not a valid escape in a normal string literal. A
  # separate name is used so the `building` list built while parsing the
  # tables is not overwritten.
  page_name = re.sub(r"\(.*?\)", "", buildings)
  page_name = re.sub('[^a-zA-Z]+', '', page_name)
  page_name = re.sub("([a-z])([A-Z])", "\\1_\\2", page_name)
  url = 'https://civilization.fandom.com/wiki/' + page_name + '_(Civ6)'
  # Close each response once its page has been parsed.
  with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')
  # Looked up afresh for every page, so a page without an effect block yields
  # 'NA' instead of reusing the text of a previously visited page.
  effect_divs = soup.find_all('div', attrs={'data-source': 'effect'})
  sci_boost = 'NA'
  if effect_divs:
    effect_text = effect_divs[-1].text.strip()
    keyword_at = effect_text.find('Science')
    # The keyword must be preceded by some text that can hold the "+N" amount.
    if keyword_at > 0:
      before = effect_text[:keyword_at]
      plus_at = before.rfind('+')
      if plus_at != -1:
        # Text between the last '+' and the keyword, read left to right so a
        # multi-digit amount keeps its digit order.
        amount = before[plus_at + 1:].replace(' ', '')
        if amount.isnumeric():
          sci_boost = amount
  sci_boosts.append(sci_boost)

In [None]:
# Production-boost column; the first element is the column title so the list
# lines up with the title row of `data`.
pro_boosts = ['Production Boost']

for buildings in data[1:,0]:
  # Build the wiki page title: drop any parenthesised part, keep letters only,
  # then put "_" at every lower-case/upper-case boundary. The pattern is a raw
  # string because "\(" is not a valid escape in a normal string literal. A
  # separate name is used so the `building` list built while parsing the
  # tables is not overwritten.
  page_name = re.sub(r"\(.*?\)", "", buildings)
  page_name = re.sub('[^a-zA-Z]+', '', page_name)
  page_name = re.sub("([a-z])([A-Z])", "\\1_\\2", page_name)
  url = 'https://civilization.fandom.com/wiki/' + page_name + '_(Civ6)'
  # Close each response once its page has been parsed.
  with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')
  # Looked up afresh for every page, so a page without an effect block yields
  # 'NA' instead of reusing the text of a previously visited page.
  effect_divs = soup.find_all('div', attrs={'data-source': 'effect'})
  pro_boost = 'NA'
  if effect_divs:
    effect_text = effect_divs[-1].text.strip()
    keyword_at = effect_text.find('Production')
    # The keyword must be preceded by some text that can hold the "+N" amount.
    if keyword_at > 0:
      before = effect_text[:keyword_at]
      plus_at = before.rfind('+')
      if plus_at != -1:
        # Text between the last '+' and the keyword, read left to right so a
        # multi-digit amount keeps its digit order.
        amount = before[plus_at + 1:].replace(' ', '')
        if amount.isnumeric():
          pro_boost = amount
  pro_boosts.append(pro_boost)

In [None]:
# Join the scraped cost and boost columns onto `data`, then save the result as
# a CSV file with no header line and no index column.
extra_columns = (pro_costs, sci_boosts, pro_boosts)
building_data = np.column_stack((data, *extra_columns))
building_frame = pd.DataFrame(building_data)
building_frame.to_csv("Civ6_Buildings.csv", header=False, index=False)