In [None]:
from requests_html import HTMLSession
import pprint

session = HTMLSession()
pp = pprint.PrettyPrinter()

# List of nuclear tests globally
global_url = 'https://en.wikipedia.org/wiki/List_of_nuclear_weapons_tests'
response = session.get(global_url)

# Nuclear weapons testing totals by country.
# totals_data maps a country name to a dict holding the per-country list
# page under 'URL' (when such a link is found) plus one entry per table column.
totals_data = {}
# NOTE(review): this selector matches elements with class "wikitable" OR
# "sortable"; the first match is presumably the totals table -- confirm.
totals_table = response.html.find('.wikitable, .sortable', first=True)

# Column labels come from the first table row. Anything from the first '['
# onward (footnote markers) is cut off, and the first label is dropped because
# the row's <th> cell is read separately as the country name below.
header_row = totals_table.find('tr', first=True)
column_labels = [header.text.split('[')[0].rstrip()
                 for header in header_row.find('th')][1:]
rows = totals_table.find('tr')[1:]

for row in rows:
    header_cells = row.find('th')
    # A row without a header cell has no country name to key on; skip it
    # instead of failing on header_cells[0].
    if not header_cells:
        continue

    country_cell = header_cells[0]
    country_name = country_cell.text.split('[')[0].rstrip()

    # The summary row and unnamed rows are not countries.
    if not country_name or country_name == 'Totals':
        continue

    country_entry = {}
    totals_data[country_name] = country_entry

    for link in country_cell.absolute_links:
        if 'List_of_nuclear_weapons_tests_of' in link:
            # List of nuclear testing series by country
            country_entry['URL'] = link

    # Ignore reference links: the second <td> (position 1) is skipped so the
    # remaining cells line up with column_labels.
    stat_cells = [cell for position, cell in enumerate(row.find('td'))
                  if position != 1]
    # zip() pairs labels with cells and stops at the shorter of the two, so a
    # row with more cells than labels no longer raises IndexError.
    for label, cell in zip(column_labels, stat_cells):
        country_entry[label] = cell.text.split('[')[0].rstrip()


print('Nuclear weapons totals by country:\n')
pp.pprint(totals_data)

In [None]:
# Nuclear weapons testing series by country.
# series_data maps a country name to a list of dicts, one per test series.
series_data = {}

# USA, USSR, UK, France: individual nuclear tests listed @ url_depth=2
countries_with_series = ('USA', 'USSR', 'UK', 'France')
# China, India, Pakistan, North Korea: individual nuclear tests listed @ url_depth=1
countries_without_series = ('China', 'India', 'Pakistan', 'North Korea')

for country_name, totals_stats in totals_data.items():
    series_data[country_name] = []

    # Countries in neither group keep their empty list; nothing is fetched.
    if (country_name not in countries_with_series
            and country_name not in countries_without_series):
        continue

    # The 'URL' entry is only present when a matching link was found for the
    # country, so report its absence instead of raising KeyError.
    country_url = totals_stats.get('URL')
    if not country_url:
        print('Error: No test list URL found for ' + country_name)
        continue

    if country_name in countries_without_series:
        # The country page itself lists the tests, so it is recorded as a
        # single pseudo-series and no request is needed here.
        series_data[country_name] = [{
            'Series or years' : 'None',
            'URL' : country_url}]
        continue

    # Only the url_depth=2 countries need their page fetched and parsed.
    response = session.get(country_url)
    series_table = response.html.find('.wikitable, .sortable', first=True)

    header_row = series_table.find('tr', first=True)
    column_labels = [header.text.split('[')[0].rstrip()
                     for header in header_row.find('th')]

    for row in series_table.find('tr')[1:]:
        header_cells = row.find('th')
        # Rows without a header cell carry no series name; skip them rather
        # than failing on header_cells[0].
        if not header_cells:
            continue

        series_name = header_cells[0].text
        if not series_name or series_name == 'Totals':
            continue

        # Header cells come first so they line up with the leading labels.
        # zip() stops at the shorter sequence, so extra cells are ignored.
        series_cells = header_cells + row.find('td')
        series_entry = {label: cell.text
                        for label, cell in zip(column_labels, series_cells)}

        # As before, every link in the header cell overwrites 'URL', so the
        # last one iterated wins; rows without a link get no 'URL' key.
        for link in header_cells[0].absolute_links:
            series_entry['URL'] = link

        series_data[country_name].append(series_entry)

print('Nuclear weapons test series by country:\n')
pp.pprint(series_data)

In [None]:
# Nuclear weapons individual tests by country.
# tests_data maps a country name to a list of dicts, one per scraped test row,
# each extended with a 'Series' entry when the series dict provides a name.
tests_data = {}

# Keys under which a series dict may hold its name, in lookup priority order.
series_name_keys = ('Series or years', 'Series', 'Name', 'Sequence')

for country_name, series_stats in series_data.items():
    tests_data[country_name] = []

    for stat in series_stats:
        # Series rows without a link have no 'URL' key; report and move on
        # instead of raising KeyError outside the try block.
        series_url = stat.get('URL')
        if not series_url:
            print('Error: Cannot scrape. No URL for series entry: ' + str(stat))
            continue

        # The series name depends only on the series dict, so resolve it once
        # per series rather than once per table row.
        dict_series = {}
        for name_key in series_name_keys:
            if name_key in stat:
                dict_series = {'Series': stat[name_key]}
                break

        response = session.get(series_url)
        print('Done: ' + series_url)

        try:
            # North Korea's first table is not the one we want
            if country_name == 'North Korea':
                tests_table = response.html.find('.wikitable, .sortable')[1]
            else:
                tests_table = response.html.find('.wikitable, .sortable', first=True)

            # The caption text is not used, but the lookup is kept on purpose:
            # reading .text fails when the caption lookup yields nothing, which
            # sends such tables to the handler below exactly as before.
            table_caption = tests_table.find('caption', first=True).text

            header_row = tests_table.find('tr', first=True)
            column_labels = [header.text.split('[')[0].rstrip()
                             for header in header_row.find('th')]
            rows = tests_table.find('tr')[1:]

            for row_index, row in enumerate(rows):
                # North Korea's table has notes in its own row
                if country_name == 'North Korea' and row_index % 2:
                    # Odd rows hold the notes for the test parsed from the
                    # preceding even row; attach them and emit that test below.
                    dict_stats['Notes'] = row.text
                else:
                    # Indexing [0] is kept although test_name is unused: a row
                    # without a header cell raises here and is reported by the
                    # handler, as in the original flow.
                    test_name = row.find('th')[0].text
                    test_cells = row.find('th') + row.find('td')

                    # Flatten line breaks and non-breaking spaces to plain
                    # spaces and drop byte-order marks from the cell text.
                    dict_stats = {
                        column_labels[cell_index]: (cell.text
                                                    .replace(u'\n', u' ')
                                                    .replace(u'\xa0', u' ')
                                                    .replace(u'\ufeff', u''))
                        for cell_index, cell in enumerate(test_cells)}

                    # For North Korea the entry is emitted only after its
                    # notes row has been read on the next iteration.
                    if country_name == 'North Korea':
                        continue

                tests_data[country_name].append({**dict_stats, **dict_series})
        except Exception as e:
            # str() is required: concatenating the exception object itself
            # raises TypeError and would abort the whole cell.
            print('Error: Cannot scrape. ' + str(e))

print('Nuclear weapons individual tests by country:\n')
pp.pprint(tests_data)

In [None]:
# Manual entries that can't be scraped.
# Each dict is one test record appended to tests_data['USA'].
# Long 'Location' values are split across adjacent string literals, which
# Python joins into a single string.
manual_usa_tests = [
    {'Date time (UTC)': 'July 16, 1945 11:29:21.0',
     'Delivery,': 'Tower, weapons development',
     'Device': 'Plutonium implosion fission',
     'Elevation + height': '0 + 1,500 m (4,921 ft)',
     'Fallout': '',
     'Local time zone': 'MWT (-6 hrs)',
     'Location': 'White Sands Missile Range, New Mexico ~ 33°40′38″N, 106°28′31″W / '
                 '33.677222°N -106.475278°W / 33.677222; -106.475278 (Gadget)',
     'Name': 'Gadget',
     'Notes': '',
     'References': '',
     'Series': 'Trinity',
     'Yield': '22 kt'},

    {'Date time (UTC)': 'August 5, 1945 23:15:44.0',
     'Delivery,': 'free air drop, warfare',
     'Device': 'Mk I',
     'Elevation + height': '0 + 0 m',
     'Fallout': '',
     'Local time zone': 'JST (+9 hrs)',
     # The coordinate label names this record's own device (Little Boy).
     'Location': 'Hiroshima, Japan ~ 34°23′4″N, 132°27′11″E / '
                 '34.384498°N 132.453165°E / 34.384498; 132.453165 (Little Boy)',
     'Name': 'Little Boy',
     'Notes': '',
     'References': '',
     'Series': 'World War II',
     'Yield': '15 kt'},

    {'Date time (UTC)': 'August 9, 1945 2:02:43.0',
     'Delivery,': 'free air drop, warfare',
     'Device': 'Mk III',
     'Elevation + height': '0 + 0 m',
     'Fallout': '',
     'Local time zone': 'JST (+9 hrs)',
     # The decimal longitude is east, matching the E in the DMS form.
     'Location': 'Nagasaki, Japan ~ 32°46′15″N, 129°51′28″E / '
                 '32.77091°N 129.85791°E / 32.77091; 129.85791 (Fat Man)',
     'Name': 'Fat Man',
     'Notes': '',
     'References': '',
     'Series': 'World War II',
     'Yield': '21 kt'},
]

for manual_test in manual_usa_tests:
    # Skip records that are already present so re-running this cell does not
    # append duplicates.
    if manual_test not in tests_data['USA']:
        tests_data['USA'].append(manual_test)

print('Nuclear weapons individual tests by country:\n')
pp.pprint(tests_data)