# Fetching Isotopes data

In [1]:
import requests
from bs4 import BeautifulSoup
import unidecode
from mendeleev import get_session, Element, Isotope

Download raw data from nucleardata.nuclear.lu.se

In [2]:
def fetch_isotope(atomic_number):
    url = 'http://nucleardata.nuclear.lu.se/toi/listnuc.asp?Z={}'.format(atomic_number)
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "lxml")
    gdp_table = soup.find("table")
    data = gdp_table.find_all("td")
    #corrected_list = data[:-1]
    raw_isotope = []
    samples = int(len(data[:-1])/7)
    for i in range(samples):
        raw_isotope.append(data[i*7:i*7+7])
        
    return raw_isotope

Download atomic mass for all isotopes from physics.nist.gov

In [3]:
def fetch_atomic_mass():
    url = 'https://physics.nist.gov/cgi-bin/Compositions/stand_alone.pl?ele=&isotype=all'
    r = requests.get(url)
    soup = BeautifulSoup(r.text, "lxml")
    gdp_table = soup.find("table")
    
    return gdp_table

Convert html data to Isotope class

In [4]:
def extract_mass(raw_mass):
    data = raw_mass.find_all("tr")
    del(data[0:4])
    isotopes_mass = []
    new_Z = True
    for i in range(len(data)):
        isotope = Isotope()
        a = unidecode.unidecode(data[i].text).split('\n')
        if len(a) == 1: # blank line, set true and skip
            new_Z = True
            if i == len(data)-6: # end of file, break
                break  
            continue
        if new_Z: # first occurance
            Z = int(a[1])
            number = int(a[3])
            mass = a[4].split('(')[0].replace(' ','')
            uncertainty = a[4].split('(')[1].replace(')','')
            new_Z = False
        else:
            if 'D' in a[1] or 'T' in a[1]: # exception
                number = int(a[2])
                mass = a[3].split('(')[0].replace(' ','')
                uncertainty = a[3].split('(')[1].replace(')','')
            else:
                number = int(a[1])
                mass = a[2].split('(')[0].replace(' ','')
                uncertainty = a[2].split('(')[1].replace(')','') 
        isotope.atomic_number = Z
        isotope.mass_number = number
        isotope.mass = mass
        isotope.mass_uncertainty = uncertainty.replace('#','')
        isotopes_mass.append(isotope)
        
        
    return isotopes_mass

Convert 

In [5]:
def extract_isotope(raw_isotope):
    isotopes_list = []
    for row in raw_isotope:
        isotope = Isotope()
        isotope.atomic_number = int(row[0].text)
        isotope.mass_number = int(row[1].text) +int(row[0].text)
        #isotope.decay_mode = row[2].text
        hl = unidecode.unidecode(row[3].text)
        hl = hl.split()
        if len(hl) > 1:
            try:
                isotope.half_life = float(hl[0])
            except:
                isotope.half_life = float(hl[0][1:])
            isotope.half_life_unit = hl[1]
        isotope.excitation_energy = row[4].text        
        #isotope.spin = row[5].text
        if row[6].text != '':
            abu = unidecode.unidecode(row[6].text).split()[0]
            isotope.abundance = float(abu)/100
        isotopes_list.append(isotope) 

    return isotopes_list

Connect to database

In [6]:
session = get_session()

Iterate over elements, fetching data from http://nucleardata.nuclear.lu.se/ and adding it to the database

In [7]:
for i in range(1,113):
    raw_isotopes = fetch_isotope(i)
    isotopes = extract_isotope(raw_isotopes)
    for row in isotopes:
        isotope = session.query(Isotope).join(Element).filter(
            Element.atomic_number == row.atomic_number, Isotope.mass_number == row.mass_number).first()
        if not isotope:  # TODO: include isotopes with same Z and A but different decay mode (ex: Sc46m)  
            session.add(row)
    session.commit()

Download mass data and extract it

In [8]:
raw_mass = fetch_atomic_mass()
isotopes_mass = extract_mass(raw_mass)

Get list of all isotopes from the database

In [9]:
all_isotopes = session.query(Isotope).all()

Find isotopes with no mass and update it

In [10]:
for row in all_isotopes:
    if not row.mass:
        for i in isotopes_mass:
            if i.atomic_number == row.atomic_number and i.mass_number == row.mass_number:
                row.mass = i.mass
                row.mass_uncertainty = i.mass_uncertainty
                break
        session.add(row)
session.commit()

In [11]:
session.close()

In [12]:
all_isotopes

DetachedInstanceError: Instance <Isotope at 0x7fe1806c05e0> is not bound to a Session; attribute refresh operation cannot proceed (Background on this error at: http://sqlalche.me/e/13/bhk3)

Add missing 'mass' for isotopes from https://physics.nist.gov/