# Exercises: Data Analysis with Python

In [57]:
import polars as pl
import matplotlib.pyplot as plt

# Binding Energy
You are going to analyse the binding energies of the known isotopes.

#### Reading data
The data is read from a *parquet* file. This file format contains the datatypes (e.g. int or float) in addition to the actual values.

In [58]:
isotopes = pl.read_parquet('data/binding_energy.parquet')

#### Browsing the data
Have first look at the dataframe and find out about the different columns, the number of isotopes, etc.

In [59]:
display(isotopes.head(10)) # display 10 random rows

N,Z,Mass Number,Atomic Symbol,Odd,Mass Excess,ME error,Binding Energy per Nucleon,BE error,Beta Type,Beta-Decay Energy,BD error,Atomic Mass,AM error
i16,i16,i16,str,str,f64,f64,f64,f64,str,f64,f64,f64,f64
1,0,1,"""n""","""""",8071.31806,0.00044,0.0,0.0,"""B-""",782.347,0.0004,1008700.0,0.00047
0,1,1,"""H""","""""",7288.971064,1.3e-05,0.0,0.0,"""B-""",,,1007800.0,1e-05
1,1,2,"""H""","""""",13135.722895,1.5e-05,1112.2831,0.0002,"""B-""",,,2014100.0,1e-05
2,1,3,"""H""","""""",14949.8109,8e-05,2827.2654,0.0003,"""B-""",18.59202,6e-05,3016000.0,8e-05
1,2,3,"""He""","""""",14931.21888,6e-05,2572.68044,0.00015,"""B-""",-13736.0,2000.0,3016000.0,6e-05
0,3,3,"""Li""","""-pp""",28667.0,2000.0,-2267.0,667.0,"""B-""",,,3030775.0,2147.0
3,1,4,"""H""","""-n""",24621.129,100.0,1720.4491,25.0,"""B-""",22196.2131,100.0,4026400.0,107.354
2,2,4,"""He""","""""",2424.91587,0.00015,7073.9156,0.0002,"""B-""",-22898.274,212.132,4002600.0,0.00016
1,3,4,"""Li""","""-p""",25323.19,212.132,1153.7603,53.033,"""B-""",,,4027200.0,227.733
4,1,5,"""H""","""-nn""",32892.447,89.443,1336.3592,17.8885,"""B-""",21661.2131,91.6515,5035300.0,96.02


In [40]:
rows, cols = isotopes.shape
print(f'{rows} rows and {cols} columns')

3558 rows and 14 columns


#### Some search tasks
Answer the following questions:
- Which isotope has the greates atomic mass?
- How many carbon isotopes are known?
- Which element has the greatest number of isotopes?
- Which lead (Pb) isotopes have a non-negligible abundance (column *Isotopic Composition*) and do their abundances add up to 100 %?

In [41]:
# find the isotope with the maximum atomic mass (Og stands for Oganessum)
isotopes.filter(pl.col('Atomic Mass') == pl.max('Atomic Mass'))

N,Z,Mass Number,Atomic Symbol,Odd,Mass Excess,ME error,Binding Energy per Nucleon,BE error,Beta Type,Beta-Decay Energy,BD error,Atomic Mass,AM error
i16,i16,i16,str,str,f32,f32,f32,f32,str,f32,f32,f32,f32
177,118,295,"""Og""","""-a""",201369.0,655.0,7076.0,2.0,"""B-""",,,295216192.0,703.0


In [17]:
# filter oxygen (O) isotopes; there are 17 rows
isotopes.filter(pl.col('Atomic Symbol') == 'O')

N,Z,Mass Number,Atomic Symbol,Odd,Mass Excess,ME error,Binding Energy per Nucleon,BE error,Beta Type,Beta-Decay Energy,BD error,Atomic Mass,AM error
i16,i16,i16,str,str,f32,f32,f32,f32,str,f32,f32,f32,f32
3,8,11,"""O""","""-pp""",47738.914062,60.037998,3162.437256,5.458,"""B-""",,,51249.828125,64.453003
4,8,12,"""O""","""-pp""",32013.335938,12.0,4881.975586,1.0,"""B-""",,,34367.726562,12.882
5,8,13,"""O""","""+3n""",23115.431641,9.526,5811.763672,0.7328,"""B-""",-18915.0,500.0,24815.435547,10.226
6,8,14,"""O""","""""",8007.780762,0.025,7052.27832,0.0018,"""B-""",-23956.621094,41.118698,8596.706055,0.027
7,8,15,"""O""","""""",2855.62207,0.49,7463.691406,0.0327,"""B-""",-13711.129883,14.0086,3065.635986,0.526
…,…,…,…,…,…,…,…,…,…,…,…,…,…
16,8,24,"""O""","""x""",18500.404297,164.873993,7039.685547,6.8698,"""B-""",10955.888672,191.632706,19861.0,177.0
17,8,25,"""O""","""-n""",27329.029297,165.084,6727.805664,6.6034,"""B-""",15994.863281,191.190903,29338.919922,177.225006
18,8,26,"""O""","""-nn""",34661.039062,164.949997,6497.479004,6.3442,"""B-""",15986.385742,196.630096,37210.15625,177.080994
19,8,27,"""O""","""x""",44670.0,500.0,6185.0,19.0,"""B-""",19536.0,514.0,47955.0,537.0


In [42]:
# group by atomic number, aggregate number of isotopes (len), sort by number of isotopes (descending)
(isotopes
    .group_by('Z')
    .agg([
        pl.col('Atomic Symbol'), 
        pl.len().alias('Number of Isotopes')]
        )
    .with_columns(pl.col('Atomic Symbol').list.first())
    .sort('Number of Isotopes', descending=True)
) 

Z,Atomic Symbol,Number of Isotopes
i16,str,u32
80,"""Hg""",47
78,"""Pt""",44
54,"""Xe""",43
81,"""Tl""",43
79,"""Au""",43
…,…,…
115,"""Mc""",6
116,"""Lv""",5
117,"""Ts""",4
118,"""Og""",3


### Mass Excess
The *mass excess* (or *mass defect*) corresponds to the difference between the measured mass ('Atomic Mass', unit µu) and the mass number ('Mass Number', unit u). In the datafram it is expressed as the equivalent energy in keV. Add another column to verify this relation.

In [75]:
from scipy.constants import eV, m_u, speed_of_light

(isotopes
    .with_columns(
        ((pl.col('Atomic Mass') * 1e-6 - pl.col('Mass Number')) * m_u * speed_of_light**2 /(1000 * eV))
            .alias('My Mass Excess'))
    .with_columns((pl.col('Mass Excess') / pl.col('My Mass Excess')).alias('rel'))
    .select(['Atomic Symbol', 'N', 'Mass Excess', 'My Mass Excess', 'rel'])
).sort('rel')

Atomic Symbol,N,Mass Excess,My Mass Excess,rel
str,i16,f64,f64,f64
"""Pt""",87,-318.0,-318.570983,0.998208
"""K""",36,470.0,470.404522,0.99914
"""Hg""",131,-390.0,-390.296029,0.999242
"""V""",18,310.0,310.187537,0.999395
"""Bi""",101,1254.0,1254.722558,0.999424
…,…,…,…,…
"""Pt""",88,-4783.0,-4782.290728,1.000148
"""Bi""",102,-2236.0,-2235.585849,1.000185
"""Ar""",34,-1380.0,-1379.542768,1.000331
"""Cl""",32,740.0,739.606318,1.000532


### Nuclear Binding Energy
The nuclear binding energy of a nucleus can be approximated as the mass difference of its measured mass ('Atomic Mass') and the sum of the masses of the protons, neutrons and electrons, multiplied by the square of the speed of light.

Add columns for the binding energy (in MeV) and the binding energy divided by the mass number. Compare the calculated value to the values in the column 'Binding Energy per Nucleus' in the dataframe. The deviations are mostly due the electronic binding energy, which has been neglected in this calculation.

In [136]:
from scipy.constants import m_n, m_p, m_e, u, speed_of_light, eV

def e_bind(Z, N, m):
    """
    function to calculate the binding energy of a nucleus
    Z: atomic number (number of protons)
    N: neutron number
    m: atomic mass (in µu)
    """
    
    dm = Z * (m_p + m_e) + N * m_n - m * 1e-6 * u # mass difference in kg
    eb = dm * speed_of_light**2 / eV # binding energy in eV
    
    return eb / 1e6 # return binding energy in MeV


# add columns for binding energy and binding energy per nucleon
binding_energy = isotopes.with_columns(
    pl.struct(['Z', 'N', 'Atomic Mass'])
    .map_elements(lambda s: e_bind(s['Z'], s['N'], s['Atomic Mass']), return_dtype=pl.Float64)
    .alias('Binding Energy')
).with_columns(
    (pl.col('Binding Energy') / pl.col('Mass Number'))
        .alias('My Binding Energy per Nucleon'),
    pl.col('Binding Energy per Nucleon') / 1000
)

# dataframe with only highest binding energy for each mass number (used for upper envelope)
binding_energy_max = (binding_energy
                      .sort('Binding Energy per Nucleon')
                      .group_by('Mass Number')
                      .agg([
                          pl.col('Z').last(),
                          pl.col('N').last(),
                          pl.col('Atomic Symbol').last(), 
                          pl.col('Binding Energy per Nucleon').max()
                      ])
                     )

In [108]:
(binding_energy.select(['Atomic Symbol', 'Z', 'Mass Number',
                       pl.col('Binding Energy per Nucleon'), 'My Binding Energy per Nucleon', 
                       (1e-3 * pl.col('Binding Energy per Nucleon') / pl.col('My Binding Energy per Nucleon'))
                           .abs().alias('rel')])
    .sort('rel', descending=True)
)

Atomic Symbol,Z,Mass Number,Binding Energy per Nucleon,My Binding Energy per Nucleon,rel
str,i16,i16,f64,f64,f64
"""Be""",4,5,0.018,0.017717,0.001016
"""Li""",3,3,-2.267,-2.266592,0.001
"""F""",9,13,4.297,4.29663,0.001
"""F""",9,31,6.011,6.010558,0.001
"""Na""",11,35,6.745,6.744536,0.001
…,…,…,…,…,…
"""Ar""",18,29,6.276,6.276469,0.001
"""H""",1,7,0.94,0.940216,0.001
"""B""",5,6,-0.467,-0.467276,0.000999
"""n""",0,1,0.0,1.4537e-7,0.0


#### Graph
Graph the nuclear binding energy per nucleon in a diagram. Find the isotope with the highest binding energy per nucleon, i.e. the most stable isotope. List the isotopes with a negative binding energy.

In [150]:
import altair as alt

# chart1 with all data points
chart1 = (binding_energy.plot.point(
            x=alt.X('Mass Number',
                    scale=alt.Scale(type='log', domain=(1, 300)) # logarithmic scale (looks better)
                   ),
            y='Binding Energy per Nucleon',
            tooltip=[
                'Atomic Symbol', 
                'Mass Number', 
                alt.Tooltip('Binding Energy per Nucleon', format='.2f') # round values to 2 decimal places in tooltips
            ]
            )
          .properties(width=600, title='Binding Energy per Nucleon')
        )


#chart2 with only max for each mass number
chart2 = (binding_energy_max.plot.line(
            x=alt.X('Mass Number',
                    scale=alt.Scale(type='log', domain=(1, 300))
                   ),
            y=alt.Y('Binding Energy per Nucleon',
                   title='Binding Energy per Nucleon (MeV)'
                   ),
            tooltip=['Atomic Symbol', 'Mass Number', 'Binding Energy per Nucleon']
            )
        )

chart = ((chart1 + chart2)
           .configure_point(size=1).configure_line(color='red') # format points and line
       )

chart

In [114]:
binding_energy.filter(pl.col('Binding Energy per Nucleon') == pl.max('Binding Energy per Nucleon'))

N,Z,Mass Number,Atomic Symbol,Odd,Mass Excess,ME error,Binding Energy per Nucleon,BE error,Beta Type,Beta-Decay Energy,BD error,Atomic Mass,AM error,Binding Energy,My Binding Energy per Nucleon
i16,i16,i16,str,str,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64
34,28,62,"""Ni""","""""",-66746.44,0.425,8.794556,0.0069,"""B-""",-3958.8965,0.4751,61928000.0,0.455,545.26283,8.794562


The most stable isotope (isotope with highest binding energy per nucleon) is Ni-62.

In [117]:
binding_energy.filter(pl.col('Binding Energy per Nucleon') < 0)

N,Z,Mass Number,Atomic Symbol,Odd,Mass Excess,ME error,Binding Energy per Nucleon,BE error,Beta Type,Beta-Decay Energy,BD error,Atomic Mass,AM error,Binding Energy,My Binding Energy per Nucleon
i16,i16,i16,str,str,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64
0,3,3,"""Li""","""-pp""",28667.0,2000.0,-2.267,667.0,"""B-""",,,3030775.0,2147.0,-6.799777,-2.266592
1,5,6,"""B""","""x""",47320.0,2003.0,-0.467,334.0,"""B-""",,,6050800.0,2150.0,-2.803659,-0.467276


In [157]:
def droplet(N, Z):
    
    A = N + Z # number of nucleons

    pairing = 'e' if N % 2 == 0 else 'o'
    pairing += 'e' if Z % 2 == 0 else 'o'
    match pairing:
        case 'ee':
            aP = 11.18
        case 'oo':
            aP = -11.18
        case _:
            aP = 0
            

    EV = 15.76 * A
    EO = -17.81 * A**(2/3)
    EC = - 0.711 * Z * (Z - 1) * A**(-1/3)
    ES = -23.702* (N - Z)**2 / (4 * A)
    EP = aP * A**(-1/2)

    return (EV + EO + EC + ES + EP) / A

In [158]:
droplet = binding_energy_max.with_columns(pl.struct(['Z', 'N']).map_elements(lambda s: droplet(s['Z'], s['N']), return_dtype=pl.Float64).alias('Droplet'))

In [159]:
#chart3 with binding energy predicted by droplet model (only envelope)
chart3 = (droplet.plot.line(
            x=alt.X('Mass Number',
                    scale=alt.Scale(type='log', domain=(1, 300))
                   ),
            y=alt.Y('Droplet',
                   title='Binding Energy per Nucleon (MeV)'
                   ),
            tooltip=['Atomic Symbol', 'Mass Number', 'Droplet']
            )
        )

In [160]:
chart4 = chart + chart3

In [161]:
chart4