# Exercises: Data Analysis with Python

In [None]:
import polars as pl
import matplotlib.pyplot as plt

# Isotope Masses
You are going to analyse the masses of the known isotopes.

#### Reading data
The data is read from a *parquet* file. This file format contains the datatypes (e.g. int or float) in addition to the actual values.

In [None]:
# read the isotope table from the Parquet file into a Polars DataFrame
isotopes = pl.read_parquet('data/isotopes.parquet')

#### Browsing the data
Have a first look at the dataframe and find out about the different columns, the number of isotopes, etc.

In [None]:
# display 10 pseudo-random rows; the fixed seed keeps the output identical on every re-run
display(isotopes.sample(n=10, seed=42))

In [None]:
# number of rows (isotopes) and columns of the dataframe
rows = isotopes.height
cols = isotopes.width
print(f'{rows} rows and {cols} columns')

#### Some search tasks
Answer the following questions:
- Which isotope has the greatest atomic mass?
- How many carbon isotopes are known?
- Which element has the greatest number of isotopes?
- Which lead (Pb) isotopes have a non-negligible abundance (column *Isotopic Composition*) and do their abundances add up to 100 %?

In [None]:
# keep only the row(s) whose relative atomic mass equals the column maximum
# (Og stands for Oganesson)
isotopes.filter(
    pl.col('Relative Atomic Mass') == pl.col('Relative Atomic Mass').max()
)

In [None]:
# filter carbon (C) isotopes, as asked in the task list;
# the row count shown in the displayed shape is the number of known carbon isotopes
isotopes.filter(pl.col('Atomic Symbol') == 'C')

In [None]:
# one row per atomic number: the first atomic symbol seen in the group and the
# number of isotopes (len), ordered from most to fewest isotopes
(
    isotopes
    .group_by('Atomic Number')
    .agg(
        pl.col('Atomic Symbol').first(),
        pl.len().alias('Number of Isotopes'),
    )
    .sort('Number of Isotopes', descending=True)
)

In [None]:
# lead (Pb) isotopes that have an isotopic composition entry (non-null)
pb_not_null = isotopes.filter(
    pl.col('Atomic Symbol') == 'Pb',
    pl.col('Isotopic Composition').is_not_null(),
)
display(pb_not_null)

# total of the listed abundances
display(
    pb_not_null.select(
        pl.col('Isotopic Composition').sum().alias('Sum of Abundances')
    )
)

### Binding Energy
The binding energy of a nucleus is its mass defect multiplied by the square of the speed of light, where the mass defect is the sum of the masses of the protons and neutrons bound in the nucleus minus the measured mass of the nucleus.

Add columns for the binding energy (in MeV) and the binding energy divided by the mass number. Graph the binding energy vs the mass number. Find the isotope with the highest binding energy per nucleon.

In [None]:
import altair as alt
from scipy.constants import m_n, m_p, m_e, u, speed_of_light, eV

def e_bind(Z, N, m_rel):
    """
    Binding energy of a nucleus in MeV, computed from its atomic mass.

    Z: atomic number (number of protons)
    N: mass number (number of protons + number of neutrons)
    m_rel: relative atomic mass (converted to kg with the atomic mass constant u)

    One electron mass is added per proton, so the constituents are compared
    with the mass of the whole atom rather than the bare nucleus.
    """
    # mass of the separated constituents: Z protons + Z electrons + (N - Z) neutrons, in kg
    constituents_kg = Z * (m_p + m_e) + (N - Z) * m_n
    # mass defect in kg
    mass_defect_kg = constituents_kg - m_rel * u
    # E = dm * c^2 in joule, divided by the electron volt to get eV
    energy_ev = mass_defect_kg * speed_of_light**2 / eV

    # eV -> MeV
    return energy_ev / 1e6


# add columns for binding energy and binding energy per nucleon
# e_bind consists only of arithmetic, so it is applied to whole columns as a Polars
# expression instead of being called once per row through map_elements on a struct.
# The inputs are cast to Float64 so the result is computed and stored in double
# precision; a null mass yields a null binding energy instead of raising.
binding_energy = isotopes.with_columns(
    e_bind(
        pl.col('Atomic Number').cast(pl.Float64),
        pl.col('Mass Number').cast(pl.Float64),
        pl.col('Relative Atomic Mass').cast(pl.Float64),
    ).alias('Binding Energy')
).with_columns(
    (pl.col('Binding Energy') / pl.col('Mass Number'))
        .alias('Binding Energy per Nucleon')
)

# dataframe with only the highest binding energy per nucleon for each mass number
# (used for the upper envelope)
# The symbol of the maximum is selected inside each group (sort_by ... last), so the
# result does not depend on a prior global sort; the final sort by mass number makes
# the row order deterministic, which group_by alone does not guarantee.
binding_energy_max = (
    binding_energy
    .group_by('Mass Number')
    .agg([
        pl.col('Atomic Symbol').sort_by('Binding Energy per Nucleon').last(),
        pl.col('Binding Energy per Nucleon').max(),
    ])
    .sort('Mass Number')
)

In [None]:
# chart1: one point per isotope
chart1 = binding_energy.plot.point(
    x=alt.X('Mass Number').scale(type='log', domain=(1, 300)),  # logarithmic scale (looks better)
    y='Binding Energy per Nucleon',
    tooltip=[
        'Atomic Symbol',
        'Mass Number',
        alt.Tooltip('Binding Energy per Nucleon', format='.2f'),  # 2 decimal places in tooltips
    ],
).properties(width=600, title='Binding Energy per Nucleon')

# chart2: line through the maximum of each mass number
chart2 = binding_energy_max.plot.line(
    x=alt.X('Mass Number').scale(type='log', domain=(1, 300)),
    y=alt.Y('Binding Energy per Nucleon').title('Binding Energy per Nucleon (MeV)'),
    tooltip=['Atomic Symbol', 'Mass Number', 'Binding Energy per Nucleon'],
)

# overlay both layers, then format the points (small) and the line (red)
chart = (
    alt.layer(chart1, chart2)
    .configure_point(size=1)
    .configure_line(color='red')
)

chart

In [None]:
# row(s) whose binding energy per nucleon equals the column maximum
binding_energy.filter(
    pl.col('Binding Energy per Nucleon') == pl.col('Binding Energy per Nucleon').max()
)

The most stable isotope (isotope with highest binding energy per nucleon) is Ni-62.