In [None]:
!pip install matplotlib

# Code Example: MP501 - Retrieving numerical attenuation coefficients from NIST
In MP501, you will often use data from the National Institute of Standards and Technology [(NIST)](https://physics.nist.gov/PhysRefData/XrayMassCoef/tab3.html). This is problematic because the data is not provided in a form that is easy to use electronically. Each attenuation coefficient table will have three columns:

\begin{array}{ccc}
Energy&μ/ρ&μ_{en}/ρ
\end{array}

Because of the way text selection works, you can only copy individual rows or groups of rows, not individual columns. This is an issue because, a lot of the time, you may want to use just Energy and $μ_{en}/ρ$, so we need a way to isolate just those parts of the data.

---

## Copying the table as a string
A workaround for this is to copy it as a string and go through each element individually, extracting specifically the values that are relevant to what we are doing. Defining the string using triple quotes (```''' string goes here '''``` or ```""" string goes here """```) will preserve the new lines for every row.

## Converting the string to a list
Using the string method ```.split()``` will separate the string at each 'white space' (every space, tab, new line, et cetera), turning it into one long 1 x N list,
where N is the total number of entries. N can be found in python by using the function ```len( my_list )```.

## Relationship between index and element
We can now iterate through this list and extract elements based on their indices. We know the table has 3 columns, so going left to right across these, the 0th index is Energy, the 1st index is μ/ρ, the 2nd index is $μ_{en}/ρ$, and the 3rd index is back to Energy, as follows:

\begin{array}{ccc}
Energy&μ/ρ&μ_{en}/ρ\\\
0&1&2\\\
3&4&5\\\
6&7&8\\\
9&10&11\\\
\end{array}

Following this logic, we see the indices follow this pattern, where _i_ is the row number, starting from 0 and stopping one short of the number of rows (N / #Columns):
\begin{array}{c}
Energy\:Index = i * 3 \\
μ/ρ\:Index = i * 3 + 1 \\
μ_{en}/ρ\:Index = i * 3 + 2
\end{array}

Furthermore, we can iterate over the row numbers by using the function ```range( N // #Columns )``` (integer division is used because ```range``` requires an integer).

## Changing the type from string to float
The elements at this point are all stored as individual strings within the list. Because every string represents a numerical value, we can change the type from ```str``` to ```float``` by using the function ```float('string')```, which returns the same number as a float.

## Appending each element to respective, organized lists
As we iterate through the original, master list and separate it into groups, we can append the values directly to a predefined list through the list method ```.append( val )```.

## Converting from a type ```list``` to a type ```np.ndarray```
Finally, we can convert each final list to an ```np.ndarray``` using the function ```np.array( my_list )``` from the numpy package (imported with ```import numpy as np```).

---

## Applying:

Below, I have copied and pasted the ASCII table for carbon:

In [None]:
# NIST attenuation table for carbon, pasted verbatim as a multi-line string.
# Each row holds three whitespace-separated columns: Energy, mu/rho, mu_en/rho
# (plotted further down as Energy in MeV and coefficients in cm^2/g).
NIST_carbon = """1.00000E-03  2.211E+03  2.209E+03
1.50000E-03  7.002E+02  6.990E+02
2.00000E-03  3.026E+02  3.016E+02
3.00000E-03  9.033E+01  8.963E+01
4.00000E-03  3.778E+01  3.723E+01
5.00000E-03  1.912E+01  1.866E+01
6.00000E-03  1.095E+01  1.054E+01
8.00000E-03  4.576E+00  4.242E+00
1.00000E-02  2.373E+00  2.078E+00
1.50000E-02  8.071E-01  5.627E-01
2.00000E-02  4.420E-01  2.238E-01
3.00000E-02  2.562E-01  6.614E-02
4.00000E-02  2.076E-01  3.343E-02
5.00000E-02  1.871E-01  2.397E-02
6.00000E-02  1.753E-01  2.098E-02
8.00000E-02  1.610E-01  2.037E-02
1.00000E-01  1.514E-01  2.147E-02
1.50000E-01  1.347E-01  2.449E-02
2.00000E-01  1.229E-01  2.655E-02
3.00000E-01  1.066E-01  2.870E-02
4.00000E-01  9.546E-02  2.950E-02
5.00000E-01  8.715E-02  2.969E-02
6.00000E-01  8.058E-02  2.956E-02
8.00000E-01  7.076E-02  2.885E-02
1.00000E+00  6.361E-02  2.792E-02
1.25000E+00  5.690E-02  2.669E-02
1.50000E+00  5.179E-02  2.551E-02
2.00000E+00  4.442E-02  2.345E-02
3.00000E+00  3.562E-02  2.048E-02
4.00000E+00  3.047E-02  1.849E-02
5.00000E+00  2.708E-02  1.710E-02
6.00000E+00  2.469E-02  1.607E-02
8.00000E+00  2.154E-02  1.468E-02
1.00000E+01  1.959E-02  1.380E-02
1.50000E+01  1.698E-02  1.258E-02
2.00000E+01  1.575E-02  1.198E-02
"""

Now we can write a for loop script to separate the elements into grouped lists:

In [1]:
# Split the big string (above) on any whitespace into one flat list of
# number strings, ordered row by row:
#   [E_0, mu_0, mu_en_0, E_1, mu_1, mu_en_1, ...]
data_list = NIST_carbon.split()

# The table has 3 columns, so a complete paste must contain a whole number
# of rows. Fail loudly on a partial row instead of silently dropping values
# (which would leave the three columns truncated or misaligned).
num_columns = 3
if len(data_list) % num_columns != 0:
    raise ValueError(
        f'Expected a multiple of {num_columns} values in the table, '
        f'but found {len(data_list)}; check the pasted data for a partial row.'
    )

# Number of rows in the data (integer division, so this stays an int)
num_rows = len(data_list) // num_columns

# Stride slicing picks out every 3rd element starting at a column offset,
# and each string is converted to a float as it is collected:
#   energy      -> indices 0, 3, 6, 9 ...
#   attenuation -> indices 1, 4, 7, 10 ...
#   absorption  -> indices 2, 5, 8, 11 ...
energy = [float(value) for value in data_list[0::num_columns]]
atten = [float(value) for value in data_list[1::num_columns]]
absorp = [float(value) for value in data_list[2::num_columns]]

NameError: name 'NIST_carbon' is not defined

In [None]:
# QA check: report the container type and number of entries of each column.
# All three lengths should agree with the number of rows in the table.
for heading, values in (
    ('Examining the energy values:', energy),
    ('Examining the list of attenuation values:', atten),
    ('Examining the list of absorption values:', absorp),
):
    print(heading)
    print(f'Data type: {type(values)}')
    print(f'Length: {len(values)}\n')

In [None]:
# Bring in matplotlib's pyplot interface under its conventional alias
import matplotlib.pyplot as plt

# One figure holding a single set of axes
fig, ax = plt.subplots(figsize=(5,4))

# Scatter each coefficient column against energy, one series per column
for coefficient, legend_label in (
    (atten, r'$\rm\mu/\rho$'),
    (absorp, r'$\rm\mu_{en}/\rho$'),
):
    ax.scatter(x=energy, y=coefficient, label=legend_label)

# Labeling: title, axis labels and legend
ax.set_title(r'$\rm\mu/\rho\;vs\;Energy$ for carbon', fontsize=20)
ax.set_xlabel('Energy (MeV)', fontsize=16)
ax.set_ylabel(r'Coefficient ($\rm cm^2/g$)', fontsize=16)
ax.legend(fontsize=14)

# Axes: both values span several orders of magnitude, so use log-log scaling
ax.set_xscale('log')
ax.set_yscale('log')
ax.tick_params(labelsize=14)

plt.show()