In [25]:
import pandas as pd
import scipy.constants
from scipy.optimize import curve_fit
from scipy.integrate import quad
from sklearn.metrics import r2_score
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.dates as md
import matplotlib.transforms as transforms
from matplotlib.ticker import LogFormatter
import os
import fnmatch
from pyxll import xl_func
%matplotlib qt
# %matplotlib inline
import datetime as dt
pd.set_option('display.max_rows', 100) 

## Reading in the data and preparing DataFrames

#### MID

In [26]:
#read in the file and create dataframe
#excplicitly state the part
#path = r'C:\Users\einot\OneDrive\Documents\CERN VScode\CERN-Python\4K desorption measurements\Cu sample\EGA fully in\S1 no preinjection at cold temps'
#mid = pd.read_csv(os.path.join(path, "MID.tsv"), sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")

#List molecule masses used in MID scan

#masslist = [2,4,12,15,16,18,28,32,40,44]

#Define datetime format
#MID_datetime = "%Y/%m/%d %H:%M:%S.%f"

#Reformat time values
def timeformat(df,format):
    new_df = df.copy()
    for idx, col in enumerate(df):
        if idx % 2 == 0:  # Check if the column index is even (every other column)
            pandas_timestamp = pd.to_datetime(df[col], format=format)
            timestamp = pandas_timestamp.dt.strftime('%d-%m-%Y %H:%M:%S')
            new_df[col] = timestamp  # Replace the time column in the new dataframe
    return new_df

#call the function with correct parameters to modify MID dataframe
#mid = timeformat(mid,MID_datetime)

#### Vaclogger

In [27]:
""" #Reading in the file
vaclog = pd.read_csv(os.path.join(path, "vaclog"), sep="\t")

#Define original datetime format
vaclog_datetime = "%d/%m/%Y %H:%M:%S"
 """
#Adding an elapsed time column for temperature fits
def elapsed_time(df,timecol,format):
    new_df = df.copy()
    pandas_timestamp = pd.to_datetime(new_df[timecol],format=format)
    runtime = (pandas_timestamp-pandas_timestamp[0]).dt.total_seconds()
    insert_idx = df.columns.get_loc(timecol) + 1  # Get the index to insert the new column
    new_df.insert(insert_idx,"Elapsed time", runtime)
    reformat = pandas_timestamp.dt.strftime('%d-%m-%Y %H:%M:%S') #New timestamp format for plotting
    new_df[timecol] = reformat
    new_df[timecol] = pd.to_datetime(new_df[timecol],format='%d-%m-%Y %H:%M:%S') #convert to pd datetime
    return new_df
""" 
#call the function with correct parameters to modify vaclogger dataframe
vaclog = elapsed_time(vaclog,"Time",vaclog_datetime) """

' \n#call the function with correct parameters to modify vaclogger dataframe\nvaclog = elapsed_time(vaclog,"Time",vaclog_datetime) '

In [28]:
def process_data(root):
    #Define datetime formats
    MID_datetime = "%Y/%m/%d %H:%M:%S.%f"
    vaclog_datetime = "%d/%m/%Y %H:%M:%S"
    hv_datetime = "%d/%m/%Y %H:%M:%S.%f"
    
    #Create an empty dictionary to store processed dataframes
    dataframes = {}

    # Iterate over the subdirectories starting from the specified directory
    for dirpath, dirs, files in os.walk(root):
        for filename in files:
            filepath = os.path.join(dirpath,filename)
            #print(filepath)
            # Create a variable name using the relative path
            relative_path = os.path.relpath(root, dirpath)
            #print(relative_path)
            variable_name = os.path.join(relative_path, os.path.splitext(filename)[0])
            #print(variable_name)
            #Read in the MID file
            if fnmatch.fnmatch(filename,"*MID*.tsv"):
                mid = pd.read_csv(filepath, sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")
                mid = timeformat(mid, MID_datetime)

                # Create a variable name using the base name of the MID filename
                #variable_name = os.path.splitext(filename)[0]

                # Store the MID dataframe using the variable name
                dataframes[variable_name] = mid

            #Read in the vaclog file
            elif fnmatch.fnmatch(filename,"recycling*.csv"):
                vaclog = elapsed_time(pd.read_csv(filepath, sep="\t",header= 0), "Time", vaclog_datetime)                
                # Create a variable name using the base name of the MID filename
                #variable_name = os.path.splitext(filename)[0]

                # Store the vaclog dataframe using the variable name
                dataframes[variable_name] = vaclog

            #Read in the hivolta file
            elif fnmatch.fnmatch(filename,"*hv log*"):
                hv = pd.read_csv(filepath, sep=",")
                hv["hv_grid"] = [abs(element) * 1e-6 for element in hv["IMon1"]]
                hv["I_em"] = [abs(element) * 1e-6 for element in hv["IMon2"]]
                hv["datetime"] = hv['Date'] + ' ' + hv['Time']
                hv = elapsed_time(hv, "datetime", hv_datetime)

                # Create a variable name using the base name of the MID filename
                #variable_name = os.path.splitext(filename)[0]
                
                # Store the hv dataframe using the variable name
                dataframes[variable_name] = hv   
            
    return dataframes




## Data processing

In [29]:
#Accessing the files
root = r"C:\Users\etiirine\cernbox\Documents\etiirinen\Python\4K desorption measurements\Elena's data"
data = process_data(root)

# Access the processed dataframes using the variable names
for variable_name, dataframe in data.items():
    print(variable_name)
    print(dataframe)
    print("-----------------")

.\recycling-and-warm-up-10-03-2020
      Live comments                Time  Elapsed time          DUAL  \
0               NaN 2020-03-10 11:47:16           0.0  1.010000e-09   
1               NaN 2020-03-10 11:47:25           9.0  1.010000e-09   
2               NaN 2020-03-10 11:47:35          19.0  1.010000e-09   
3               NaN 2020-03-10 11:47:45          29.0  1.010000e-09   
4               NaN 2020-03-10 11:47:55          39.0  1.010000e-09   
...             ...                 ...           ...           ...   
29838           NaN 2020-03-13 08:44:21      248225.0  1.740000e-08   
29839           NaN 2020-03-13 08:44:30      248234.0  1.920000e-08   
29840           NaN 2020-03-13 08:44:40      248244.0  1.620000e-08   
29841           NaN 2020-03-13 08:44:50      248254.0  1.640000e-08   
29842           NaN 2020-03-13 08:45:00      248264.0  1.590000e-08   

           Barion_2      Barion_1        DUAL.1  helium  I_emission  I_grid  \
0      1.720000e-09  6.880000e-10

#### CernOx R-T conversion 

In [30]:
def tempconvert(df):
    new_df = df.copy()
    #Temperatzure curve for CERNOX - for temp stability
    A=[230.317302,-6170.1513,71837.9529,-477946.76,2.003668910085786e+6,-5.488690193047771e+6,9.830475663897528e+6,-1.111226817786569e+7,7.202477878914065e+6,-2.04194551328507e+6]

    #specify fit parameters A, data (Resistance values)
    def polyfit(param,data):
        total=[]
        for j in data: 
            exp=0
            for i in range(len(param)):
                exp += (param[i]/(math.log10(j))**i)
            total.append(10**exp)
        return(total)
    col_loc = int(df.columns.get_loc("T-CERNOX") + 1)    
    new_df.insert(col_loc, "CernOx Temp", polyfit(A, new_df["T-CERNOX"]))
    return new_df


#### Coefficients for gauge readings
Applied for the gauge readings using the formula below:
    $$
    \frac{p_2}{p_1}=\sqrt \frac{T_2}{T_1}
    $$
Here $p_2$ and $T_2$ are the pressure and temperature in the cold part and $p_1$, $T_1$ are the pressure, temperature the gauge is exposed to.

In [31]:
#Thermal transpiration
T2 = 4.2
T1 = 293
p_coef = 1#np.sqrt(T1/T2)
print(p_coef)
#N2 to H2 equivalent conversion
CF_h2 = 2.49
def gauge_correction(df):
    new_df = df.copy()
    #apply correction factors
    ba1 = df["Barion_1"]*CF_h2
    ba2 = df["Barion_2"]*CF_h2
    new_df["Barion_1"] = ba1
    new_df["Barion_2"] = ba2
    new_df.rename(columns={"Barion_1": "Barion_1 corr", "Barion_2": "Barion_2 corr"},inplace=True)
    return new_df

1


#### Comments for annotations

In [32]:
def comments(dataframe,timecol,commentcol):
    #print hv comments
    #print(pd.unique(dataframe[colname]))
    new_dataframe = dataframe.loc[:,[timecol,commentcol]]
    #delete excess comments (hv log program writes each comment 4x)

    # Find the indices of the first occurrence of each unique event
    first_unique_indices = new_dataframe.drop_duplicates(subset=commentcol, keep="first").index

    #replace the comments in rows that are not the first occurrence of each unique event with NaN values
    new_dataframe.loc[~new_dataframe.index.isin(first_unique_indices), commentcol] = np.nan

    #drop NaN values
    new_dataframe.dropna(inplace=True)
    print(new_dataframe.index)
    return new_dataframe

### Primary desorption yield

$$
\eta_{app}=\frac{C_{H_2}\cdot \Delta(p_{2} - {p}_{1}) \cdot q_e}{k_B\cdot T \cdot I_{EM}}\\
    \eta_{app} = \frac{C_{H_2}\cdot \bigl[\left(p_{2.EM}-p_{2.base}\right)-\left(p_{1.EM}-p_{1.base}\right)\bigr] \cdot q_e}{k_B\cdot T \cdot I_{EM}}
$$

In [33]:
#Subset data depending on emission condition
def emission_data(df, comment, select='before'):
    new_df = df.copy()
    cutoff = new_df.loc[new_df["Live comments"] == f"{comment}"].index[0]  # Get the index of the first occurrence of the comment

    if select == 'before':
        # Subset the DataFrame from the beginning until the cutoff
        new_df = new_df.iloc[:cutoff+1]
        after_cutoff = False
    elif select == 'after':
        # Subset the DataFrame starting from the cutoff until the end
        new_df = new_df.iloc[cutoff:]
        after_cutoff = True
    else:
        raise ValueError("Invalid value for 'select' parameter. Use 'before' or 'after'.")

    return new_df



In [34]:
#Constants for ESD yield
c_chimney = 65.77 / 1000 #l to m3
c_orif = 37.91 #H2 equiv
c_tot=c_orif/1000 #l to m3
#Calculate the desorption yield using the formula
def eta_prime(no_em,emission):

    # Remove background from outgassing
    # For this, calculate the flux going into the sample through the chimney conductance, assuming negligible downstream pressure
    pV_bck = c_chimney * np.mean(no_em["Barion_2 corr"])*100#mbar to Pa
    # Calculate delta p
    deltap_ba1 = (emission["Barion_1 corr"]-np.mean(no_em["Barion_1 corr"]))*100#mbar to Pa
    #print(*(emission["Barion_1 corr"]-(no_em["Barion_1 corr"]))*100)
    deltap_ba2 = (emission["Barion_2 corr"]-np.mean(no_em["Barion_2 corr"]))*100#mbar to Pa
    eta = ((c_tot*(deltap_ba2-deltap_ba1))-pV_bck)/(scipy.constants.Boltzmann*T1)*(scipy.constants.elementary_charge/emission["I_emission"])
    print((pV_bck-(c_tot*(deltap_ba2-deltap_ba1)))/(scipy.constants.Boltzmann*T1))
    # Filter out negative and inf values from eta
    eta_filtered = eta#[(eta >= 0) & (eta <= 0.3) & np.isfinite(eta)]
    
    # Create a new DataFrame with filtered eta and corresponding Elapsed time
    emission_filtered = pd.DataFrame({
        "Elapsed time": emission["Elapsed time"].values[:len(eta_filtered)],
        "eta_filtered": eta_filtered
    })
    print(emission["Elapsed time"].values[:len(eta_filtered)])
    return emission_filtered


## Calling the data analysis functions and writing to file

#### Defining variables to the called functions

In [35]:
# Defining a variable for original measurement data
vaclog1 = data[".\\recycling-and-warm-up-10-03-2020"]
vaclog2 = data[".\\recycling-measurement-10-03-2020"]


In [36]:
#Write to excel
%xl_set vaclog1 --cell Sheet1!A2

UsageError: Line magic function `%xl_set` not found.


In [37]:
#Cut values before injection start

def rows_from_started_comment(df, start_comment):
    # Find the index where the comment "started injection" is first found
    start_index = dataframe.loc[df["Live comments"] == start_comment].index.min()

    # Extract all rows from the start_index to the end of the DataFrame
    extracted_rows = df.loc[start_index:]

    return extracted_rows


In [38]:
#Calculate Number of molecules and coverage, CernOx temperature, apply gauge correction
vaclog_new1 = (gauge_correction(tempconvert(vaclog1)))
vaclog_new2 = (gauge_correction(tempconvert(vaclog2)))

#Pre and post-em data
pre_em = emission_data(vaclog_new2, "ega on", select="before")
em = emission_data(vaclog_new2, "ega on", select="after")

#calculate apparent primary desorption yields:
eta = eta_prime(pre_em,em)
print(eta)
#Comments
vaclog_comments1 = comments(vaclog1,"Time", "Live comments")
vaclog_comments2 = comments(vaclog2,"Time", "Live comments")
    

359      3.849441e+12
360      3.665096e+12
361      3.667430e+12
362      3.597426e+12
363      3.562424e+12
             ...     
29838   -3.987979e+13
29839   -3.969311e+13
29840   -3.969311e+13
29841   -3.976311e+13
29842   -3.969311e+13
Length: 29484, dtype: float64
359      3.340643e+13
360      2.846815e+13
361      2.711062e+13
362      4.042064e+13
363      4.078077e+13
             ...     
29838    0.000000e+00
29839    0.000000e+00
29840    0.000000e+00
29841    0.000000e+00
29842    0.000000e+00
Name: I_emission, Length: 29484, dtype: float64
[  3517.   3525.   3533. ... 248244. 248254. 248264.]
       Elapsed time  eta_filtered
359          3517.0     -0.115231
360          3525.0     -0.128744
361          3533.0     -0.135277
362          3541.0     -0.089000
363          3549.0     -0.087355
...             ...           ...
29838      248225.0           inf
29839      248234.0           inf
29840      248244.0           inf
29841      248254.0           inf
29842     

In [39]:
# Write em, eta to file
%xl_set em --cell Sheet1!O2
# Write pre-em to file
%xl_set pre_em --cell Sheet1!AE2
# Write eta
%xl_set eta --cell Sheet1!AT2

UsageError: Line magic function `%xl_set` not found.


## Electron dose


Find:

$$
Q = \int_{t_1}^{t_2} I \,dt
$$

Where $t_2$ and $t_1$ are the timestamps at which an emission current reading was taken


In [40]:
def e_dose(df):
    i = df["I_emission"] 
    t = df["Elapsed time"].diff(periods=1).fillna(0)
    integrand = i*t
    integral = np.sum(integrand)/400 #divide by emission area 400 mm2
    print(integral)
    cumulative_charge = np.cumsum(integrand)/400 #divide by emission area 400 mm2
    return cumulative_charge



In [41]:
electron_charge = e_dose(em)
# Create a new DataFrame with filtered eta and corresponding Elapsed time
e_dose_results = pd.DataFrame({
    "Elapsed time": em["Elapsed time"].values,
    "Charge emitted per mm2": electron_charge})
# Write to file
%xl_set e_dose_results --cell Sheet1!BE2

0.00198598738152005


UsageError: Line magic function `%xl_set` not found.


In [42]:
#### Writing to excel file
def write_to_excel(filename):
    #Write to excel file 1
    writer = pd.ExcelWriter(os.path.join(os.getcwd(),filename),engine="xlsxwriter")
    vaclog1.to_excel(writer,sheet_name='recycling-and-warm-up-10-03-2020')
    vaclog_new1.to_excel(writer, sheet_name='Analysed data recycling-and-warm-up-10-03-2020')
    #file 2
    vaclog2.to_excel(writer,sheet_name='recycling-measurement-10-03-2020')
    vaclog_new2.to_excel(writer, sheet_name='Analysed data recycling-measurement-10-03-2020')

    writer.save()
    return



## Plotting

In [43]:
#define parameters for plotting
#annotation params
font = dict(size = "x-small", color ="green", style ="italic",rotation="vertical")

plt.rcParams["figure.figsize"] = [8,5]
plt.rcParams["axes.edgecolor"] = "black"
plt.rcParams["axes.grid"] = True
plt.rcParams["grid.color"] = "black"
plt.rcParams["grid.linewidth"] = 0.35

formatter = md.DateFormatter('%H:%M')

# Create the "graphs" folder path
graphs_folder = os.path.join(root, 'graphs')

# Ensure the "graphs" folder exists, if not create it
if not os.path.exists(graphs_folder):
    os.makedirs(graphs_folder)
    

In [44]:
#Plotting the lumped injection adsorption isotherms up to 1e16 M/cm2
fig, ax = plt.subplots()

#file1 
ax.plot(vaclog_new1["Time"],vaclog_new1["Barion_1 corr"],marker=".", markersize=5,label='BA1 h2 equiv')
ax.plot(vaclog_new1["Time"],vaclog_new1["Barion_2 corr"],marker=".", markersize=5,label='BA2 h2 equiv')

ax.legend(loc="lower right")
ax.set_xlabel('Timestamp')
ax.set_ylabel('Pressure (mbar)')
ax.set_yscale('log')
plt.title("recycling-and-warm-up-10-03-2020 barions")

ax.xaxis.set_major_formatter(formatter)
ax.xaxis.set_major_locator(plt.MaxNLocator(12))
for label in ax.get_xticklabels(which='major'):
    label.set(rotation=30, horizontalalignment='right')    
     
#Writing comments as plot annotations
trans  = transforms.blended_transform_factory(
    ax.transData, ax.transAxes)
for i in vaclog_comments1.index:
    plt.text(vaclog_comments1["Time"][i],0.8,str(vaclog_comments1["Live comments"][i]),fontdict=font, transform=trans)      

plt.savefig(os.path.join(graphs_folder, 'recycling-and-warm-up-10-03-2020 barions.png'), dpi=300, bbox_inches='tight')
plt.show()

In [45]:
# same graph, cut values
fig, ax = plt.subplots()
#file2
ax.plot(vaclog_new2["Elapsed time"]/3600,vaclog_new2["Barion_1 corr"],marker=".", markersize=5,label='BA1 h2 equiv')
ax.plot(vaclog_new2["Elapsed time"]/3600,vaclog_new2["Barion_2 corr"],marker=".", markersize=5,label='BA2 h2 equiv')

ax.legend(loc="lower right")
ax.set_xlabel('Elapsed time (h)')
ax.set_ylabel('Pressure (mbar)')
ax.set_yscale('log')
ax.set_xlim(0, 22)

plt.title("recycling-measurement-10-03-2020 barions")
plt.savefig(os.path.join(graphs_folder, 'recycling-measurement-10-03-2020 barions.png'), bbox_inches='tight')
plt.show()

In [52]:
#Plotting apparent primary desorption yield vs time
fig, ax = plt.subplots()
#file2
ax.plot(electron_charge,eta["eta_filtered"],marker=".", markersize=5,label='eta_prime')
ax.set_xlim(0, 0.001)
ax.set_ylim(bottom=0, top=0.3)
ax.legend(loc="upper right")
ax.set_xlabel('C/mm2')
ax.set_ylabel('Desorption yield (molecules/electron)')
ax.set_yscale('linear')

""" charge to dose for plotting
def charge_to_dose(val):
    return val/scipy.constants.elementary_charge

#inverse function of dose -> charge for plotting
def dose_to_charge(val):
    return scipy.constants.elementary_charge/val

secax=ax.secondary_xaxis("top", functions=(charge_to_dose(electron_charge),dose_to_charge))
secax.set_xlabel('electrons/mm^2') """
plt.title("E. Bez - apparent primary desorption yield vs charge")
plt.savefig(os.path.join(graphs_folder, 'E. Bez apparent primary desorption yield vs dose_recycling-measurement-10-03-2020 barions.png'), bbox_inches='tight')
plt.ticklabel_format(axis="x",style="sci")
plt.show()

In [47]:
#Plotting apparent primary desorption yield vs time up to 22h
fig, ax = plt.subplots()
#file2
ax.plot(eta["Elapsed time"]/3600,eta["eta_filtered"],marker=".", markersize=5,label='eta_prime')
ax.set_xlim(0, 22)

#ax.set_ylim(bottom=0,top=0.02)
ax.legend(loc="upper right")
ax.set_xlabel('Elapsed time (h)')
ax.set_ylabel('Desorption yield (molecules/electron)')
ax.set_yscale('linear')
plt.title("recycling-measurement-10-03-2020 barions - apparent primary desorption yield vs time 0-22h")
plt.savefig(os.path.join(graphs_folder, '0-22 h apparent primary desorption yield vs time_recycling-measurement-10-03-2020 barions.png'), bbox_inches='tight')
plt.show()

In [48]:
#emission stability
fig, ax = plt.subplots()
#file2
ax.plot(em["Elapsed time"]/3600,em["I_emission"],marker=".", markersize=5,label='eta_prime')
ax.set_xlim(0, 60)
#ax.set_ylim(bottom=0,top=0.02)
ax.legend(loc="upper right")
ax.set_xlabel('Elapsed time (h)')
ax.set_ylabel('Emission current (A)')
ax.set_yscale('log')

plt.title("recycling-measurement-10-03-2020 emission current")
plt.savefig(os.path.join(graphs_folder, 'emission current recycling-measurement-10-03-2020 barions.png'), bbox_inches='tight')
plt.show()