In [73]:
import pandas as pd
import scipy.constants
from scipy.optimize import curve_fit
from scipy.integrate import quad
from sklearn.metrics import r2_score
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.dates as md
import matplotlib.transforms as transforms
from matplotlib.ticker import LogFormatter
import os
%matplotlib qt
# %matplotlib inline
import datetime as dt
pd.set_option('display.max_rows', 100) 

## Reading in the data and preparing DataFrames

#### MID

In [74]:
#read in the file and create dataframe
#excplicitly state the part
#path = r'C:\Users\einot\OneDrive\Documents\CERN VScode\CERN-Python\4K desorption measurements\Cu sample\EGA fully in\S1 no preinjection at cold temps'
#mid = pd.read_csv(os.path.join(path, "MID.tsv"), sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")

#List molecule masses used in MID scan

#masslist = [2,4,12,15,16,18,28,32,40,44]

#Define datetime format
#MID_datetime = "%Y/%m/%d %H:%M:%S.%f"

#Reformat time values
def timeformat(df,format):
    new_df = df.copy()
    for idx, col in enumerate(df):
        if idx % 2 == 0:  # Check if the column index is even (every other column)
            pandas_timestamp = pd.to_datetime(df[col], format=format)
            timestamp = pandas_timestamp.dt.strftime('%d-%m-%Y %H:%M:%S')
            new_df[col] = timestamp  # Replace the time column in the new dataframe
    return new_df

#call the function with correct parameters to modify MID dataframe
#mid = timeformat(mid,MID_datetime)

#### Vaclogger

In [75]:
""" #Reading in the file
vaclog = pd.read_csv(os.path.join(path, "vaclog"), sep="\t")

#Define original datetime format
vaclog_datetime = "%d/%m/%Y %H:%M:%S"
 """
#Adding an elapsed time column for temperature fits
def elapsed_time(df,timecol,format):
    new_df = df.copy()
    pandas_timestamp = pd.to_datetime(df[timecol],format=format)
    runtime = (pandas_timestamp-pandas_timestamp[0]).dt.total_seconds()
    insert_idx = df.columns.get_loc(timecol) + 1  # Get the index to insert the new column
    df.insert(insert_idx,"Elapsed time", runtime)
    reformat = pandas_timestamp.dt.strftime('%d-%m-%Y %H:%M:%S') #New timestamp format for plotting
    new_df[timecol] = reformat
    return new_df
""" 
#call the function with correct parameters to modify vaclogger dataframe
vaclog = elapsed_time(vaclog,"Time",vaclog_datetime) """

' \n#call the function with correct parameters to modify vaclogger dataframe\nvaclog = elapsed_time(vaclog,"Time",vaclog_datetime) '

#### HiVolta log

In [76]:
""" #Reading in the file
hv = pd.read_csv(os.path.join(path,"hv log"), sep=",")

#Define grid and emission current parameters in [A]
hv["hv_grid"]=[abs(element) * 1e-6 for element in hv["IMon1"]]
hv["I_em"]=[abs(element) * 1e-6 for element in hv["IMon2"]]

#Define datetime format for Hv log
hv["datetime"] = hv['Date'] + ' ' + hv['Time'] #combine date and time columns
hv_datetime = "%d/%m/%Y %H:%M:%S.%f"

#Call the 'elapsed time' function with hv parameters
hv = elapsed_time(hv,"datetime" ,hv_datetime)

hv.head() """

' #Reading in the file\nhv = pd.read_csv(os.path.join(path,"hv log"), sep=",")\n\n#Define grid and emission current parameters in [A]\nhv["hv_grid"]=[abs(element) * 1e-6 for element in hv["IMon1"]]\nhv["I_em"]=[abs(element) * 1e-6 for element in hv["IMon2"]]\n\n#Define datetime format for Hv log\nhv["datetime"] = hv[\'Date\'] + \' \' + hv[\'Time\'] #combine date and time columns\nhv_datetime = "%d/%m/%Y %H:%M:%S.%f"\n\n#Call the \'elapsed time\' function with hv parameters\nhv = elapsed_time(hv,"datetime" ,hv_datetime)\n\nhv.head() '

#### Function for reading in all data files for one measurement

In [77]:
""" def process_data(root):
    #Define datetime formats
    MID_datetime = "%Y/%m/%d %H:%M:%S.%f"
    vaclog_datetime = "%d/%m/%Y %H:%M:%S"
    hv_datetime = "%d/%m/%Y %H:%M:%S.%f"
    
    #Create an empty dictionary to store processed dataframes
    dataframes = {}

    # Iterate over the subdirectories starting from the specified directory
    for dirpath, dirs, files in os.walk(root):
        for filename in files:
            filepath = os.path.join(dirpath,filename)
            #print(filepath)
            # Create a variable name using the relative path
            relative_path = os.path.relpath(root, dirpath)
            #print(relative_path)
            variable_name = os.path.join(relative_path, os.path.splitext(filename)[0])
            #print(variable_name)
            #Read in the MID file
            if filename == "MID.tsv":
                mid = pd.read_csv(filepath, sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")
                mid = timeformat(mid, MID_datetime)

                # Create a variable name using the base name of the MID filename
                #variable_name = os.path.splitext(filename)[0]

                # Store the MID dataframe using the variable name
                dataframes[variable_name] = mid

            #Read in the vaclog file
            elif filename == "vaclog":
                vaclog = pd.read_csv(filepath, sep="\t")
                vaclog = elapsed_time(vaclog, "Time", vaclog_datetime)

                # Create a variable name using the base name of the MID filename
                #variable_name = os.path.splitext(filename)[0]

                # Store the vaclog dataframe using the variable name
                dataframes[variable_name] = vaclog

            #Read in the hivolta file
            elif filename == "hv log":
                hv = pd.read_csv(filepath, sep=",")
                hv["hv_grid"] = [abs(element) * 1e-6 for element in hv["IMon1"]]
                hv["I_em"] = [abs(element) * 1e-6 for element in hv["IMon2"]]
                hv["datetime"] = hv['Date'] + ' ' + hv['Time']
                hv = elapsed_time(hv, "datetime", hv_datetime)

                # Create a variable name using the base name of the MID filename
                #variable_name = os.path.splitext(filename)[0]
                
                # Store the hv dataframe using the variable name
                dataframes[variable_name] = hv   
            
    return dataframes


 """

' def process_data(root):\n    #Define datetime formats\n    MID_datetime = "%Y/%m/%d %H:%M:%S.%f"\n    vaclog_datetime = "%d/%m/%Y %H:%M:%S"\n    hv_datetime = "%d/%m/%Y %H:%M:%S.%f"\n    \n    #Create an empty dictionary to store processed dataframes\n    dataframes = {}\n\n    # Iterate over the subdirectories starting from the specified directory\n    for dirpath, dirs, files in os.walk(root):\n        for filename in files:\n            filepath = os.path.join(dirpath,filename)\n            #print(filepath)\n            # Create a variable name using the relative path\n            relative_path = os.path.relpath(root, dirpath)\n            #print(relative_path)\n            variable_name = os.path.join(relative_path, os.path.splitext(filename)[0])\n            #print(variable_name)\n            #Read in the MID file\n            if filename == "MID.tsv":\n                mid = pd.read_csv(filepath, sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")\n                mid = tim

In [78]:
def process_data(directory):
    # Define datetime formats
    MID_datetime = "%Y/%m/%d %H:%M:%S.%f"
    vaclog_datetime = "%d/%m/%Y %H:%M:%S"
    hv_datetime = "%d/%m/%Y %H:%M:%S.%f"

    # Create an empty dictionary to store the processed dataframes
    dataframes = {}
    
    # Create a variable name using the pathname + variable name
    pathname = os.path.basename(directory)

    # Iterate over the source files in the directory
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)  # Get the absolute file path

        if filename == "MID.tsv":
            mid = pd.read_csv(file_path, sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")
            mid = timeformat(mid, MID_datetime)

            # Create a variable name using the base name of the MID filename
            variable_name = f"{pathname}_{os.path.splitext(filename)[0]}"

            # Store the MID dataframe using the variable name
            dataframes[variable_name] = mid

        # Read in the vaclog file
        elif filename == "vaclog":
            vaclog = pd.read_csv(file_path, sep="\t")
            vaclog = elapsed_time(vaclog, "Time", vaclog_datetime)

            # Create a variable name using the base name of the MID filename
            variable_name = f"{pathname}_{os.path.splitext(filename)[0]}"

            # Store the vaclog dataframe using the variable name
            dataframes[variable_name] = vaclog
        
        elif filename == "hv log":
            hv = pd.read_csv(file_path, sep=",")
            hv["hv_grid"] = [abs(element) * 1e-6 for element in hv["IMon1"]]
            hv["I_em"] = [abs(element) * 1e-6 for element in hv["IMon2"]]
            hv["datetime"] = hv['Date'] + ' ' + hv['Time']
            hv = elapsed_time(hv, "datetime", hv_datetime)

            # Create a variable name using the base name of the MID filename
            variable_name = f"{pathname}_{os.path.splitext(filename)[0]}"

            # Store the hv dataframe using the variable name
            dataframes[variable_name] = hv   


    return dataframes


In [79]:
#Accessing the files
root = r'G:\Departments\TE\Groups\VSC\VSM\etiirinen\Python\4K desorption measurements\HiLumi sample\4K desorption yield with preinjection\dynamic vacuum\EGA fully in\series1 no desorption'
data = process_data(root)

# Access the processed dataframes using the variable names
for variable_name, dataframe in data.items():
    print(variable_name)
    print(dataframe)
    print("-----------------")


series1 no desorption_hv log
            Date          Time  VMon1  VMon2  VMon3  VMon4  VMon5  VMon6  \
0     13/12/2022  17:30:03.498   0.06   0.10   0.18   0.18    0.0    0.0   
1     13/12/2022  17:30:04.676   0.06   0.10   0.18   0.18    0.0    0.0   
2     13/12/2022  17:30:05.864   0.06   0.10   0.18   0.18    0.0    0.0   
3     13/12/2022  17:30:07.046   0.06   0.10   0.18   0.18    0.0    0.0   
4     13/12/2022  17:31:58.343   0.06   0.10   0.14   0.16    0.0    0.0   
...          ...           ...    ...    ...    ...    ...    ...    ...   
1872  13/12/2022  18:09:37.428   0.08   0.06   0.06   0.14    0.0    0.0   
1873  13/12/2022  18:09:38.600   0.08   0.06   0.06   0.12    0.0    0.0   
1874  13/12/2022  18:09:39.789   0.08   0.06   0.06   0.12    0.0    0.0   
1875  13/12/2022  18:09:40.969   0.08   0.06   0.06   0.12    0.0    0.0   
1876  13/12/2022  18:09:42.149   0.08   0.06   0.06   0.12    0.0    0.0   

      VMon7  VMon8  ...   IMon4   IMon5   IMon6   IMon7   

#### 

## Data processing

#### CernOx R-T conversion

In [80]:
#Temperature curve for CERNOX - for temp stability
A=[230.317302,-6170.1513,71837.9529,-477946.76,2.003668910085786e+6,-5.488690193047771e+6,9.830475663897528e+6,-1.111226817786569e+7,7.202477878914065e+6,-2.04194551328507e+6]

#specify fit parameters A, data (Resistance values)
def polyfit(params,data):
    total=[]
    for j in data: 
        exp=0
        for i in range(len(params)):
            exp += (params[i]/(math.log10(j))**i)
        total.append(10**exp)
    return(total)    


#### Comments for annotations

In [81]:
def comments(dataframe,colname):
    #print hv comments
    print(pd.unique(dataframe[colname]))

    #delete excess comments (hv log program writes each comment 4x)

    # Find the indices of the first occurrence of each unique event
    first_unique_indices = dataframe.drop_duplicates(subset=colname, keep="first").index
    print(first_unique_indices)

    #replace the comments in rows that are not the first occurrence of each unique event with NaN values
    dataframe.loc[~dataframe.index.isin(first_unique_indices), colname] = np.nan

    return dataframe

In [82]:
# testing
comments(data["series1 no desorption_hv log"],"Comment")

[nan 'no emission' 'emission on' 'minimal emission current'
 'grid repelling' 'grid transparent' 'ega turned' 'grid repelling again'
 'no electron activity']
Int64Index([0, 17, 339, 652, 660, 776, 834, 1359, 1857], dtype='int64')


Unnamed: 0,Date,Time,VMon1,VMon2,VMon3,VMon4,VMon5,VMon6,VMon7,VMon8,...,IMon4,IMon5,IMon6,IMon7,IMon8,Comment,Unnamed: 19,hv_grid,I_em,datetime
0,13/12/2022,17:30:03.498,0.06,0.10,0.18,0.18,0.0,0.0,0.0,0.02,...,0.0105,0.0167,-0.0142,-0.0258,-0.0100,,,1.000000e-10,6.200000e-09,13-12-2022 17:30:03
1,13/12/2022,17:30:04.676,0.06,0.10,0.18,0.18,0.0,0.0,0.0,0.02,...,0.0105,0.0164,-0.0142,-0.0258,-0.0100,,,1.000000e-10,6.200000e-09,13-12-2022 17:30:04
2,13/12/2022,17:30:05.864,0.06,0.10,0.18,0.18,0.0,0.0,0.0,0.02,...,0.0105,0.0101,-0.0142,-0.0258,-0.0100,,,1.000000e-10,6.200000e-09,13-12-2022 17:30:05
3,13/12/2022,17:30:07.046,0.06,0.10,0.18,0.18,0.0,0.0,0.0,0.02,...,0.0105,0.0171,-0.0142,-0.0258,-0.0100,,,1.000000e-10,6.200000e-09,13-12-2022 17:30:07
4,13/12/2022,17:31:58.343,0.06,0.10,0.14,0.16,0.0,0.0,0.0,0.02,...,0.0082,0.0124,-0.0143,-0.0262,-0.0099,,,2.000000e-10,5.700000e-09,13-12-2022 17:31:58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1872,13/12/2022,18:09:37.428,0.08,0.06,0.06,0.14,0.0,0.0,0.0,0.02,...,-0.0100,-0.0053,-0.0153,-0.0282,-0.0112,,,1.000000e-10,5.200000e-09,13-12-2022 18:09:37
1873,13/12/2022,18:09:38.600,0.08,0.06,0.06,0.12,0.0,0.0,0.0,0.02,...,-0.0097,-0.0053,-0.0153,-0.0282,-0.0112,,,1.000000e-10,4.800000e-09,13-12-2022 18:09:38
1874,13/12/2022,18:09:39.789,0.08,0.06,0.06,0.12,0.0,0.0,0.0,0.02,...,-0.0097,-0.0058,-0.0153,-0.0282,-0.0112,,,0.000000e+00,4.800000e-09,13-12-2022 18:09:39
1875,13/12/2022,18:09:40.969,0.08,0.06,0.06,0.12,0.0,0.0,0.0,0.02,...,-0.0097,-0.0058,-0.0153,-0.0282,-0.0114,,,3.000000e-10,4.800000e-09,13-12-2022 18:09:40


#### Electron dose calculations

Find:

$$
Q = \int_{t_1}^{t_2} I \,dt
$$

Where $t_2$ and $t_1$ are the timestamps at which an emission current reading was taken


In [83]:
""" def electron_dose(hv):
    #define the n and n-1
    t_delta = hv["Elapsed time"].diff(periods=1).fillna(0)

    #dealing with missing datapoints and false integration times
    #Compare t_delta and I_em values: if I_em values between t_delta indexes n, n-1 are negligible, ignore this time period in integration





    return hv """

' def electron_dose(hv):\n    #define the n and n-1\n    t_delta = hv["Elapsed time"].diff(periods=1).fillna(0)\n\n    #dealing with missing datapoints and false integration times\n    #Compare t_delta and I_em values: if I_em values between t_delta indexes n, n-1 are negligible, ignore this time period in integration\n\n\n\n\n\n    return hv '

#### Simpson's rule for integration

$$ 
I_{Simps} = \frac{h}{3}\left(y_0+2 \sum_{i=1}^{n/2-1} y_{2i}+4 \sum_{i=1}^{n/2} y_{2i-1+y_n}\right)
$$

In [86]:
def electron_dose(df, threshold):
    # Find the indices where the emission current is above the threshold
    mask_above_threshold = df['I_em'] > threshold
    print()
    # Initialize variables
    Q = 0
    start_idx = None
    end_idx = None

    for idx, row in df.iterrows():
        print(mask_above_threshold[idx])
        if mask_above_threshold[idx]:
            if start_idx is None:
                start_idx = idx
            end_idx = idx
        else:
            if start_idx is not None:
                end_idx = idx - 1

                # Apply Simpson's rule to the subset
                subset = df.loc[start_idx:end_idx]
                subset_size = len(subset)
                h = subset['t'].diff().mean()

                # Ensure the subset has at least 4 data points for Simpson's rule
                if subset_size >= 4:
                    even_sum = subset['I_em'].iloc[2:subset_size-2:2].sum()
                    odd_sum = subset['I_em'].iloc[1:subset_size-1:2].sum()
                    integral = (subset['I_em'].iloc[0] + subset['I_em'].iloc[subset_size-1] + 4 * odd_sum + 2 * even_sum) * (h / 3)

                    Q += integral

                start_idx = None
                end_idx = None
    return Q


# Example usage
data = {'t': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
        'I_em': [200, 600, 700, 300, 800, 900, 400, 100, 50, 700, 800, 200, 500, 300, 900, 100, 200, 600, 700, 300]}
df = pd.DataFrame(data)

threshold = 500

Q = electron_dose(df, threshold) #Threshold for ESD - 500nA
print("Integration result:", electron_dose(df, threshold))



False
True
True
False
True
True
False
False
False
True
True
False
False
False
True
False
False
True
True
False

False
True
True
False
True
True
False
False
False
True
True
False
False
False
True
False
False
True
True
False
Integration result: 0


#### Fitting

Fitting the mathematical model PSD (ESD) dose dependence according to Malyshev:


$$\eta(D) = \eta_0 \cdot \left(\frac{D+D_1}{D_0+D_1} \right)^{-a}$$

Where parameters $D_0$ and $D_1$ are added to extend the applicability towards low doses in a way that the curve asymptotically approaches the constant initial ESD yield $eta_0$ as $D \rarr 0$. $D_0$ represents the dose imparted at the lowest measurable data point and $D_1$ is used to position the end of the initial plateau. The exponent α determines the steepness, here referred to as the conditioning rate.

In [85]:
#Define function for ESD dose dependence
def esd_fit(eta_0,D,D_0,D_1,a):
    return eta_0*((D+D_1)/(D_0+D_1))^(-a)

#Use curve_fit for dependent variable D
D_0 = 0


## Plotting