In [27]:
import pandas as pd
import scipy.constants
from scipy.optimize import curve_fit
from scipy.integrate import quad
from sklearn.metrics import r2_score
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.dates as md
import matplotlib.transforms as transforms
from matplotlib.ticker import LogFormatter
import os
%matplotlib qt
# %matplotlib inline
import datetime as dt
pd.set_option('display.max_rows', 100) 

## Reading in the data and preparing DataFrames

#### MID

In [28]:
#read in the file and create dataframe
#excplicitly state the part
#path = r'C:\Users\einot\OneDrive\Documents\CERN VScode\CERN-Python\4K desorption measurements\Cu sample\EGA fully in\S1 no preinjection at cold temps'
#mid = pd.read_csv(os.path.join(path, "MID.tsv"), sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")

#List molecule masses used in MID scan

#masslist = [2,4,12,15,16,18,28,32,40,44]

#Define datetime format
#MID_datetime = "%Y/%m/%d %H:%M:%S.%f"

#Reformat time values
def timeformat(df,format):
    new_df = df.copy()
    for idx, col in enumerate(df):
        if idx % 2 == 0:  # Check if the column index is even (every other column)
            pandas_timestamp = pd.to_datetime(df[col], format=format)
            timestamp = pandas_timestamp.dt.strftime('%d-%m-%Y %H:%M:%S')
            new_df[col] = timestamp  # Replace the time column in the new dataframe
    return new_df

#call the function with correct parameters to modify MID dataframe
#mid = timeformat(mid,MID_datetime)

#### Vaclogger

In [29]:
""" #Reading in the file
vaclog = pd.read_csv(os.path.join(path, "vaclog"), sep="\t")

#Define original datetime format
vaclog_datetime = "%d/%m/%Y %H:%M:%S"
 """
#Adding an elapsed time column for temperature fits
def elapsed_time(df,timecol,format):
    new_df = df.copy()
    pandas_timestamp = pd.to_datetime(df[timecol],format=format)
    runtime = (pandas_timestamp-pandas_timestamp[0]).dt.total_seconds()
    insert_idx = df.columns.get_loc(timecol) + 1  # Get the index to insert the new column
    df.insert(insert_idx,"Elapsed time", runtime)
    reformat = pandas_timestamp.dt.strftime('%d-%m-%Y %H:%M:%S') #New timestamp format for plotting
    new_df[timecol] = reformat
    return new_df
""" 
#call the function with correct parameters to modify vaclogger dataframe
vaclog = elapsed_time(vaclog,"Time",vaclog_datetime) """

' \n#call the function with correct parameters to modify vaclogger dataframe\nvaclog = elapsed_time(vaclog,"Time",vaclog_datetime) '

#### HiVolta log

In [30]:
""" #Reading in the file
hv = pd.read_csv(os.path.join(path,"hv log"), sep=",")

#Define grid and emission current parameters in [A]
hv["hv_grid"]=[abs(element) * 1e-6 for element in hv["IMon1"]]
hv["I_em"]=[abs(element) * 1e-6 for element in hv["IMon2"]]

#Define datetime format for Hv log
hv["datetime"] = hv['Date'] + ' ' + hv['Time'] #combine date and time columns
hv_datetime = "%d/%m/%Y %H:%M:%S.%f"

#Call the 'elapsed time' function with hv parameters
hv = elapsed_time(hv,"datetime" ,hv_datetime)

hv.head() """

' #Reading in the file\nhv = pd.read_csv(os.path.join(path,"hv log"), sep=",")\n\n#Define grid and emission current parameters in [A]\nhv["hv_grid"]=[abs(element) * 1e-6 for element in hv["IMon1"]]\nhv["I_em"]=[abs(element) * 1e-6 for element in hv["IMon2"]]\n\n#Define datetime format for Hv log\nhv["datetime"] = hv[\'Date\'] + \' \' + hv[\'Time\'] #combine date and time columns\nhv_datetime = "%d/%m/%Y %H:%M:%S.%f"\n\n#Call the \'elapsed time\' function with hv parameters\nhv = elapsed_time(hv,"datetime" ,hv_datetime)\n\nhv.head() '

#### Function for reading in all data files for one measurement

In [31]:
def process_data(root):
    #Define datetime formats
    MID_datetime = "%Y/%m/%d %H:%M:%S.%f"
    vaclog_datetime = "%d/%m/%Y %H:%M:%S"
    hv_datetime = "%d/%m/%Y %H:%M:%S.%f"
    
    #Create an empty dictionary to store processed dataframes
    dataframes = {}

    # Iterate over the subdirectories starting from the specified directory
    for dirpath, dirs, files in os.walk(root):
        for filename in files:
            filepath = os.path.join(dirpath,filename)
            #print(filepath)
            # Create a variable name using the relative path
            relative_path = os.path.relpath(root, dirpath)
            #print(relative_path)
            variable_name = os.path.join(relative_path, os.path.splitext(filename)[0])
            #print(variable_name)
            try:
                #Read in the MID file
                if filename == "MID.tsv":
                    mid = pd.read_csv(filepath, sep="\t", skiprows=lambda x: x<=11, on_bad_lines="skip")
                    mid = timeformat(mid, MID_datetime)

                    # Create a variable name using the base name of the MID filename
                    variable_name = os.path.splitext(filename)[0]

                    # Store the MID dataframe using the variable name
                    dataframes[variable_name] = mid

                #Read in the vaclog file
                elif filename == "vaclog":
                    vaclog = pd.read_csv(filepath, sep="\t")
                    vaclog = elapsed_time(vaclog, "Time", vaclog_datetime)

                    # Create a variable name using the base name of the MID filename
                    variable_name = os.path.splitext(filename)[0]

                    # Store the vaclog dataframe using the variable name
                    dataframes[variable_name] = vaclog

                #Read in the hivolta file
                elif filename == "hv log":
                    hv = pd.read_csv(filepath, sep=",")
                    hv["hv_grid"] = [abs(element) * 1e-6 for element in hv["IMon1"]]
                    hv["I_em"] = [abs(element) * 1e-6 for element in hv["IMon2"]]
                    hv["datetime"] = hv['Date'] + ' ' + hv['Time']
                    hv = elapsed_time(hv, "datetime", hv_datetime)

                    # Create a variable name using the base name of the MID filename
                    variable_name = os.path.splitext(filename)[0]
                    
                    # Store the hv dataframe using the variable name
                    dataframes[variable_name] = hv   
            except Exception as e:
                print(f"Error processing file: {filepath}\nError message: {str(e)}")
            
            
    return dataframes




In [32]:
#Accessing the files
root = r'G:\Departments\TE\Groups\VSC\VSM\etiirinen\Python\4K desorption measurements\HiLumi sample\4K desorption yield with preinjection\dynamic vacuum'
processed_data = process_data(root)
# Access the processed dataframes using the variable names
#mid_df, vaclog_df, hv_df = processed_data['MID', 'vaclog', 'hv log']
#mid = processed_data['MID']
#vaclog = processed_data['vaclog']
#hv = processed_data['hv log']
#print(mid, vaclog, hv)
#for variable_name, dataframe in processed_data.items():
  #  print("series1 no desorption/vaclog")
  #  print("mid")
  #  print("-----------------")
print(process_data(root).items())
# Access the processed dataframes using the variable names
#folder_name = "series1 no desorption"
#relative_path = os.path.relpath(root, folder_name)
#variable_name = os.path.join(relative_path, "MID")

#mid_data = processed_data[variable_name]

  return Index(result, name=self.name)
  return Index(result, name=self.name)
  return Index(result, name=self.name)
  return Index(result, name=self.name)


dict_items([('hv log',             Date          Time  VMon1   VMon2  VMon3  VMon4  VMon5  VMon6  \
0     12/12/2022  17:26:24.335   0.04    0.06   0.04   0.10    0.0    0.0   
1     12/12/2022  17:26:25.524   0.04    0.06   0.04   0.10    0.0    0.0   
2     12/12/2022  17:26:26.708   0.04    0.06   0.04   0.10    0.0    0.0   
3     12/12/2022  17:26:27.894   0.04    0.06   0.04   0.10    0.0    0.0   
4     12/12/2022  17:26:29.079   0.04    0.06   0.04   0.10    0.0    0.0   
...          ...           ...    ...     ...    ...    ...    ...    ...   
3577  12/12/2022  18:37:48.635   0.04  100.10   0.22   0.20    0.0    0.0   
3578  12/12/2022  18:37:49.819   0.04  100.10   0.18   0.24    0.0    0.0   
3579  12/12/2022  18:37:51.009   0.04  100.10   0.18   0.18    0.0    0.0   
3580  12/12/2022  18:37:52.196   0.04  100.10   0.20   0.22    0.0    0.0   
3581  12/12/2022  18:37:53.376   0.04  100.10   0.22   0.24    0.0    0.0   

      VMon7  VMon8  ...   IMon4   IMon5   IMon6   IM

## Data processing

#### CernOx R-T conversion

In [33]:
#Temperature curve for CERNOX - for temp stability
A=[230.317302,-6170.1513,71837.9529,-477946.76,2.003668910085786e+6,-5.488690193047771e+6,9.830475663897528e+6,-1.111226817786569e+7,7.202477878914065e+6,-2.04194551328507e+6]

#specify fit parameters A, data (Resistance values)
def polyfit(params,data):
    total=[]
    for j in data: 
        exp=0
        for i in range(len(params)):
            exp += (params[i]/(math.log10(j))**i)
        total.append(10**exp)
    return(total)    


#### Comments for annotations

In [34]:
#print vaclog comments
print(pd.unique(vaclog["Live comments"]))

NameError: name 'vaclog' is not defined

In [None]:
#print hv comments
print(pd.unique(hv["Comment"]))

#delete excess comments (hv log program writes each comment 4x)

# Find the indices of the first occurrence of each unique event
first_unique_indices = hv.drop_duplicates(subset="Comment", keep="first").index
print(first_unique_indices)

#replace the comments in rows that are not the first occurrence of each unique event with NaN values
hv.loc[~hv.index.isin(first_unique_indices), "Comment"] = np.nan


[nan 'no emission' 'emission on' 'minimal emission current'
 'grid repelling' 'grid transparent' 'ega turned' 'grid repelling again'
 'no electron activity']
Int64Index([0, 17, 339, 652, 660, 776, 834, 1359, 1857], dtype='int64')


#### Electron dose calculations

Find:

$$
Q = \int_{t_1}^{t_2} I \,dt
$$

Where $t_2$ and $t_1$ are the timestamps at which an emission current reading was taken


#### Fitting

Fitting the mathematical model PSD (ESD) dose dependence according to Malyshev:


$$\eta(D) = \eta_0 \cdot \left(\frac{D+D_1}{D_0+D_1} \right)^{-a}$$

Where parameters $D_0$ and $D_1$ are added to extend the applicability towards low doses in a way that the curve asymptotically approaches the constant initial ESD yield $eta_0$ as $D \rarr 0$. $D_0$ represents the dose imparted at the lowest measurable data point and $D_1$ is used to position the end of the initial plateau. The exponent α determines the steepness, here referred to as the conditioning rate.

In [None]:
#Define function for ESD dose dependence
def esd_fit(eta_0,D,D_0,D_1,a):
    return eta_0*((D+D_1)/(D_0+D_1))^(-a)

#Use curve_fit for dependent variable D
D_0 = 0


## Plotting