In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.interpolate import UnivariateSpline
from scipy.signal import find_peaks


In [2]:
# Tunables for the per-profile processing below.
SMOOTHING_FACTOR = 0.001    # `s` argument handed to UnivariateSpline
BASELINE_THRESHOLD = 0.1    # squared-slope values below this are blanked out of "Baseline"

# The context manager closes the workbook handle once all profile sheets are parsed.
with pd.ExcelFile('TFSCD10.1.xlsx', engine='openpyxl') as xlsx:
    # Get all sheet names
    sheet_names = xlsx.sheet_names
    profile_sheets = [name for name in sheet_names if name.startswith("Profile")]
    raw_profiles = {sheet: xlsx.parse(sheet) for sheet in profile_sheets}

profiles = {}
for name, raw in raw_profiles.items():
    # Skip first row and first column. The chained call returns a new frame,
    # so the columns added below never write into a slice of the parsed sheet.
    df = raw.iloc[1:, 1:].reset_index(drop=True)

    # Rename the first two columns through the public API; writing into
    # `df.columns.values` mutates the Index's backing array behind pandas' back.
    columns = list(df.columns)
    columns[:2] = ["Distance", "Height"]
    df.columns = columns

    # Presumably the dropped first row is non-numeric, which would leave these two
    # columns as object dtype; convert them so the numeric code gets real floats.
    df["Distance"] = pd.to_numeric(df["Distance"])
    df["Height"] = pd.to_numeric(df["Height"])

    distance = df["Distance"].to_numpy()
    spline = UnivariateSpline(distance, df["Height"].to_numpy(), s=SMOOTHING_FACTOR)
    y_smooth = spline(distance)
    df['Smooth'] = y_smooth
    df['Derivative'] = np.gradient(y_smooth, distance)
    df['Derivative2'] = df['Derivative']**2
    # Keep the squared slope where it reaches the threshold, NaN everywhere else.
    df["Baseline"] = df["Derivative2"].where(df["Derivative2"] >= BASELINE_THRESHOLD)

    profiles[name] = df

for name, df in profiles.items():
    print(f"--- {name} ---")
    print(df.head(), "\n")


--- Profile 1 ---
   Distance    Height    Smooth  Derivative  Derivative2  Baseline
0         0  0.384033  0.383572    0.041135     0.001692       NaN
1  0.006393  0.384277  0.383835    0.050044     0.002504       NaN
2  0.012785  0.384583  0.384212    0.067439     0.004548       NaN
3  0.019179   0.38501  0.384697    0.083991     0.007054       NaN
4  0.025571  0.385559  0.385286    0.099696     0.009939       NaN 

--- Profile 2 ---
   Distance    Height    Smooth  Derivative  Derivative2  Baseline
0         0  0.360291  0.361026    0.041424     0.001716       NaN
1  0.006393   0.36084  0.361291    0.053062     0.002816       NaN
2  0.012785  0.361511  0.361704    0.075864     0.005755       NaN
3  0.019178  0.362244  0.362260    0.097723     0.009550       NaN
4  0.025571  0.363037  0.362954    0.118641     0.014076       NaN 

--- Profile 3 ---
   Distance    Height    Smooth  Derivative   Derivative2  Baseline
0         0  0.434814  0.436114   -0.042467  1.803407e-03       NaN
1 

In [3]:
# Print a five-row preview of every processed profile under a titled separator.
for name, df in profiles.items():
    print("--- " + name + " ---")
    # `end` reproduces the trailing space, newline and blank line between previews.
    print(df.head(), end=" \n\n")

--- Profile 1 ---
   Distance    Height    Smooth  Derivative  Derivative2  Baseline
0         0  0.384033  0.383572    0.041135     0.001692       NaN
1  0.006393  0.384277  0.383835    0.050044     0.002504       NaN
2  0.012785  0.384583  0.384212    0.067439     0.004548       NaN
3  0.019179   0.38501  0.384697    0.083991     0.007054       NaN
4  0.025571  0.385559  0.385286    0.099696     0.009939       NaN 

--- Profile 2 ---
   Distance    Height    Smooth  Derivative  Derivative2  Baseline
0         0  0.360291  0.361026    0.041424     0.001716       NaN
1  0.006393   0.36084  0.361291    0.053062     0.002816       NaN
2  0.012785  0.361511  0.361704    0.075864     0.005755       NaN
3  0.019178  0.362244  0.362260    0.097723     0.009550       NaN
4  0.025571  0.363037  0.362954    0.118641     0.014076       NaN 

--- Profile 3 ---
   Distance    Height    Smooth  Derivative   Derivative2  Baseline
0         0  0.434814  0.436114   -0.042467  1.803407e-03       NaN
1 

In [4]:
# NOTE(review): the recorded output of this cell is `NameError: name 'plt' is not defined`;
# no visible cell imports matplotlib. `import matplotlib.pyplot as plt` has to be added
# to the imports cell before this cell can run.

# Plot the raw heights against the smoothed curve, one figure per profile.
# The 'Smooth' column was already fitted (same smoothing factor) when `profiles`
# was built, so it is reused here rather than re-fitting the spline and
# overwriting the column from inside a plotting cell.
for name, df in profiles.items():
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(df['Distance'], df['Height'], 'o', label='Original')
    ax.plot(df['Distance'], df['Smooth'], '-', label='Smoothed')
    ax.set_title(f"{name} - Smoothed Curve")
    ax.set_xlabel("Distance")
    ax.set_ylabel("Height")
    ax.grid(True)
    ax.legend()
    fig.tight_layout()
    plt.show()
    


NameError: name 'plt' is not defined

In [None]:
# NOTE(review): `plt` is not imported by any visible cell; add
# `import matplotlib.pyplot as plt` to the imports cell before running this.

# Plot one profile together with its squared derivative, the thresholded
# baseline and the peaks detected on that baseline.
profile_name = 'Profile 1'
df = profiles[profile_name]

# `peaks` was never defined anywhere in the notebook; compute it here.
# "Baseline" is NaN below the threshold, and find_peaks is documented to behave
# unpredictably on NaN input, so the gaps are filled with 0 for detection only.
peaks, _ = find_peaks(df['Baseline'].fillna(0).to_numpy())

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df['Distance'], df['Height'], 'o', label='Original')
ax.plot(df['Distance'], df['Derivative2'], '-', label='Derivative^2')
ax.plot(df['Distance'], df['Baseline'], '-', label='Baseline')
# find_peaks returns positions, so select with .iloc rather than by label.
ax.plot(df['Distance'].iloc[peaks], df['Baseline'].iloc[peaks], "x",
        label="Peaks", markersize=8, color='red')
# Use the profile actually plotted; the old title read `name`, a variable left
# over from whichever loop ran last.
# NOTE(review): title text and y-label look copied from the smoothed-curve cell — confirm wording.
ax.set_title(f"{profile_name} - Smoothed Curve")
ax.set_xlabel("Distance")
ax.set_ylabel("Height")
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


In [41]:
class File:
    """Peak analysis of the "Profile*" sheets of one Excel workbook.

    Intended call order: ``load()`` -> ``findPeaks()`` -> ``analyzePeaks()`` -> ``save()``.

    Attributes set along the way:
        profile_names: sheet names starting with "Profile" (set by ``load``).
        data: dict mapping sheet name -> processed DataFrame (set by ``load``).
        peaks: dict mapping sheet name -> rows of ``data`` at detected peaks
            (set by ``findPeaks``).
        analyzedPeaks: one-row-per-profile summary DataFrame (set by ``analyzePeaks``).
    """

    def __init__(self, filename, smoothing=0.001, threshold=0.1):
        """Remember the workbook path and the processing parameters.

        Args:
            filename: path of the Excel workbook to analyse.
            smoothing: ``s`` argument passed to ``UnivariateSpline``.
            threshold: squared-slope values below this become 0 in "Baseline".
        """
        self.filename = filename
        self.smoothing = smoothing
        self.threshold = threshold

    def _process_profile(self, raw):
        """Turn one parsed sheet into the processed frame stored in ``self.data``.

        Drops the first row and first column, names the first two remaining
        columns "Distance" and "Height", and adds the "Smooth", "Derivative",
        "Derivative2" and "Baseline" columns.
        """
        # The chained call returns a fresh frame, so adding columns below never
        # writes into a slice of the parsed sheet.
        df = raw.iloc[1:, 1:].reset_index(drop=True)

        # Rename via the public API instead of mutating `df.columns.values`.
        columns = list(df.columns)
        columns[:2] = ["Distance", "Height"]
        df.columns = columns

        # Presumably the dropped first row is non-numeric, leaving these columns
        # as object dtype; convert so the numeric code below gets real floats.
        df["Distance"] = pd.to_numeric(df["Distance"])
        df["Height"] = pd.to_numeric(df["Height"])

        distance = df["Distance"].to_numpy()
        spline = UnivariateSpline(distance, df["Height"].to_numpy(), s=self.smoothing)
        df["Smooth"] = spline(distance)
        df["Derivative"] = np.gradient(df["Smooth"].to_numpy(), distance)
        df["Derivative2"] = df["Derivative"] ** 2
        # Keep the squared slope where it reaches the threshold, 0 elsewhere
        # (0 rather than NaN so find_peaks gets a fully defined signal).
        df["Baseline"] = df["Derivative2"].where(df["Derivative2"] >= self.threshold, 0)
        return df

    def load(self):
        """Parse and process every sheet whose name starts with "Profile"."""
        # The context manager closes the workbook handle when parsing is done.
        with pd.ExcelFile(self.filename, engine='openpyxl') as xlsx:
            self.profile_names = [
                name for name in xlsx.sheet_names if name.startswith("Profile")]
            data = {name: self._process_profile(xlsx.parse(name))
                    for name in self.profile_names}
        self.data = data

    def findPeaks(self):
        """Run ``find_peaks`` on each profile's "Baseline" and store the peak rows."""
        peaks_dict = {}
        for name, df in self.data.items():
            peak_positions, _ = find_peaks(df["Baseline"].to_numpy())
            # Copy so later edits to the peak table never touch self.data.
            peaks_dict[name] = df.iloc[peak_positions].copy()
        self.peaks = peaks_dict

    def analyzePeaks(self):
        """Summarise the "Height" of each profile's peaks.

        Returns:
            DataFrame with columns name, num_peaks, avg_height, std_height,
            max_height; also stored on ``self.analyzedPeaks``.
        """
        rows = [
            {
                "name": name,
                "num_peaks": len(df),
                "avg_height": df["Height"].mean(),
                "std_height": df["Height"].std(),
                "max_height": df["Height"].max(),
            }
            for name, df in self.peaks.items()
        ]
        # Explicit columns keep the schema stable even when there are no profiles.
        self.analyzedPeaks = pd.DataFrame(
            rows,
            columns=["name", "num_peaks", "avg_height", "std_height", "max_height"])
        return self.analyzedPeaks

    def save(self):
        """Write the summary next to the workbook as ``<stem>_analyzed.csv``.

        Prints a hint and does nothing if ``analyzePeaks`` has not produced a summary.
        """
        if getattr(self, 'analyzedPeaks', None) is None:
            print('No Peaks found, run .analyzePeaks()')
            return

        # Build the name from the stem: a plain str.replace(".xlsx", ...) leaves the
        # path unchanged for any other suffix (".XLSX", ".xlsm", ...), and the CSV
        # would then overwrite the input workbook.
        source = Path(self.filename)
        new_filename = source.with_name(source.stem + "_analyzed.csv")

        self.analyzedPeaks.to_csv(new_filename, index=False)

    def getProfile(self, profile_id):
        """Return the processed frame stored under "Profile <profile_id>"."""
        return self.data['Profile ' + str(profile_id)]

    def getPeaks(self, profile_id):
        """Return the peak rows stored under "Profile <profile_id>"."""
        return self.peaks['Profile ' + str(profile_id)]


In [42]:
# Run the whole pipeline on one workbook: parse, detect peaks, summarise, export.
file = File(filename='TFSCD10.1.xlsx')
file.load()
file.findPeaks()
file.analyzePeaks()  # summary is kept on file.analyzedPeaks for save() to write
file.save()