<!-- # New Approach -->

In [7]:
import torch
import torch.distributed
import pandas as pd
import numpy as np
from ABRA_v1_2_26 import interpolate_and_smooth, CNN, plot_wave, calculate_and_plot_wave, plot_waves_single_frequency, arfread, get_str, calculate_hearing_threshold, all_thresholds, peak_finding
import warnings
from sklearn.preprocessing import StandardScaler,MinMaxScaler
warnings.filterwarnings('ignore')
import os
import io
import re
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

In [8]:
peak_finding_model = CNN()
model_loader = torch.load('./models/waveI_cnn_model1.pth')
peak_finding_model.load_state_dict(model_loader)
peak_finding_model.eval()

CNN(
  (conv1): Conv1d(1, 16, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(16, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (fc1): Linear(in_features=1952, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (batch_norm1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batch_norm2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [9]:
def peak_finding(wave):
    # Prepare waveform
    waveform=interpolate_and_smooth(wave) # Added indexing per calculate and plot wave function
    # waveform_torch = torch.tensor(waveform, dtype=torch.float32).unsqueeze(0) archived ABRA
    waveform_torch = torch.tensor(waveform, dtype=torch.float32).unsqueeze(0).unsqueeze(0) #newer ABRA
    # print(waveform_torch)
    # Get prediction from model
    outputs = peak_finding_model(waveform_torch)
    prediction = int(round(outputs.detach().numpy()[0][0], 0))
    # prediction_test = int(round(outputs.detach().numpy()[0], 0))
    # print("Model output:", outputs, "Prediction true start:", prediction)

    # Apply Gaussian smoothing
    smoothed_waveform = gaussian_filter1d(waveform, sigma=1)

    # Find peaks and troughs
    n = 18
    t = 14
    # start_point = prediction - 9 archived ABRA
    start_point = prediction - 6 #newer ABRA
    smoothed_peaks, _ = find_peaks(smoothed_waveform[start_point:], distance=n)
    smoothed_troughs, _ = find_peaks(-smoothed_waveform, distance=t)
    sorted_indices = np.argsort(smoothed_waveform[smoothed_peaks+start_point])
    highest_smoothed_peaks = np.sort(smoothed_peaks[sorted_indices[-5:]] + start_point)
    relevant_troughs = np.array([])
    for p in range(len(highest_smoothed_peaks)):
        c = 0
        for t in smoothed_troughs:
            if t > highest_smoothed_peaks[p]:
                if p != 4:
                    try:
                        if t < highest_smoothed_peaks[p+1]:
                            relevant_troughs = np.append(relevant_troughs, int(t))
                            break
                    except IndexError:
                        pass
                else:
                    relevant_troughs = np.append(relevant_troughs, int(t))
                    break
    relevant_troughs = relevant_troughs.astype('i')
    return highest_smoothed_peaks, relevant_troughs

def extract_metadata(metadata_lines):
    # Dictionary to store extracted metadata
    metadata = {}
    
    for line in metadata_lines:
        # Extract SW FREQ
        freq_match = re.search(r'SW FREQ:\s*(\d+\.?\d*)', line)
        if freq_match:
            metadata['SW_FREQ'] = float(freq_match.group(1))
        
        # Extract LEVELS
        levels_match = re.search(r':LEVELS:\s*([^:]+)', line)
        if levels_match:
            # Split levels and convert to list of floats
            metadata['LEVELS'] = [float(level) for level in levels_match.group(1).split(';') if level]
    
    return metadata

def read_custom_tsv(file_path):
    # Read the entire file
    with open(file_path, 'r', encoding='ISO-8859-1') as f:
        content = f.read()
    
    # Split the content into metadata and data sections
    metadata_lines = []
    data_section = None
    
    # Find the ':DATA' marker
    data_start = content.find(':DATA')
    
    if data_start != -1:
        # Extract metadata (lines before ':DATA')
        metadata_lines = content[:data_start].split('\n')
        
        # Extract data section
        data_section = content[data_start:].split(':DATA')[1].strip()
    
    # Extract specific metadata
    metadata = extract_metadata(metadata_lines)
    
    # Read the data section directly
    try:
        # Use StringIO to create a file-like object from the data section
        raw_data = pd.read_csv(
            io.StringIO(data_section), 
            sep='\s+',  # Use whitespace as separator
            header=None
        )
        raw_data = raw_data.T
        # Add metadata columns to the DataFrame
        if 'SW_FREQ' in metadata:
            raw_data['Freq(Hz)'] = metadata['SW_FREQ']
            raw_data['Freq(Hz)'] = raw_data['Freq(Hz)'].apply(lambda x: x*1000)
        
        if 'LEVELS' in metadata:
            # Repeat levels to match the number of rows
            levels_repeated = metadata['LEVELS'] * (len(raw_data) // len(metadata['LEVELS']) + 1)
            raw_data['Level(dB)'] = levels_repeated[:len(raw_data)]
        
        filtered_data = raw_data.apply(pd.to_numeric, errors='coerce').dropna()
        filtered_data.columns = filtered_data.columns.map(str)

        columns = ['Freq(Hz)'] + ['Level(dB)'] + [col for col in filtered_data.columns if col.isnumeric() == True]
        filtered_data = filtered_data[columns]
        return filtered_data
    
    except Exception as e:
        print(f"Error reading data: {e}")
        return None, metadata

In [10]:
# def peaks_troughs_amp_final(df, freq, db, time_scale=10):
#     khz = df[(df['Freq(Hz)'] == freq) & (df['Level(dB)'] == db)]
#     if not khz.empty:
#         index = khz.index.values[0]
#         final = df.loc[index, '0':].dropna()
#         final = pd.to_numeric(final, errors='coerce').dropna()

#         target = int(244 * (time_scale / 10))
        
#         y_values = interpolate_and_smooth(final, target)  # Original y-values for plotting
#         sampling_rate = len(y_values) / time_scale

#         x_values = np.linspace(0, len(y_values) / sampling_rate, len(y_values))

#         y_values = interpolate_and_smooth(final[:244])

#         fpf = df[(df['Freq(Hz)'] == freq)].loc[:, '0':]

#         # Flatten the data to scale all values across the group
#         flattened_data = fpf.values.flatten().reshape(-1, 1)

#         # Step 1: Standardize the data
#         scaler = StandardScaler()
#         standardized_data = scaler.fit_transform(flattened_data)

#         # Step 2: Apply min-max scaling
#         min_max_scaler = MinMaxScaler(feature_range=(0, 1))
#         scaled_data = min_max_scaler.fit_transform(standardized_data).reshape(fpf.shape)

#         # Reshape back to the original shape and update the group
#         fpf[fpf.columns] = scaled_data

#         finalfpf = fpf.loc[index, '0':].dropna()
#         finalfpf = pd.to_numeric(finalfpf, errors='coerce').dropna()

#         target = int(244 * (time_scale / 10))
        
#         y_values_fpf = interpolate_and_smooth(finalfpf, target)  # Original y-values for plotting
#         sampling_rate = len(y_values) / time_scale

#         y_values_fpf = interpolate_and_smooth(finalfpf[:244])

#         highest_peaks, relevant_troughs = peak_finding(y_values_fpf)

#         if highest_peaks.size > 0:  # Check if highest_peaks is not empty
#             first_peak_amplitude = y_values[highest_peaks[0]] - y_values[relevant_troughs[0]]

#         return highest_peaks, relevant_troughs, first_peak_amplitude
    
#     return None, None, None, None

In [11]:
def peaks_troughs_amp_final(df, freq, db, time_scale=10, multiply_y_factor=1.0, units='Microvolts'):
    db_column = 'Level(dB)'
    
    khz = df[(df['Freq(Hz)'] == freq) & (df[db_column] == db)]
    if not khz.empty:
        index = khz.index.values[0]
        final = df.loc[index, '0':].dropna()
        final = pd.to_numeric(final, errors='coerce').dropna()

        target = int(244 * (time_scale / 10))
        
        # Process the wave as in calculate_and_plot_wave
        y_values = interpolate_and_smooth(final, target)
        
        # Apply scaling factor
        y_values *= multiply_y_factor
        
        # Handle units conversion if needed
        if units == 'Nanovolts':
            y_values /= 1000
            
        # Generate normalized version for peak finding
        y_values_fpf = interpolate_and_smooth(y_values[:244])
        
        # Standardize and normalize for peak finding, exactly as in the original
        flattened_data = y_values_fpf.values.flatten().reshape(-1, 1)
        scaler = StandardScaler()
        standardized_data = scaler.fit_transform(flattened_data)
        min_max_scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = min_max_scaler.fit_transform(standardized_data).reshape(y_values_fpf.shape)
        y_values_fpf = interpolate_and_smooth(scaled_data[:244])
        
        # Find peaks using the normalized data
        highest_peaks, relevant_troughs = peak_finding(y_values_fpf)
        
        # Calculate amplitude on the processed but non-normalized data
        if highest_peaks.size > 0 and relevant_troughs.size > 0:
            # Following the same approach as in the display_metrics_table function
            first_peak_amplitude = y_values[highest_peaks[0]] - y_values[relevant_troughs[0]]
            return highest_peaks, relevant_troughs, first_peak_amplitude
    
    return None, None, None

In [12]:
time_scale = 18
amp_per_freq = {'Subject': [], 'Freq(Hz) (x1)': [], 'Level(dB) (x2)': [], 'Amplitude (x3)':[]}
start_path = '/Users/leahashebir/Downloads/Manor_Practicum/liberman_data/abr_data/WPZ Electrophysiology'
for subject in os.listdir(start_path):
    # print("Subject:",subject)
    for fq in os.listdir(os.path.join(start_path,subject)):
        # print(fq)
        if fq.startswith('ABR') and fq.endswith('.tsv'):
            path = os.path.join(start_path,subject,fq)
            data_df = read_custom_tsv(path)
            freqs = data_df['Freq(Hz)'].unique().tolist()
            levels = data_df['Level(dB)'].unique().tolist()
            for freq in freqs:
                for lvl in levels:
                    # print("Frequency=",freq, "Level=", lvl)
                    _, _, amp = peaks_troughs_amp_final(df=data_df, freq=freq, db=lvl, time_scale=time_scale)
                    # print(f'Amplitude: {amp}\n')
                    amp_per_freq['Subject'].append(subject)
                    amp_per_freq['Freq(Hz) (x1)'].append(freq)
                    amp_per_freq['Level(dB) (x2)'].append(lvl)
                    amp_per_freq['Amplitude (x3)'].append(amp)
        else:
            pass

AttributeError: 'numpy.ndarray' object has no attribute 'values'

In [53]:
amp_df_full = pd.DataFrame(data=amp_per_freq)

raw_synapse_counts = pd.read_excel('/Users/leahashebir/Downloads/Manor_Practicum/liberman_data/WPZ Ribbon and Synapse Counts.xlsx')
raw_synapse_counts = raw_synapse_counts.mask(lambda x: x.isnull()).dropna()
raw_synapse_counts['Synapses to IHC (y1)'] = raw_synapse_counts.iloc[:,6]
raw_synapse_counts['vx (x4)'] = raw_synapse_counts['vx']
raw_synapse_counts.drop(columns=['vx'], inplace=True)
raw_synapse_counts.rename(columns={'Freq':'Freq(Hz) (x1)'}, inplace=True)
raw_synapse_counts['Freq(Hz) (x1)'] = raw_synapse_counts['Freq(Hz) (x1)'].apply(lambda x: x*1000)
raw_synapse_counts.rename(columns={'Case':'Subject', 'IHCs' : 'IHCs (y2)'}, inplace=True)

paired = amp_df_full.join(raw_synapse_counts.set_index(['Subject', 'Freq(Hz) (x1)']), on=['Subject', 'Freq(Hz) (x1)'])
slice = paired[paired['Subject']=='WPZ174'][['Subject', 'Freq(Hz) (x1)', 'Level(dB) (x2)', 'Amplitude (x3)', 'vx (x4)','Synapses to IHC (y1)', 'IHCs (y2)']]
final = paired[['Subject', 'Freq(Hz) (x1)', 'Level(dB) (x2)', 'Amplitude (x3)', 'vx (x4)','Synapses to IHC (y1)', 'IHCs (y2)']]
final_clean = final.dropna()

In [54]:
np.unique(amp_df_full['Amplitude (x3)'])

array([-0.15950326, -0.14409909, -0.13224509, ...,  2.84571385,
        2.91967556,  2.98799673])

In [55]:
np.unique(final['Amplitude (x3)'])

array([-0.15950326, -0.14409909, -0.13224509, ...,  2.84571385,
        2.91967556,  2.98799673])

In [60]:
final_clean[final_clean['Amplitude (x3)'] < 0]

Unnamed: 0,Subject,Freq(Hz) (x1),Level(dB) (x2),Amplitude (x3),vx (x4),Synapses to IHC (y1),IHCs (y2)
139,WPZ174,45200.0,40.0,-0.011515,v1,12.549020,10.2
139,WPZ174,45200.0,40.0,-0.011515,v2,13.444444,9
408,WPZ116,32000.0,30.0,-0.018918,v1,15.208333,9.6
408,WPZ116,32000.0,30.0,-0.018918,v2,14.949495,9.9
1076,WPZ88,45200.0,70.0,-0.015700,v1,7.640449,8.9
...,...,...,...,...,...,...,...
6608,WPZ100,16000.0,50.0,-0.093725,v2,18.461538,9.1
6644,WPZ138,8000.0,25.0,-0.037091,v1,15.773196,9.7
6644,WPZ138,8000.0,25.0,-0.037091,v2,19.000000,8
7186,WPZ139,32000.0,80.0,-0.004645,v1,4.631579,9.5


In [69]:
# class CNN(nn.Module):
#     def __init__(self, dropout_prob=0.1):
#         super(CNN, self).__init__()
#         self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
#         self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
#         self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
#         self.fc1 = nn.Linear(32 * 61, 128)
#         self.fc2 = nn.Linear(128, 1)
#         self.dropout = nn.Dropout(dropout_prob)

#     def forward(self, x):
#         x = self.pool(nn.functional.relu(self.conv1(x)))
#         x = self.dropout(x)
#         x = self.pool(nn.functional.relu(self.conv2(x)))
#         x = self.dropout(x)
#         x = x.view(-1, 32 * 61)
#         x = nn.functional.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = self.fc2(x)
#         return x

# peak_finding_model = CNN()
# model_loader = torch.load('./models/waveI_cnn_model.pth')
# peak_finding_model.load_state_dict(model_loader)
# peak_finding_model.eval()

# def peak_finding(wave):
#     # Prepare waveform
#     waveform=interpolate_and_smooth(wave) # Added indexing per calculate and plot wave function
#     # waveform_torch = torch.tensor(waveform, dtype=torch.float32).unsqueeze(0) archived ABRA
#     waveform_torch = torch.tensor(waveform, dtype=torch.float32).unsqueeze(0).unsqueeze(0) #newer ABRA
#     # print(waveform_torch)
#     # Get prediction from model
#     outputs = peak_finding_model(waveform_torch)
#     prediction = int(round(outputs.detach().numpy()[0][0], 0))
#     # prediction_test = int(round(outputs.detach().numpy()[0], 0))
#     # print("Model output:", outputs, "Prediction true start:", prediction)

#     # Apply Gaussian smoothing
#     smoothed_waveform = gaussian_filter1d(waveform, sigma=1)

#     # Find peaks and troughs
#     n = 18
#     t = 14
#     # start_point = prediction - 9 archived ABRA
#     start_point = prediction - 6 #newer ABRA
#     smoothed_peaks, _ = find_peaks(smoothed_waveform[start_point:], distance=n)
#     smoothed_troughs, _ = find_peaks(-smoothed_waveform, distance=t)
#     sorted_indices = np.argsort(smoothed_waveform[smoothed_peaks+start_point])
#     highest_smoothed_peaks = np.sort(smoothed_peaks[sorted_indices[-5:]] + start_point)
#     relevant_troughs = np.array([])
#     for p in range(len(highest_smoothed_peaks)):
#         c = 0
#         for t in smoothed_troughs:
#             if t > highest_smoothed_peaks[p]:
#                 if p != 4:
#                     try:
#                         if t < highest_smoothed_peaks[p+1]:
#                             relevant_troughs = np.append(relevant_troughs, int(t))
#                             break
#                     except IndexError:
#                         pass
#                 else:
#                     relevant_troughs = np.append(relevant_troughs, int(t))
#                     break
#     relevant_troughs = relevant_troughs.astype('i')
#     return highest_smoothed_peaks, relevant_troughs

# import pandas as pd
# import io
# import re

# def extract_metadata(metadata_lines):
#     # Dictionary to store extracted metadata
#     metadata = {}
    
#     for line in metadata_lines:
#         # Extract SW FREQ
#         freq_match = re.search(r'SW FREQ:\s*(\d+\.?\d*)', line)
#         if freq_match:
#             metadata['SW_FREQ'] = float(freq_match.group(1))
        
#         # Extract LEVELS
#         levels_match = re.search(r':LEVELS:\s*([^:]+)', line)
#         if levels_match:
#             # Split levels and convert to list of floats
#             metadata['LEVELS'] = [float(level) for level in levels_match.group(1).split(';') if level]
    
#     return metadata

# def read_custom_tsv(file_path):
#     # Read the entire file
#     with open(file_path, 'r', encoding='ISO-8859-1') as f:
#         content = f.read()
    
#     # Split the content into metadata and data sections
#     metadata_lines = []
#     data_section = None
    
#     # Find the ':DATA' marker
#     data_start = content.find(':DATA')
    
#     if data_start != -1:
#         # Extract metadata (lines before ':DATA')
#         metadata_lines = content[:data_start].split('\n')
        
#         # Extract data section
#         data_section = content[data_start:].split(':DATA')[1].strip()
    
#     # Extract specific metadata
#     metadata = extract_metadata(metadata_lines)
    
#     # Read the data section directly
#     try:
#         # Use StringIO to create a file-like object from the data section
#         raw_data = pd.read_csv(
#             io.StringIO(data_section), 
#             sep='\s+',  # Use whitespace as separator
#             header=None
#         )
#         raw_data = raw_data.T
#         # Add metadata columns to the DataFrame
#         if 'SW_FREQ' in metadata:
#             raw_data['Freq(Hz)'] = metadata['SW_FREQ']
#             raw_data['Freq(Hz)'] = raw_data['Freq(Hz)'].apply(lambda x: x*1000)
        
#         if 'LEVELS' in metadata:
#             # Repeat levels to match the number of rows
#             levels_repeated = metadata['LEVELS'] * (len(raw_data) // len(metadata['LEVELS']) + 1)
#             raw_data['Level(dB)'] = levels_repeated[:len(raw_data)]
        
#         filtered_data = raw_data.apply(pd.to_numeric, errors='coerce').dropna()
#         filtered_data.columns = filtered_data.columns.map(str)

#         columns = ['Freq(Hz)'] + ['Level(dB)'] + [col for col in filtered_data.columns if col.isnumeric() == True]
#         filtered_data = filtered_data[columns]
#         return filtered_data
    
#     except Exception as e:
#         print(f"Error reading data: {e}")
#         return None, metadata

# # Use the function
# # abr_test = "/Users/leahashebir/Downloads/Manor_Practicum/liberman_data/abr_data/wpz_104L/11_3/ABR-104-L-11.3.tsv"
# # raw_data = read_custom_tsv(abr_test)

# def get_str(data):
#     # return string up until null character only
#     ind = data.find(b'\x00')
#     if ind > 0:
#         data = data[:ind]
#     return data.decode('utf-8')

# def interpolate_and_smooth(final, target_length=244): # To implement after moving beyond Manor data since we already have 244 time points
#     if len(final) > target_length:
#         new_points = np.linspace(0, len(final), target_length + 2)
#         interpolated_values = np.interp(new_points, np.arange(len(final)), final)
#         final = np.array(interpolated_values[:target_length], dtype=float)
#     elif len(final) < target_length:
#         original_indices = np.arange(len(final))
#         target_indices = np.linspace(0, len(final) - 1, target_length)
#         cs = CubicSpline(original_indices, final)
#         final = cs(target_indices)
#     return final

# def peaks_troughs_amp_final(df, freq, db, time_scale=10):
#     khz = df[(df['Freq(Hz)'] == freq) & (df['Level(dB)'] == db)]
#     if not khz.empty:
#         index = khz.index.values[0]
#         final = df.loc[index, '0':].dropna()
#         final = pd.to_numeric(final, errors='coerce').dropna()

#         target = int(244 * (time_scale / 10))
        
#         y_values = interpolate_and_smooth(final, target)  # Original y-values for plotting
#         sampling_rate = len(y_values) / time_scale

#         x_values = np.linspace(0, len(y_values) / sampling_rate, len(y_values))

#         y_values = interpolate_and_smooth(final[:244])

#         fpf = df[(df['Freq(Hz)'] == freq)].loc[:, '0':]

#         # Flatten the data to scale all values across the group
#         flattened_data = fpf.values.flatten().reshape(-1, 1)

#         # Step 1: Standardize the data
#         scaler = StandardScaler()
#         standardized_data = scaler.fit_transform(flattened_data)

#         # Step 2: Apply min-max scaling
#         min_max_scaler = MinMaxScaler(feature_range=(0, 1))
#         scaled_data = min_max_scaler.fit_transform(standardized_data).reshape(fpf.shape)

#         # Reshape back to the original shape and update the group
#         fpf[fpf.columns] = scaled_data

#         finalfpf = fpf.loc[index, '0':].dropna()
#         finalfpf = pd.to_numeric(finalfpf, errors='coerce').dropna()

#         target = int(244 * (time_scale / 10))
        
#         y_values_fpf = interpolate_and_smooth(finalfpf, target)  # Original y-values for plotting
#         sampling_rate = len(y_values) / time_scale

#         y_values_fpf = interpolate_and_smooth(finalfpf[:244])

#         highest_peaks, relevant_troughs = peak_finding(y_values_fpf)

#         if highest_peaks.size > 0:  # Check if highest_peaks is not empty
#             first_peak_amplitude = y_values[highest_peaks[0]] - y_values[relevant_troughs[0]]

#         return highest_peaks, relevant_troughs, first_peak_amplitude
    
#     return None, None, None, None

In [None]:
# time_scale=10
# amp_per_freq = {'Subject': [], 'Freq(Hz) (x1)': [], 'Level(dB) (x2)': [], 'Amplitude (x3)':[]}
# start_path = '/Users/leahashebir/Downloads/Manor_Practicum/liberman_data/abr_data/WPZ Electrophysiology'
# for subject in os.listdir(start_path):
#     # print("Subject:",subject)
#     for fq in os.listdir(os.path.join(start_path,subject)):
#         # print(fq)
#         if fq.startswith('ABR') and fq.endswith('.tsv'):
#             path = os.path.join(start_path,subject,fq)
#             data_df = read_custom_tsv(path)
#             freqs = data_df['Freq(Hz)'].unique().tolist()
#             levels = data_df['Level(dB)'].unique().tolist()
#             for freq in freqs:
#                 for lvl in levels:
#                     # print("Frequency=",freq, "Level=", lvl)
#                     _, _, amp = peaks_troughs_amp_final(df=data_df, freq=freq, db=lvl)
#                     # print(f'Amplitude: {amp}\n')
#                     amp_per_freq['Subject'].append(subject)
#                     amp_per_freq['Freq(Hz) (x1)'].append(freq)
#                     amp_per_freq['Level(dB) (x2)'].append(lvl)
#                     amp_per_freq['Amplitude (x3)'].append(amp)
#         else:
#             pass
#         # print(path)

# amp_df_full = pd.DataFrame(data=amp_per_freq)

# raw_synapse_counts = pd.read_excel('/Users/leahashebir/Downloads/Manor_Practicum/liberman_data/WPZ Ribbon and Synapse Counts.xlsx')
# raw_synapse_counts = raw_synapse_counts.mask(lambda x: x.isnull()).dropna()
# raw_synapse_counts['Synapses to IHC (y1)'] = raw_synapse_counts.iloc[:,6]
# raw_synapse_counts['vx (x4)'] = raw_synapse_counts['vx']
# raw_synapse_counts.drop(columns=['vx'], inplace=True)
# raw_synapse_counts.rename(columns={'Freq':'Freq(Hz) (x1)'}, inplace=True)
# raw_synapse_counts['Freq(Hz) (x1)'] = raw_synapse_counts['Freq(Hz) (x1)'].apply(lambda x: x*1000)
# raw_synapse_counts.rename(columns={'Case':'Subject', 'IHCs' : 'IHCs (y2)'}, inplace=True)

# paired = amp_df_full.join(raw_synapse_counts.set_index(['Subject', 'Freq(Hz) (x1)']), on=['Subject', 'Freq(Hz) (x1)'])
# slice = paired[paired['Subject']=='WPZ174'][['Subject', 'Freq(Hz) (x1)', 'Level(dB) (x2)', 'Amplitude (x3)', 'vx (x4)','Synapses to IHC (y1)', 'IHCs (y2)']]
# final = paired[['Subject', 'Freq(Hz) (x1)', 'Level(dB) (x2)', 'Amplitude (x3)', 'vx (x4)','Synapses to IHC (y1)', 'IHCs (y2)']]
# final_clean = final.dropna()