# Mechanical Test Analysis Workbook

Workbook for analysing mechanical testing data saved as CSV files with three columns: time, force, and position. We additionally need a CSV containing the dimensions of each of the tested samples.

## Data Analysis:

In [None]:
import pandas as pd
from pathlib import Path
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np
from scipy import stats
%matplotlib inline

In [None]:
# Folder holding the per-sample CSV exports (file names matching v???.csv).
path = r'C:\Users\djbar\OneDrive - University Of Cambridge\PhD\Results\Mech testing\Organoids Project\Subset Analysis'

# glob returns matches in arbitrary order; sorting makes the concatenated data
# and every downstream table/plot come out in the same order on each run.
csv_files = sorted(glob.glob(str(Path(path) / 'v???.csv')))
if not csv_files:
    # Fail here with a clear message instead of later inside pd.concat.
    raise FileNotFoundError(f"No 'v???.csv' files found in {path!r}")

# Map sample name (the file stem) -> raw DataFrame, tagging every row with
# the sample it came from.
df_dict = {}
for file in csv_files:
    df = pd.read_csv(file)
    df['Sample'] = Path(file).stem
    df_dict[Path(file).stem] = df

# Per-sample details; note this is read from the current working directory,
# not from `path`.
sample_dimensions = pd.read_csv('sample_details.csv')

We find the index values of the divider lines. We know these lines come after the first data set, then before and after all following datasets. We want to fill the "Repeat" column with the tray row number, so we can track where the data is from. This should be an integer from 1-3 normally. We then remove the rows with the separator data.


In [None]:
# Label every row of each raw file with the data set ("Repeat") it belongs to.
for df in df_dict.values():
    df['Repeat'] = np.nan

    # Divider rows are a long run of 'x' characters in the time column. Match
    # the run by pattern rather than by one exact-length literal so detection
    # does not hinge on the precise character count or stray whitespace.
    is_divider = df['Time (min)'].astype(str).str.strip().str.fullmatch(r'x{20,}')
    start_end_index = df.index[is_divider].values

    # Dividers come after the first data set and then before and after each
    # later one, i.e. 2n - 1 dividers for n data sets. Rounding up (instead of
    # truncating) keeps the last data set, and a file holding a single data
    # set, from being left unlabelled; even divider counts behave as before.
    n_sets = (len(start_end_index) + 1) // 2

    if n_sets:
        # First data set: everything up to (and including) the first divider.
        df.loc[:start_end_index[0], 'Repeat'] = 1
    for i in range(1, n_sets):
        # Later data sets sit between an opening and a closing divider.
        df.loc[start_end_index[(i*2)-1]:start_end_index[i*2], 'Repeat'] = i + 1
        


In [None]:
# Stack every per-sample frame into one table with a fresh 0..n-1 index.
frames = list(df_dict.values())
all_data = pd.concat(frames, ignore_index=True)

Convert the time, force, position columns to numeric, coerce the errors so that the repeated headers become "NaN", then drop those rows.

In [None]:
# Coerce the measurement columns to numbers; anything that cannot be parsed
# (such as repeated header rows) becomes NaN.
cols = ['Time (min)', 'Force (N)', 'Position (mm)']
for column in cols:
    all_data[column] = pd.to_numeric(all_data[column], errors='coerce')

# Discard every row that now holds a NaN in any column.
all_data.dropna(inplace=True)

In [None]:
# Preview the first rows of the sample-details table.
sample_dimensions.head()

In [None]:
# Print a summary of the combined data (columns, dtypes, non-null counts).
all_data.info()

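Write a function that takes the main dataframe and the dataframe containing the sample attributes and combines them into a single df, adding strain and stress columns. Trimming of the unwanted high and low values was also planned here, but that step is currently disabled in the code.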

In [None]:
def trim_clean_data(main_df, sample_dimensions, min_force=0.01, rolling=50):
    """Annotate ``main_df`` in place with per-sample geometry, strain and stress.

    For every row of ``sample_dimensions`` the matching (Sample, Repeat) slice
    of ``main_df`` has its position re-zeroed and receives the columns
    ``Height``, ``Original Diameter``, ``Strain``, ``Stress (Pa)``,
    ``Stress_roll_avg``, ``Crosslinking (%)``, ``Collagen (%)`` and
    ``Elastin (%)``.

    Args:
        main_df: DataFrame with ``Sample``, ``Repeat``, ``Position (mm)`` and
            ``Force (N)`` columns. Modified in place.
        sample_dimensions: DataFrame read by column position: sample name,
            repeat number, height, diameter (mm), crosslinking, collagen,
            elastin. Height is presumably in mm as well, so that strain is
            dimensionless -- TODO confirm.
        min_force: Unused. Kept so existing calls keep working; trimming of
            data below a minimum force (and beyond the peak force) was
            prototyped but is intentionally not performed.
        rolling: Window length, in rows, of the rolling mean used for
            ``Stress_roll_avg``.

    Returns:
        None; the result is the mutated ``main_df``.

    Notes:
        Rows of ``sample_dimensions`` with no matching data in ``main_df`` are
        skipped silently. Rows whose values cannot be parsed as numbers are
        skipped with a warning, and nothing is written for them.
    """
    import warnings

    for row in sample_dimensions.values:
        # Parse the whole row up front so a malformed value cannot leave the
        # sample half-annotated.
        try:
            sample = row[0]
            repeat = int(row[1])
            height = float(row[2])
            diameter = float(row[3])
            crosslinking = float(row[4])
            collagen = float(row[5])
            elastin = float(row[6])
        except (IndexError, TypeError, ValueError) as err:
            warnings.warn(f"Skipping sample_dimensions row {row!r}: {err}")
            continue

        mask = (main_df['Sample'] == sample) & (main_df['Repeat'] == repeat)
        if not mask.any():
            # No test data loaded for this sample/repeat.
            continue

        # If the position does not start at 0 (failure to zero the testing
        # machine), subtract the first position value from all of them.
        first_position = main_df.loc[mask, 'Position (mm)'].iloc[0]
        if first_position != 0:
            main_df.loc[mask, 'Position (mm)'] = (
                main_df.loc[mask, 'Position (mm)'] - float(first_position)
            )

        # Cross-sectional area of the sample; the diameter is converted from
        # mm to m so the stress comes out in Pa.
        area = ((diameter / (2 * 1000)) ** 2) * math.pi

        main_df.loc[mask, 'Height'] = height
        main_df.loc[mask, 'Original Diameter'] = diameter
        main_df.loc[mask, 'Strain'] = main_df.loc[mask, 'Position (mm)'] / height
        main_df.loc[mask, 'Stress (Pa)'] = main_df.loc[mask, 'Force (N)'] / area
        # Rolling mean over `rolling` rows, moved 30 rows earlier. The 30-row
        # offset is fixed and does not scale with `rolling`.
        main_df.loc[mask, 'Stress_roll_avg'] = (
            main_df.loc[mask, 'Stress (Pa)'].rolling(rolling).mean().shift(-30)
        )
        main_df.loc[mask, 'Crosslinking (%)'] = crosslinking
        main_df.loc[mask, 'Collagen (%)'] = collagen
        main_df.loc[mask, 'Elastin (%)'] = elastin
        

In [None]:
# Add the geometry, strain and stress columns to all_data in place.
trim_clean_data(main_df=all_data, sample_dimensions=sample_dimensions, rolling=50)

Use scipy.stats.linregress to get a linear regression line for each of the repeat cases, between set values of the strain. 

In [None]:
def lin_regress_calc(df, min_strain, max_strain, repeats=(1, 2, 3)):
    """Fit stress against strain for each sample/repeat inside a strain window.

    Args:
        df: DataFrame with ``Sample``, ``Repeat``, ``Strain`` and
            ``Stress_roll_avg`` columns.
        min_strain: Exclusive lower bound of the strain window.
        max_strain: Exclusive upper bound of the strain window.
        repeats: Repeat numbers to fit for every sample. Defaults to 1-3.

    Returns:
        DataFrame with one row per fitted sample/repeat and the columns
        ``Sample``, ``Repeat``, ``Gradient`` (slope of the fit), ``Rvalue``
        and ``StdErr``. Sample/repeat pairs with fewer than two distinct
        usable strain values in the window are omitted.
    """
    in_window = (df['Strain'] > min_strain) & (df['Strain'] < max_strain)
    # Stress_roll_avg can hold NaN (a rolling mean has no value until its
    # window fills), and a single NaN turns the whole fit into NaN, so drop
    # those rows before regressing.
    window = df.loc[in_window].dropna(subset=['Strain', 'Stress_roll_avg'])

    results_list = []
    for sample in df['Sample'].unique():
        sample_rows = window.loc[window['Sample'] == sample]
        for repeat in repeats:
            fit_rows = sample_rows.loc[sample_rows['Repeat'] == repeat]
            # A line needs at least two distinct x values.
            if fit_rows['Strain'].nunique() < 2:
                continue
            result = stats.linregress(x=fit_rows['Strain'].values,
                                      y=fit_rows['Stress_roll_avg'].values)
            results_list.append([sample, repeat, result.slope, result.rvalue, result.stderr])

    return pd.DataFrame(results_list, columns=['Sample', 'Repeat', 'Gradient', 'Rvalue', 'StdErr'])


In [None]:
# Fit every sample/repeat over the strain window 0.02-0.15.
lin_regression_results = lin_regress_calc(df=all_data, min_strain=0.02, max_strain=0.15)

In [None]:
# Display the regression table (one row per fitted sample/repeat).
lin_regression_results

We drop any fits with a negative or low R value (R below 0.6).

In [None]:
# Drop fits whose R value is below 0.6 (this also removes negative-slope fits,
# since their R is negative). Removing the rows, rather than overwriting whole
# rows with NaN, keeps 'Sample' and 'Repeat' intact and avoids writing NaN
# into the integer 'Repeat' column, an upcast that newer pandas releases warn
# about. Rows whose R value is already NaN are left in place, as before.
low_r = lin_regression_results['Rvalue'] < 0.6
lin_regression_results.drop(index=lin_regression_results.index[low_r], inplace=True)

In [None]:
# Summary statistics of the fit metrics, one block per sample.
summary_columns = ['Gradient', 'Rvalue', 'StdErr']
grouped = lin_regression_results.groupby('Sample')
grouped[summary_columns].describe()

In [None]:
# Attach the sample attributes to each fit, matching on sample and repeat.
merged_results = sample_dimensions.merge(lin_regression_results, on=["Sample", "Repeat"])

# Gradient statistics for each elastin / crosslinking combination.
gradient_by_recipe = merged_results.groupby(['Elastin (%)', 'Crosslinking (%)'])['Gradient']
gradient_by_recipe.describe()

## Graphical Analysis

In [None]:
# Gradient against crosslinking level, for the samples without elastin.
# Error bars show one standard deviation.
no_elastin = merged_results[merged_results['Elastin (%)'] == 0]
ax = sns.barplot(
    x='Crosslinking (%)',
    y='Gradient',
    data=no_elastin,
    errorbar="sd",
)
ax.set(ylabel="Young's Modulus (Pa)")

In [None]:
# Gradient against elastin content, for the samples that contain elastin.
# Error bars show one standard deviation.
with_elastin = merged_results[merged_results['Elastin (%)'] != 0]
ax = sns.barplot(
    x='Elastin (%)',
    y='Gradient',
    data=with_elastin,
    errorbar="sd",
)
ax.set(ylabel="Young's Modulus (Pa)")

## Results Plots

Test plot making sure the linear regression produces a sensible looking prediction.

In [None]:
# Sanity check on one trace: overlay the fitted line on the smoothed
# stress-strain curve.
trace = all_data.loc[(all_data['Sample'] == 'v009') & (all_data['Repeat'] == 3)]

# Wide strain range for the plotted curve.
df_2 = trace.loc[(trace['Strain'] > 0.0) & (trace['Strain'] < 0.5)]

# Fit on the same column and strain window that lin_regress_calc is run with
# (Stress_roll_avg, 0.02-0.15), so the red line is the regression that is
# actually reported rather than a different fit on the raw stress. NaN rows
# are dropped because a single NaN would turn the whole fit into NaN.
df_3 = trace.loc[(trace['Strain'] > 0.02) & (trace['Strain'] < 0.15)].dropna(
    subset=['Strain', 'Stress_roll_avg']
)

rel = stats.linregress(df_3['Strain'], df_3['Stress_roll_avg'])
sns.relplot(data=df_2, x='Strain', y='Stress_roll_avg', kind='line')
plt.plot(df_2['Strain'], rel.intercept + rel.slope*df_2['Strain'], 'r')

Young's modulus plots of the linear regression results, one box per sample. These are currently drawn as box plots (median, quartiles and whiskers), not as 95% confidence intervals.

In [None]:
# Box plot of the fitted gradient for each sample.
# sns.catplot returns a figure-level grid; .set labels its y axis.
ax = sns.catplot(x='Sample', y='Gradient', kind='box', data=lin_regression_results)
ax.set(ylabel="Young's Modulus (Pa)")