# Micro-CT Data Analysis Workbook

An analysis notebook for micro-CT data generated by Bruker SkyScan micro-CT instruments. Data are extracted from batman files, grouped, and plotted against the independent variables, and statistical analysis is carried out to test for significance.

In [None]:
import glob
import pandas as pd
import numpy as np
from pathlib import Path


In [None]:
# Root folder that holds the exported analysis results (edit before running).
path = r'path\to\analysis\files'
# '**' only descends through nested folders when recursive=True; without it the
# pattern behaves like a single '*' and matches files exactly one folder deep.
# Sorting gives a reproducible row order in the resulting dataframe.
batman_files = sorted(glob.glob(path + '/**/*.batman.txt', recursive=True))

def batman_extract_and_add(batman_list):
    '''
    Build a dataframe of micro-CT results from a list of batman text files.

    The filename stem is split on underscores: the first four parts are the
    original plate, condition, row and repeat, and the last part (up to its
    first '.') is the voi. For plate 's008' the condition is the crosslinking
    percentage (elastin fixed at 0); for plate 's011' the condition is the
    elastin percentage (crosslinking fixed at 30); for any other plate both
    are NaN. The plate comparison is case-insensitive.

    Each file is then scanned for the comma-separated lines starting with
    'Pixel size', 'Structure separation' and 'Total porosity (percent)'; the
    value is taken from the second-to-last field. Structure separation is
    multiplied by 1000 and stored as pore_size_um.

    Measurements are reset for every file, so a file that lacks one of these
    lines yields NaN for it instead of inheriting the previous file's value.

    Parameters:
        batman_list: iterable of paths to batman text files.

    Return: dataframe with one row per file and the columns original_plate,
        condition, row, repeat, voi, pixel_size_um, pore_size_um,
        porosity_perc, crosslinking, elastin (the last five numeric).

    Raises:
        IndexError: if a filename has fewer than four underscore-separated parts.
    '''
    columns = ['original_plate', 'condition', 'row', 'repeat', 'voi', 'pixel_size_um', 'pore_size_um', 'porosity_perc',
               'crosslinking', 'elastin']
    numeric_cols = ['pixel_size_um', 'pore_size_um', 'porosity_perc', 'crosslinking', 'elastin']

    params_list = []
    for file in batman_list:
        name_split = Path(file).stem.split('_')
        original_plate = name_split[0]
        condition = name_split[1]
        row = name_split[2]
        repeat = name_split[3]
        # The stem still ends in '.batman', so drop everything after the first dot.
        voi = name_split[-1].split('.')[0]

        plate_key = original_plate.lower()
        if plate_key == 's008':
            crosslinking = condition
            elastin = '0'
        elif plate_key == 's011':
            elastin = condition
            crosslinking = '30'
        else:
            elastin = np.nan
            crosslinking = np.nan

        # Reset per file: otherwise a missing line would either raise NameError
        # (first file) or silently reuse the values read from the previous file.
        pixel_size = np.nan
        pore_size_um = np.nan
        porosity_perc = np.nan

        with open(file) as f:
            for line in f:
                fields = line.split(',')
                if len(fields) < 2:
                    # No value field to read on this line.
                    continue
                label = fields[0]
                if label == 'Pixel size':
                    pixel_size = fields[-2]
                elif label == 'Structure separation':
                    pore_size_um = float(fields[-2]) * 1000
                elif label == 'Total porosity (percent)':
                    porosity_perc = fields[-2]

        params_list.append([original_plate, condition, row, repeat, voi, pixel_size, pore_size_um, porosity_perc,
                            crosslinking, elastin])

    ct_df = pd.DataFrame(data=params_list, columns=columns)
    # Values read from filenames and file lines are strings; convert for analysis.
    ct_df[numeric_cols] = ct_df[numeric_cols].apply(pd.to_numeric)

    return ct_df
                    

In [None]:
# Parse every batman file collected above into a single dataframe (one row per file).
ct_df = batman_extract_and_add(batman_files)

In [None]:
# Check column dtypes and non-null counts to confirm the numeric conversion worked.
ct_df.info()

In [None]:
# Summary statistics for the numeric columns across all samples.
ct_df.describe()

In [None]:
# Summarise pore size and porosity for each crosslinking/elastin combination.
group_keys = ['crosslinking', 'elastin']
measurements = ['pore_size_um', 'porosity_perc']
grouped = ct_df.groupby(group_keys)[measurements]
grouped.describe()

In [None]:
# Pairwise correlation between the two independent variables and the two measurements.
corr_columns = ['crosslinking', 'elastin', 'pore_size_um', 'porosity_perc']
ct_df[corr_columns].corr(numeric_only=False)

### Graphical Analysis

In [None]:
import seaborn as sns
%matplotlib inline

sns.set_style()
ax = sns.barplot(data=ct_df, x='crosslinking', y='pore_size_um', errorbar="sd")
ax.set_ylabel('Pore Size (um)')
ax.set_xlabel('Crosslinking (%)')
ax.set_title('Pore Size vs Crosslinking Percentage \n in 1% Collagen Scaffolds')

In [None]:
import seaborn as sns
%matplotlib inline

sns.set_style()
ax = sns.barplot(data=ct_df, x='elastin', y='pore_size_um', errorbar="sd")
ax.set_ylabel('Pore Size (um)')
ax.set_xlabel('Elastin (%)')
ax.set_title('Pore Size vs Elastin Percentage \n in 1% Collagen Scaffolds')

### Statistical Analysis

One-way ANOVA for the s008 samples, which vary in crosslinking. First we split the pore-size data into one pandas Series per crosslinking level, then pass these to SciPy's one-way ANOVA test (`scipy.stats.f_oneway`).

In [None]:
from scipy import stats

# Compare plate labels case-insensitively, as batman_extract_and_add does when
# assigning crosslinking, so e.g. 'S008' files are not dropped from the test.
s008_samples = ct_df[ct_df['original_plate'].str.lower() == 's008']

# One Series of pore sizes per crosslinking level (groupby skips NaN levels).
arrays = [pore_sizes for _, pore_sizes in s008_samples.groupby('crosslinking')['pore_size_um']]

# Unpack the list so the test uses however many levels the data contains,
# rather than assuming exactly six.
stats.f_oneway(*arrays)


Similar analysis for the different elastin concentrations is below:

In [None]:
from scipy import stats

# Compare plate labels case-insensitively, as batman_extract_and_add does when
# assigning elastin, so e.g. 'S011' files are not dropped from the test.
s011_samples = ct_df[ct_df['original_plate'].str.lower() == 's011']

# One Series of pore sizes per elastin level (groupby skips NaN levels).
arrays = [pore_sizes for _, pore_sizes in s011_samples.groupby('elastin')['pore_size_um']]

# Unpack the list so the test uses however many levels the data contains,
# rather than assuming exactly six.
stats.f_oneway(*arrays)

Similar analysis but not restricting the analysis to the different plates, incorporating all the data from all runs:

In [None]:

from scipy import stats

# One Series of pore sizes per elastin level across all plates. groupby skips
# rows whose elastin is NaN (plates other than s008/s011); comparing with
# `== NaN` would instead produce an empty group and a NaN test result.
arrays = [pore_sizes for _, pore_sizes in ct_df.groupby('elastin')['pore_size_um']]

# Unpack the list so the test uses however many levels the data contains,
# rather than assuming exactly six.
stats.f_oneway(*arrays)

In [None]:

from scipy import stats

# One Series of pore sizes per crosslinking level across all plates. groupby
# skips rows whose crosslinking is NaN (plates other than s008/s011); comparing
# with `== NaN` would instead produce an empty group and a NaN test result.
arrays = [pore_sizes for _, pore_sizes in ct_df.groupby('crosslinking')['pore_size_um']]

# Unpack the list so the test uses however many levels the data contains,
# rather than assuming exactly six.
stats.f_oneway(*arrays)