In [1]:
import matplotlib.pyplot as plt
import matplotlib.table as tbl
import math
import numpy as np
import pandas as pd

import csv
import os

In [37]:
# Load data

# Expects all relevant files to be in this folder
folder = 'FDs_ManySizeBy1_ArtVein'
# os.listdir() returns names in arbitrary order; sorting makes the row order
# of `df` reproducible between runs and machines.
files = sorted(f for f in os.listdir(folder) if 'Lung' in f)


def _first_keyword(text, keywords):
    """Return the first entry of `keywords` that occurs in `text`, or '' if none does."""
    return next((kw for kw in keywords if kw in text), '')


def load_fd_record(folder, file_name):
    """Parse one fractal-dimension CSV into a dict describing one DataFrame row.

    The file is read positionally:
      * row 0, column 1 -> 'Fractal_Dimension' (float)
      * rows 1 and 2, column 1 -> 'Coefficients' (float array, in that order)
      * row 3 is skipped
      * rows 4 and onwards -> integer pairs, column 0 to 'Sizes', column 1 to 'Counts'

    'SID' is the file name with its last two '_'-separated parts removed.
    'Lobe' ('left' / 'right' / 'whole') and 'Artery_Or_Vein' ('artery' / 'vein')
    are the first matching keyword found in the lower-cased file name, or ''
    when no keyword matches.
    """
    # The csv module asks for newline='' on file objects it reads from.
    with open(os.path.join(folder, file_name), 'r', newline='') as csv_file:
        csv_data = list(csv.reader(csv_file))

    sc_arr = np.asarray(csv_data[4:]).astype(int)
    # Keep the original name intact for the SID; match keywords case-insensitively.
    name_lower = file_name.lower()

    return {
        'SID':                 '_'.join(file_name.split('_')[:-2]),
        'Lobe':                _first_keyword(name_lower, ('left', 'right', 'whole')),
        'Artery_Or_Vein':      _first_keyword(name_lower, ('artery', 'vein')),
        'Fractal_Dimension':   float(csv_data[0][1]),
        'Coefficients':        np.array([csv_data[1][1], csv_data[2][1]]).astype(float),
        'Sizes':               sc_arr[:, 0],
        'Counts':              sc_arr[:, 1],
    }


df_list = [load_fd_record(folder, f) for f in files]

df = pd.DataFrame(df_list)
display(df)

Unnamed: 0,SID,Lobe,Artery_Or_Vein,Fractal_Dimension,Coefficients,Sizes,Counts
0,001_pect_phfirst_060413,left,artery,2.232677,"[-2.2326770195896577, 13.448566994803741]","[101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91,...","[21, 21, 22, 22, 24, 24, 26, 27, 28, 28, 29, 2..."
1,001_pect_phfirst_060413,left,vein,2.086140,"[-2.086139596613957, 12.742011467560117]","[98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 8...","[21, 22, 23, 23, 23, 24, 24, 25, 25, 25, 26, 2..."
2,001_pect_phfirst_060413,right,artery,2.318657,"[-2.3186574631852026, 14.013400767629951]","[115, 114, 113, 112, 111, 110, 109, 108, 107, ...","[21, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 2..."
3,001_pect_phfirst_060413,right,vein,2.230513,"[-2.2305131597170287, 13.609355025938529]","[116, 115, 114, 113, 112, 111, 110, 109, 108, ...","[21, 21, 22, 22, 22, 22, 22, 22, 23, 23, 23, 2..."
4,001_pect_phfirst_060413,whole,artery,2.282190,"[-2.282189627688371, 14.46570682321803]","[141, 140, 139, 138, 137, 136, 135, 134, 133, ...","[20, 21, 21, 22, 22, 24, 24, 25, 25, 25, 25, 2..."
...,...,...,...,...,...,...,...
1323,388_pect_phfirst_080516,left,vein,1.840240,"[-1.840239550663824, 10.696754460901978]","[55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 4...","[26, 28, 29, 30, 31, 31, 33, 33, 36, 37, 40, 4..."
1324,388_pect_phfirst_080516,right,artery,2.269054,"[-2.269053811536129, 12.653310201999957]","[69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 5...","[18, 19, 20, 20, 22, 24, 25, 27, 27, 29, 31, 3..."
1325,388_pect_phfirst_080516,right,vein,1.944101,"[-1.9441009635377302, 11.244279615371445]","[59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 4...","[22, 25, 26, 27, 30, 32, 33, 33, 35, 35, 36, 4..."
1326,388_pect_phfirst_080516,whole,artery,2.181449,"[-2.181448515445331, 13.01511598532399]","[96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 8...","[20, 21, 23, 24, 26, 27, 27, 27, 28, 28, 29, 3..."


In [38]:
# Compute FD!

# Each fit uses only the trailing entries of Sizes/Counts, selected with the
# slice [-(box_sizes_ind-1):], i.e. the last (box_sizes_ind - 1) entries.
# NOTE(review): with box_sizes_ind = 20 this keeps 19 points, not 20 --
# confirm that this is what "20Size" in the output folder name means.
box_sizes_ind = 20

folder = 'FDs_20SizeBy1_ArtVein'
os.makedirs(folder, exist_ok=True)

for num in range(0, len(df)):
    sid = df['SID'][num]
    lobe = df['Lobe'][num]
    art_vein = df['Artery_Or_Vein'][num]
    sizes = df['Sizes'][num][-(box_sizes_ind-1):]
    counts = df['Counts'][num][-(box_sizes_ind-1):]
    sizes_log = np.log(sizes)
    counts_log = np.log(counts)

    # Straight-line fit in log-log space. domain=[] makes fit() use the class
    # default domain, so poly.coef reads directly as [intercept, slope].
    poly = np.polynomial.polynomial.Polynomial.fit(x=sizes_log, y=counts_log, deg=1, domain=[])

    # Header rows: the fractal dimension is the negated slope, followed by the
    # slope and the intercept, then the column labels for the data rows.
    cells = [
        ['Fractal_Dimension', f'{-poly.coef[1]:.16f}'],
        ['Coefficients', f'{poly.coef[1]:.16f}'],
        ['', f'{poly.coef[0]:.16f}'],
        ['Size', 'Box_Count'],
    ]
    cells.extend([f'{size}', f'{count}'] for size, count in zip(sizes, counts))

    # EX: 001_pect_phfirst_060413_leftLungVesselParticlesConnectedArtery_fd
    out_path = os.path.join(
        folder,
        f'{sid}_{lobe}LungVesselParticlesConnected{art_vein.capitalize()}_fd.csv',
    )
    # `with` guarantees the handle is closed even if a write fails;
    # newline='' is what the csv module asks for on files it writes to.
    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',', lineterminator='\n')
        writer.writerows(cells)

In [None]:
# Generate Stats!

# Per-row statistics of the stored linear fit in log-log space. The results
# are attached to `df` as the columns R_Squared, Residuals and Pairwise_Slopes.
r_squared_list = []
residuals_list = []
pairwise_slopes_list = []
for index, row in df.iterrows():
    sizes_log = np.log(row['Sizes'])
    counts_log = np.log(row['Counts'])
    coeffs = row['Coefficients']

    # Calculate R Squared (squared correlation of log-size and log-count)
    correlation_matrix = np.corrcoef(sizes_log, counts_log)
    r_squared_list.append(correlation_matrix[0, 1]**2)

    # Calculate Residuals; np.polyval evaluates coeffs[0] * x + coeffs[1]
    residuals_list.append(counts_log - np.polyval(coeffs, sizes_log))

    # Calculate Pairwise Slopes between consecutive points
    pairwise_slopes_list.append(np.diff(counts_log) / np.diff(sizes_log))

df['R_Squared'] = r_squared_list
df['Residuals'] = residuals_list
df['Pairwise_Slopes'] = pairwise_slopes_list


def _fit_line(x, y):
    """Fit a degree-1 polynomial of y against x.

    domain=[] makes fit() use the class default domain, so `.coef` of the
    result reads directly as [intercept, slope].
    """
    return np.polynomial.polynomial.Polynomial.fit(x=x, y=y, deg=1, domain=[])


# Find linear fits for the points near where the residuals change sign.
# NOTE(review): the output file name below says "20SizeBy1", but the rows come
# from whatever folder `df` was loaded from -- confirm `df` holds the intended data.

cells = [['SID', 'Lobe', 'Artery_Or_Vein', 'Fractal_Dimension', 'Linear_Fit', 'R_Squared', 'FD_@_1st_Root', 'FD_@_2nd_Root', 'FD_@_Positive_Residuals']]
for num in range(0, len(df)):
    # Grab all of the needed values from the dataframe
    sizes_log = np.log(df['Sizes'][num])
    counts_log = np.log(df['Counts'][num])
    coeffs = df['Coefficients'][num]
    r_squared = df['R_Squared'][num]
    residuals = df['Residuals'][num]
    fd = df['Fractal_Dimension'][num]
    sid = df['SID'][num]
    lobe = df['Lobe'][num]
    art_vein = df['Artery_Or_Vein'][num]
    # Not used further in this cell; kept in case later cells read them.
    # TODO confirm and remove if nothing else needs them.
    sizes_log_half = (sizes_log[1:] + sizes_log[:-1]) / 2
    pairwise_slopes = df['Pairwise_Slopes'][num]

    # Index c is reported when the sign of the residual differs between
    # points c and c + 1.
    zero_crossings = np.where(np.diff(np.sign(np.array(residuals))))[0]
    positive = residuals > 0

    # The output row needs exactly two crossings (1st and 2nd root) and at
    # least one positive residual. Anything else is reported and skipped
    # instead of failing on counts_poly[1] or on an empty fit.
    if len(zero_crossings) != 2 or not positive.any():
        display(f'{sid}_{lobe}_{art_vein}')
        continue

    # Fits over the window of points c-1 .. c+2 around each crossing
    residuals_poly = []  # not used in this cell; see the note on sizes_log_half
    counts_poly = []
    for c in zero_crossings:
        # Clamp the start at 0: for c == 0 the slice [-1:3] would be empty.
        window = slice(max(c - 1, 0), c + 3)
        residuals_poly.append(_fit_line(sizes_log[window], residuals[window]))
        counts_poly.append(_fit_line(sizes_log[window], counts_log[window]))

    counts_with_pos_residuals_poly = _fit_line(sizes_log[positive], counts_log[positive])

    # See initialization of cells variable for labels
    cells.append([
        f'{sid}',
        f'{lobe}',
        f'{art_vein}',
        f'{fd:.4f}',
        f'y = {coeffs[0]:.4f}x + {coeffs[1]:.4f}',
        f'{r_squared}',
        f'{-counts_poly[0].coef[1]:.4f}',
        f'{-counts_poly[1].coef[1]:.4f}',
        f'{-counts_with_pos_residuals_poly.coef[1]:.4f}',
    ])

# `with` guarantees the handle is closed even if a write fails;
# newline='' is what the csv module asks for on files it writes to.
with open('FDs_20SizeBy1_ArtVein_combined.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',', lineterminator='\n')
    writer.writerows(cells)