In [2]:
import matplotlib.pyplot as plt
import matplotlib.table as tbl
import math
import numpy as np
import pandas as pd

import csv
import os

In [71]:
# Load data
#
# Reads every fractal-dimension CSV in `folder` whose name contains 'Lung' and
# collects one record per file into `df`. Each file is read positionally:
#   row 0, col 1 -> fractal dimension
#   row 1, col 1 -> first stored fit coefficient
#   row 2, col 1 -> second stored fit coefficient
#   row 3        -> skipped (the writer cell in this notebook puts the
#                   'Size' / 'Box_Count' header there)
#   row 4+       -> integer (size, box count) pairs

# Expects all relevant files to be in this folder
folder = 'FDs_data/FDs_ManySizeBy1'
# os.listdir returns names in arbitrary order; sort so the row order of `df`
# (and everything keyed on its index downstream) is reproducible.
files = sorted(name for name in os.listdir(folder) if 'Lung' in name)

df_list = []
for fname in files:
    # newline='' is what the csv module expects for files it parses itself.
    with open(os.path.join(folder, fname), 'r', newline='') as csv_file:
        csv_data = list(csv.reader(csv_file))

    sc_arr = np.asarray(csv_data[4:]).astype(int)
    sizes, counts = sc_arr[:, 0], sc_arr[:, 1]

    df_dict = {
        # Everything before the last two '_'-separated tokens of the file name.
        'SID':                 '_'.join(fname.split('_')[:-2]),
        'Lobe':                '',
        'Artery_Or_Vein':      '',
        'Fractal_Dimension':   float(csv_data[0][1]),
        # Kept in file order; the stats cell passes this to np.polyval, i.e.
        # it is treated as [slope, intercept].
        'Coefficients':        np.array([csv_data[1][1], csv_data[2][1]]).astype(float),
        'Sizes':               sizes,
        'Counts':              counts,
    }

    # Lobe and vessel type are inferred from case-insensitive substrings of
    # the file name; they stay '' when no keyword matches.
    fname_lower = fname.lower()
    for lobe_name in ('left', 'right', 'whole'):
        if lobe_name in fname_lower:
            df_dict['Lobe'] = lobe_name
            break

    for vessel_name in ('artery', 'vein'):
        if vessel_name in fname_lower:
            df_dict['Artery_Or_Vein'] = vessel_name
            break

    df_list.append(df_dict)

df = pd.DataFrame(df_list)
display(df)

Unnamed: 0,SID,Lobe,Artery_Or_Vein,Fractal_Dimension,Coefficients,Sizes,Counts
0,001_pect_phfirst_060413,left,,2.308275,"[-2.3082747042124145, 13.837770076664365]","[204, 203, 202, 201, 200, 199, 198, 197, 196, ...","[4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, ..."
1,001_pect_phfirst_060413,right,,2.351995,"[-2.3519951941915305, 14.26005841805147]","[235, 234, 233, 232, 231, 230, 229, 228, 227, ...","[3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ..."
2,001_pect_phfirst_060413,whole,,2.369142,"[-2.3691423113866885, 14.89237096618412]","[283, 282, 281, 280, 279, 278, 277, 276, 275, ...","[4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ..."
3,002_pect_phfirst_050913,left,,2.313820,"[-2.313820132957811, 13.69099479654512]","[145, 144, 143, 142, 141, 140, 139, 138, 137, ...","[8, 10, 10, 10, 10, 11, 11, 12, 12, 12, 12, 12..."
4,002_pect_phfirst_050913,right,,2.329904,"[-2.3299038543963384, 14.000240865129113]","[200, 199, 198, 197, 196, 195, 194, 193, 192, ...","[4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, ..."
...,...,...,...,...,...,...,...
671,387_pect_phfirst_090117,right,,2.287754,"[-2.287753657634071, 13.637800439912635]","[194, 193, 192, 191, 190, 189, 188, 187, 186, ...","[4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, ..."
672,387_pect_phfirst_090117,whole,,2.340863,"[-2.3408628914607568, 14.476839831034797]","[251, 250, 249, 248, 247, 246, 245, 244, 243, ...","[4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ..."
673,388_pect_phfirst_080516,left,,2.258937,"[-2.258936754058771, 12.49459375489788]","[122, 121, 120, 119, 118, 117, 116, 115, 114, ...","[4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, ..."
674,388_pect_phfirst_080516,right,,2.272936,"[-2.272935564698213, 12.759954333812175]","[139, 138, 137, 136, 135, 134, 133, 132, 131, ...","[4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, ..."


In [81]:
# Compute FD!
#
# For every row of `df`, pick a subset of box sizes (chosen by `folder_type`),
# fit a straight line to log(count) vs. log(size), and write the result to a
# per-lung CSV in the same 4-header-row layout the loading cell reads back:
#   Fractal_Dimension, Coefficients (slope), '' (intercept), Size/Box_Count,
#   then one row per selected (size, count) pair.

folder_type = ''
artvein = ''
if folder_type:
    folder = f'FDs_data/FDs_{folder_type}'
else:
    folder = 'FDs_data/FDs'

if artvein:
    folder += '_artvein'

os.makedirs(folder, exist_ok=True)

for num, row in df.iterrows():
    sid = row['SID']
    lobe = row['Lobe']
    art_vein = row['Artery_Or_Vein']
    sizes_row = row['Sizes']

    # Number of generations: exponent of the greatest power of 2 not exceeding
    # the first (largest) size. np.log2 is used directly instead of a
    # log(x)/log(2) round-trip, whose float error can land just below an
    # integer and be truncated to the wrong exponent.
    n = int(np.floor(np.log2(sizes_row[0])))

    if folder_type == '20Size':
        mask = np.arange(20, 1, -2)
    elif folder_type == '20SizeBy1':
        mask = np.arange(20, 1, -1)
    elif folder_type == 'ManySize':
        mask = np.arange(2**(n), 1, -2)
    else:
        # Default: powers of two 2**n, 2**(n-1), ..., 2
        mask = 2**np.arange(n, 0, -1)

    # Map each wanted size in `mask` to its index in `sizes_row`.
    # searchsorted needs ascending input, so both arrays are reversed and the
    # resulting positions are converted back to indices into the original
    # order (assumes sizes_row is sorted in descending order).
    mask_ind = len(sizes_row) - np.searchsorted(sizes_row[::-1], mask[::-1])[::-1] - 1

    sizes = sizes_row[mask_ind]
    counts = row['Counts'][mask_ind]
    sizes_log = np.log(sizes)
    counts_log = np.log(counts)

    # domain=[] keeps the class default domain, so the coefficients apply to
    # the unscaled log-sizes: coef[0] is the intercept, coef[1] the slope.
    poly = np.polynomial.polynomial.Polynomial.fit(x=sizes_log, y=counts_log, deg=1, domain=[])

    cells = [
        ['Fractal_Dimension', f'{-poly.coef[1]:.16f}'],
        ['Coefficients', f'{poly.coef[1]:.16f}'],
        ['', f'{poly.coef[0]:.16f}'],
        ['Size', 'Box_Count'],
    ]
    cells.extend([f'{size}', f'{count}'] for size, count in zip(sizes, counts))

    if artvein:
        # EX: 001_pect_phfirst_060413_leftLungVesselParticlesConnectedArtery_fd
        filename = f'{folder}/{sid}_{lobe}LungVesselParticlesConnected{art_vein.capitalize()}_fd.csv'
    else:
        # EX: 001_pect_phfirst_060413_leftLungVesselParticles_fd
        filename = f'{folder}/{sid}_{lobe}LungVesselParticles_fd.csv'

    # Context manager closes the file even if a write fails; newline='' lets
    # the csv writer control line endings (always '\n' here).
    with open(filename, 'w', newline='') as out_file:
        writer = csv.writer(out_file, delimiter=',', lineterminator='\n')
        writer.writerows(cells)

In [None]:
# Generate Stats!
#
# Adds three per-row columns to `df`, all computed in log-log space:
#   R_Squared       - squared Pearson correlation of log(size) vs. log(count)
#   Residuals       - log(count) minus the stored linear fit evaluated at log(size)
#   Pairwise_Slopes - slope between each pair of consecutive points

r_squared_list = []
residuals_list = []
pairwise_slopes_list = []
for row_sizes, row_counts, coeffs in zip(df['Sizes'], df['Counts'], df['Coefficients']):
    sizes_log, counts_log = np.log(row_sizes), np.log(row_counts)

    # Off-diagonal entry of the 2x2 correlation matrix, squared.
    r_squared_list.append(np.corrcoef(sizes_log, counts_log)[0, 1]**2)

    # np.polyval reads coefficients highest degree first.
    residuals_list.append(counts_log - np.polyval(coeffs, sizes_log))

    # Consecutive differences: rise over run between neighbouring points.
    pairwise_slopes_list.append(np.diff(counts_log) / np.diff(sizes_log))

df['R_Squared'] = r_squared_list
df['Residuals'] = residuals_list
df['Pairwise_Slopes'] = pairwise_slopes_list

# Find linear fits for the points near when residuals are 0
#
# For each row with exactly two sign changes in its residuals, fit a line to
# the (up to) four log-log points around each sign change and to all points
# with positive residuals, then write one summary line per row to a combined
# CSV. Rows with any other number of sign changes are reported via display()
# and skipped.

cells = [['SID', 'Lobe', 'Artery_Or_Vein', 'Fractal_Dimension', 'Linear_Fit', 'R_Squared', 'FD_@_1st_Root', 'FD_@_2nd_Root', 'FD_@_Positive_Residuals']]
for num, row in df.iterrows():
    # Grab all of the needed values from the dataframe
    sizes_log = np.log(row['Sizes'])
    sizes_log_half = (sizes_log[1:] + sizes_log[:-1]) / 2  # not used below in this cell
    counts_log = np.log(row['Counts'])
    coeffs = row['Coefficients']
    r_squared = row['R_Squared']
    residuals = row['Residuals']
    pairwise_slopes = row['Pairwise_Slopes']  # not used below in this cell
    fd = row['Fractal_Dimension']
    sid = row['SID']
    lobe = row['Lobe']
    art_vein = row['Artery_Or_Vein']

    # Index i is reported when sign(residuals[i]) != sign(residuals[i + 1]).
    zero_crossings = np.where(np.diff(np.sign(np.array(residuals))))[0]
    # The summary row needs exactly two crossings (1st and 2nd root); anything
    # else - including zero or one crossing, which previously raised
    # IndexError further down - is reported and skipped.
    if len(zero_crossings) != 2:
        display(f'{sid}_{lobe}')
        continue

    # Fits
    residuals_poly = []  # not used below in this cell
    counts_poly = []
    for c in zero_crossings:
        # Window of points c-1 .. c+2 around the crossing. The start is
        # clamped at 0: a negative start would wrap around and yield an empty
        # slice when the crossing is at the very first point.
        window = slice(max(c - 1, 0), c + 3)
        residuals_poly.append(np.polynomial.polynomial.Polynomial.fit(x=sizes_log[window], y=residuals[window], deg=1, domain=[]))
        counts_poly.append(np.polynomial.polynomial.Polynomial.fit(x=sizes_log[window], y=counts_log[window], deg=1, domain=[]))

    positive = residuals > 0
    counts_with_pos_residuals_poly = np.polynomial.polynomial.Polynomial.fit(x=sizes_log[positive], y=counts_log[positive], deg=1, domain=[])

    # See initialization of cells variable for labels
    cells.append([
        f'{sid}',
        f'{lobe}',
        f'{art_vein}',
        f'{fd:.4f}',
        f'y = {coeffs[0]:.4f}x + {coeffs[1]:.4f}',
        f'{r_squared}',
        f'{-counts_poly[0].coef[1]:.4f}',
        f'{-counts_poly[1].coef[1]:.4f}',
        f'{-counts_with_pos_residuals_poly.coef[1]:.4f}',
    ])

# Context manager closes the file even if a write fails; newline='' lets the
# csv writer control line endings (always '\n' here).
with open('FDs_20SizeBy1_ArtVein_combined.csv', 'w', newline='') as out_file:
    writer = csv.writer(out_file, delimiter=',', lineterminator='\n')
    writer.writerows(cells)