In [1]:
import matplotlib.pyplot as plt
import matplotlib.table as tbl
import math
import numpy as np
import pandas as pd

import csv
import os

In [7]:
# Load data
#
# Reads every per-subject fractal-dimension CSV in `folder` and collects one
# record per file into the dataframe `df`.

# Expects all relevant files to be in this folder
folder = 'FDs_data/FDs_ManySizeBy1_allOffsets_ArtVein'
# os.listdir returns names in arbitrary order; sort so the row order of `df`
# (and every later lookup by row number) is reproducible between runs.
files = sorted(f for f in os.listdir(folder) if 'Lung' in f)

df_list = []
for f in files:
    # newline='' is what the csv module expects for files it reads.
    with open(os.path.join(folder, f), 'r', newline='') as csv_file:
        csv_data = list(csv.reader(csv_file))

    # Rows 0-2 hold the fractal dimension and the two fit coefficients (value in
    # column 1); row 3 is skipped; rows 4+ are (size, count) integer pairs.
    # Blank rows (e.g. a trailing empty line) are dropped so the int cast works.
    sc_arr = np.asarray([row for row in csv_data[4:] if row]).astype(int)
    sizes, counts = sc_arr[:, 0], sc_arr[:, 1]

    # Lobe and vessel type are recovered from the file name, case-insensitively.
    # The first matching keyword wins; '' means no keyword was found.
    name_lower = f.lower()
    lobe = next((key for key in ('left', 'right', 'whole') if key in name_lower), '')
    vessel = next((key for key in ('artery', 'vein') if key in name_lower), '')

    df_list.append({
        # Everything before the last two '_'-separated fields of the file name
        'SID':                 '_'.join(f.split('_')[:-2]),
        'Lobe':                lobe,
        'Artery_Or_Vein':      vessel,
        'Fractal_Dimension':   float(csv_data[0][1]),
        'Coefficients':        np.array([csv_data[1][1], csv_data[2][1]]).astype(float),
        'Sizes':               sizes,
        'Counts':              counts,
    })

df = pd.DataFrame(df_list)
display(df)

Unnamed: 0,SID,Lobe,Artery_Or_Vein,Fractal_Dimension,Coefficients,Sizes,Counts
0,001_pect_phfirst_060413,whole,artery,2.102631,"[-2.1026310273369337, 14.062896620852737]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[2198, 2471, 2847, 3275, 3767, 4379, 5114, 600..."
1,001_pect_phfirst_060413,whole,vein,1.954222,"[-1.9542224798882817, 13.428013573303977]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[1894, 2122, 2397, 2705, 3070, 3484, 4020, 467..."
2,002_pect_phfirst_050913,whole,artery,2.021757,"[-2.021757367357323, 13.547803671087971]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[1731, 1945, 2214, 2484, 2854, 3265, 3775, 437..."
3,002_pect_phfirst_050913,whole,vein,1.919114,"[-1.9191137775165923, 13.180700495172301]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[1618, 1813, 2062, 2319, 2654, 3022, 3443, 397..."
4,004_pect_phfirst_082913,whole,artery,2.095566,"[-2.095565565306421, 13.345951668093372]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[1125, 1273, 1451, 1648, 1895, 2172, 2530, 298..."
...,...,...,...,...,...,...,...
447,386_pect_phfirst_080817,whole,vein,1.833005,"[-1.833005433262882, 13.010253969397791]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[1670, 1879, 2144, 2448, 2790, 3203, 3704, 428..."
448,387_pect_phfirst_090117,whole,artery,1.993547,"[-1.9935471750634037, 13.477946181209509]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[1653, 1868, 2129, 2444, 2809, 3261, 3830, 449..."
449,387_pect_phfirst_090117,whole,vein,1.901597,"[-1.9015974202133359, 13.130487076183568]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[1534, 1728, 1971, 2246, 2586, 2992, 3469, 404..."
450,388_pect_phfirst_080516,whole,artery,2.168420,"[-2.1684199840900424, 12.9926051776032]","[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9...","[620, 711, 799, 915, 1061, 1236, 1447, 1732, 2..."


In [8]:
# Compute FD!
#
# For every row of `df`, selects a subset of box sizes (chosen by `folder_type`),
# fits a straight line to log(count) vs log(size), and writes the result to one
# CSV file per row inside the output folder built below.

# ManySize, 20Size, 20SizeBy1
folder_type = '20Size_allOffsets'
artvein = True

if folder_type:
    folder = f'FDs_data/FDs_{folder_type}'
else:
    folder = 'FDs_data/FDs'

if artvein:
    folder += '_ArtVein'

os.makedirs(folder, exist_ok=True)

# Step between consecutive box sizes; depends only on folder_type, so it is
# computed once instead of on every row.
m = 1 if 'By1' in folder_type else 2

for num in range(0, len(df)):
    sid = df['SID'][num]
    lobe = df['Lobe'][num]
    art_vein = df['Artery_Or_Vein'][num]

    sizes_row = df['Sizes'][num]

    # Number of generations: exponent of the greatest power of 2 that does not
    # exceed the first entry of sizes_row. log2 is used directly because the
    # log(x)/log(2) round-trip can land just below an integer and be truncated.
    n = int(np.floor(np.log2(sizes_row[0])))

    if '20Size' in folder_type:
        mask = np.arange(20, 1, -m)
    elif 'ManySize' in folder_type:
        mask = np.arange(2**(n), 1, -m)
    else:
        mask = 2**np.arange(n, 0, -1)

    # Positions in sizes_row of the requested sizes. The lookup searches the
    # reversed row, so it assumes sizes_row is sorted in descending order.
    # NOTE(review): if a requested size is absent, searchsorted silently returns
    # a neighbouring position (or wraps to the last element) - confirm every
    # value in `mask` is present in the input files.
    mask_ind = len(sizes_row) - np.searchsorted(sizes_row[::-1], mask[::-1])[::-1] - 1

    sizes = sizes_row[mask_ind]
    counts = df['Counts'][num][mask_ind]
    sizes_log = np.log(sizes)
    counts_log = np.log(counts)

    # domain=[] keeps the fit in unscaled x, so coef[0] is the intercept and
    # coef[1] the slope of log(count) vs log(size).
    poly = np.polynomial.polynomial.Polynomial.fit(x=sizes_log, y=counts_log, deg=1, domain=[])

    # File layout: FD (negated slope), slope, intercept, header, then the data.
    cells = []
    cells.append(['Fractal_Dimension', f'{-poly.coef[1]:.16f}'])
    cells.append(['Coefficients', f'{poly.coef[1]:.16f}'])
    cells.append(['', f'{poly.coef[0]:.16f}'])
    cells.append(['Size', 'Box_Count'])
    cells.extend([f'{size}', f'{count}'] for size, count in zip(sizes, counts))

    if artvein:
        # EX: 001_pect_phfirst_060413_leftLungVesselParticlesConnectedArtery_fd
        filename = f'{folder}/{sid}_{lobe}LungVesselParticlesConnected{art_vein.capitalize()}_fd.csv'
    else:
        # EX: 001_pect_phfirst_060413_leftLungVesselParticles_fd
        filename = f'{folder}/{sid}_{lobe}LungVesselParticles_fd.csv'

    # `with` closes the file even if a write fails; newline='' stops the text
    # layer from rewriting the '\n' line terminator on platforms that translate it.
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',', lineterminator='\n')
        writer.writerows(cells)

In [None]:
# Generate Stats!
#
# Adds three per-row diagnostics of the log-log fit to `df`:
# R_Squared, Residuals and Pairwise_Slopes.

# One entry per dataframe row; attached to df as new columns after the loop.
r_squared_list, residuals_list, pairwise_slopes_list = [], [], []

for index, row in df.iterrows():
    log_sizes, log_counts = np.log(row['Sizes']), np.log(row['Counts'])

    # R squared: squared Pearson correlation between log(size) and log(count)
    correlation_matrix = np.corrcoef(log_sizes, log_counts)
    r_squared_list.append(correlation_matrix[0, 1] ** 2)

    # Residuals: observed log(count) minus the value predicted by the stored
    # coefficients (np.polyval reads them highest degree first)
    predicted_log_counts = np.polyval(row['Coefficients'], log_sizes)
    residuals_list.append(log_counts - predicted_log_counts)

    # Pairwise slopes: rise over run between each pair of neighbouring points
    rise = log_counts[1:] - log_counts[:-1]
    run = log_sizes[1:] - log_sizes[:-1]
    pairwise_slopes_list.append(rise / run)

for column, values in (
    ('R_Squared', r_squared_list),
    ('Residuals', residuals_list),
    ('Pairwise_Slopes', pairwise_slopes_list),
):
    df[column] = values

# Find linear fits for the points near where the residuals cross zero
#
# For every row of `df` this fits straight lines to log(count) vs log(size)
# locally around each of the two residual sign changes, and once more over all
# points with a positive residual, then writes one summary line per row to a
# combined CSV. Requires the R_Squared, Residuals and Pairwise_Slopes columns.

cells = [['SID', 'Lobe', 'Artery_Or_Vein', 'Fractal_Dimension', 'Linear_Fit', 'R_Squared', 'FD_@_1st_Root', 'FD_@_2nd_Root', 'FD_@_Positive_Residuals']]
for num in range(0, len(df)):
    # Grab all of the needed values from the dataframe
    sizes_log = np.log(df['Sizes'][num])
    sizes_log_half = (sizes_log[1:] + sizes_log[:-1]) / 2
    counts_log = np.log(df['Counts'][num])
    coeffs = df['Coefficients'][num]
    r_squared = df['R_Squared'][num]
    residuals = df['Residuals'][num]
    pairwise_slopes = df['Pairwise_Slopes'][num]
    fd = df['Fractal_Dimension'][num]
    sid = df['SID'][num]
    lobe = df['Lobe'][num]
    art_vein = df['Artery_Or_Vein'][num]

    # Index i is reported when the residual sign differs between points i and i+1.
    zero_crossings = np.where(np.diff(np.sign(np.array(residuals))))[0]
    if len(zero_crossings) != 2:
        # The output line reports exactly a 1st and a 2nd root. Rows with fewer
        # crossings used to fall through and fail on counts_poly[0]/[1]; rows
        # with more are ambiguous. Either way: report the row and skip it.
        display(f'{sid}_{lobe}')
        continue

    # Fits
    residuals_poly = []
    counts_poly = []
    for c in zero_crossings:
        # Up to four points around the crossing. The start is clamped at 0
        # because a negative start would wrap around and yield an empty slice.
        window = slice(max(c - 1, 0), c + 3)
        residuals_poly.append(np.polynomial.polynomial.Polynomial.fit(x=sizes_log[window], y=residuals[window], deg=1, domain=[]))
        counts_poly.append(np.polynomial.polynomial.Polynomial.fit(x=sizes_log[window], y=counts_log[window], deg=1, domain=[]))

    # NOTE(review): a degree-1 fit needs at least two points; a row with a single
    # positive residual would give a poorly determined slope here - confirm
    # whether that can occur in the data.
    positive = residuals > 0
    counts_with_pos_residuals_poly = np.polynomial.polynomial.Polynomial.fit(x=sizes_log[positive], y=counts_log[positive], deg=1, domain=[])

    # See initialization of cells variable for labels
    cells.append([
        f'{sid}',
        f'{lobe}',
        f'{art_vein}',
        f'{fd:.4f}',
        f'y = {coeffs[0]:.4f}x + {coeffs[1]:.4f}',
        f'{r_squared}',
        f'{-counts_poly[0].coef[1]:.4f}',
        f'{-counts_poly[1].coef[1]:.4f}',
        f'{-counts_with_pos_residuals_poly.coef[1]:.4f}',
    ])

# `with` closes the file even if a write fails; newline='' stops the text layer
# from rewriting the '\n' line terminator on platforms that translate it.
with open('FDs_20SizeBy1_ArtVein_combined.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',', lineterminator='\n')
    writer.writerows(cells)