# Segmentation-Dependent Feature Reduction using ICC(3,1)

Sources:
1. https://github.com/cosanlab/nltools/blob/master/nltools/data/brain_data.py
2. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4533825/
3. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6838361/
4. https://www.statology.org/intraclass-correlation-coefficient-python/

In [None]:
import pandas as pd
import numpy as np
import pingouin as pg
import docx

In [None]:
# Load the feature table exported for the ICC analysis; the first CSV column
# becomes the row index.
df = pd.read_csv(
    "../../data/initial/T1fs/icc_t1fs_output.csv",
    index_col=0,
    sep=",",
)

In [None]:
# Drop the columns at positions 2..38 (0-based); the first two columns and
# everything from position 39 onwards are kept.
df = df.drop(columns=df.columns[2:39])

# Reduce each 'Image' value to its characters 60-64 and use that substring as
# the case identifier (presumably the ID embedded in a file path — verify the
# offsets if the directory layout changes).
df['Image'] = df['Image'].str[60:65]
df = df.rename(columns={'Image': 'ID_intern'})

In [None]:
# Label every row with the rater (segmentation) it belongs to. The labels are
# positional: they follow the current row order of df (a run of 1s, a run of
# 2s, then 1s again), so this must run before the rows are re-sorted.
# The values are written to 'Mask' first and the column is then renamed, so an
# existing 'Mask' column is overwritten in place and keeps its position.
df = df.assign(
    Mask=[1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1],
).rename(columns={'Mask': 'Judge'})

In [None]:
# Order the rows by case ID so that rows sharing an ID end up next to each other.
df = df.sort_values('ID_intern')
df

In [None]:
# Every column after the first two is treated as a feature to be rated.
features = df.columns[2:].tolist()
# Uncomment to inspect: features

In [None]:
# The first two columns are always carried over into the reduced table.
final_columns = df.columns[:2].tolist()
# Uncomment to inspect: final_columns

In [None]:
# Compute the intraclass correlation of every feature across the two raters
# and collect one result row per feature.
dfs = []
for feature in features:
    icc = pg.intraclass_corr(data=df, targets='ID_intern', raters='Judge', ratings=feature)
    # pingouin returns one row per ICC variant; the row at position 2 is the
    # one used here (ICC3, i.e. ICC(3,1), per the pingouin documentation).
    # .assign() builds a fresh frame, so the 'Feature' column is not written
    # into a slice of the full result (which triggers SettingWithCopyWarning).
    icc = icc.iloc[2:3].assign(Feature=feature)
    dfs.append(icc)

# Stack the per-feature rows into one table with a clean 0..n-1 index.
icc_df = pd.concat(dfs, ignore_index=True)
icc_df

In [None]:
# Narrow icc_df to the features whose ICC is strictly below 0.8. From here on
# icc_df contains only those low-ICC features.
# Open question left by the original author: should the cut-off be strict or
# sit at another value? Recorded counts: 0.8 -> 101 cols, <0.9 -> 93 cols.
icc_df = icc_df.loc[icc_df['ICC'].lt(0.8)]

# Reporting table: drop the columns at positions 1 and 3-7, keeping positions
# 0, 2 and whatever follows position 7.
new_df = icc_df.drop(columns=icc_df.columns[[1, 3, 4, 5, 6, 7]])

In [None]:
# Round the ICC values to two decimals for the report.
new_df = new_df.assign(ICC=new_df['ICC'].round(2))
new_df

In [None]:
# Append new_df as a table to the end of an existing Word document.
doc = docx.Document('./tables.docx')

# The table gets one row more than new_df so the first row can hold the headers.
n_rows, n_cols = new_df.shape
t = doc.add_table(n_rows + 1, n_cols)

# Header row: the column names of new_df.
for col_idx, col_name in enumerate(new_df.columns):
    t.cell(0, col_idx).text = col_name

# Data rows: each value is written as its str() representation.
for row_idx, row_values in enumerate(new_df.values, start=1):
    for col_idx, value in enumerate(row_values):
        t.cell(row_idx, col_idx).text = str(value)

# Save back to the same path, replacing the document that was opened.
doc.save('./tables.docx')

In [None]:
# At this point icc_df holds only the features with ICC < 0.8, i.e. the ones
# that FAIL the reliability cut-off. Appending icc_df['Feature'] directly would
# keep exactly the features the reduction is meant to remove, so append the
# complement instead: every feature that is not in the low-ICC set.
# NOTE(review): this relies on the ICC filter selecting the features to
# exclude; if that filter is ever flipped to select the features to keep,
# revert this to appending icc_df['Feature'].
low_icc_features = set(icc_df['Feature'])
final_columns = final_columns + [name for name in features if name not in low_icc_features]
final_columns

In [None]:
# Keep only the columns of df that are listed in final_columns.
kept_columns = df.columns.intersection(final_columns)
df = df[kept_columns]

In [None]:
# Display the reduced table.
df

In [None]:
# remaining_features = list(icc_df['Feature'])
# file = open("../../data/initial/T2/t2_features.txt", "w")
# for element in remaining_features:
#     file.write(element + "\n")
# file.close()