# Devel
Develop code snippets.

In [None]:
# Load IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Imports
import sys
sys.path.append('../..')

from pandas import DataFrame, concat
from tqdm.notebook import tqdm
from pprint import pprint

from neurotk import login
from neurotk.girder_utils import get_items

In [None]:
# Authenticate against the API; the returned client `gc` is reused by the
# cells below.
gc = login(
    'https://megabrain.neurology.emory.edu/api/v1',
    username='jvizcar',
)

In [None]:
# Collect the items of the Emory-ADRC collection that carry a truthy
# 'largeImage' entry; these are counted as the available WSIs.
items = [
    entry
    for entry in get_items(gc, '641ba814867536bb7a225533')
    if entry.get('largeImage')
]

print(f'{len(items)} WSIs available.')

In [None]:
# Compile the item info into a data frame, one row per item.
# Each pair below is (output column, metadata key); a key that is missing
# from the item's metadata yields an empty string.
schema_fields = (
    ('case', 'caseID'),
    ('region', 'regionName'),
    ('block', 'blockID'),
    ('stain', 'stainID'),
)
clinical_fields = (
    ('CERAD', 'CERAD'),
    ('Braak Stage', 'Braak Stage'),
    ('Thal', 'Thal'),
    ('ABC', 'ABC'),
    ('Clinical Diagnosis', 'Clin Dx'),
    ('Age at onset', 'Age at Onset'),
    ('Age at death', 'Age at Death/Bx'),
    ('Duration (years)', 'Duration (years)'),
    ('Sex', 'Sex'),
    ('Race', 'Race'),
)

records = []

for item in items:
    meta = item.get('meta', {})
    schema = meta.get('npSchema', {})
    clinical = meta.get('npClinical', {})

    record = {'_id': item['_id'], 'name': item['name']}
    record.update({col: schema.get(key, '') for col, key in schema_fields})
    record.update({col: clinical.get(key, '') for col, key in clinical_fields})

    # Keep the full item dict alongside the flattened fields.
    record['item'] = item
    # 'Region' starts empty; the grouped region name is assigned separately.
    record['Region'] = ''

    records.append(record)

item_df = DataFrame(
    records,
    columns=[
        '_id', 'name', 'case', 'region', 'block', 'stain', 'CERAD',
        'Braak Stage', 'Thal', 'ABC', 'Clinical Diagnosis', 'Age at onset',
        'Age at death', 'Duration (years)', 'Sex', 'Race', 'item', 'Region'
    ]
)

item_df.head()

In [None]:
# Print every non-empty raw region name with the number of rows that use it.
raw_region_tally = item_df.region.value_counts()

for name, count in raw_region_tally.items():
    if not name:
        continue
    print(f'{name} (n={count})')

In [None]:
# List the raw region names mentioning "cerebellum" (case-insensitive), each
# printed as an indented, quoted, comma-terminated entry.
for name in item_df.region.value_counts().index:
    if 'cerebellum' in name.lower():
        print(f"        \'{name}\', ")

In [None]:
# Group together some regions into a single name.
# Each key is the grouped name written to the 'Region' column; its tuple lists
# the raw 'region' values (exact, case-sensitive matches) that collapse into it.
region_groups = {
    'Frontal cortex': (
        'Frontal cortex',
        'Left Frontal cortex',
        'Right Frontal cortex',
        'Frontal cortex, right',
        'Right frontal cortex',
    ),
    'Temporal cortex': (
        'Temporal cortex',
        'Left Temporal cortex',
        'Temporal cortex, superior and middle gyri',
        'Temporal cortex, inferior and middle gyri',
        'Temporal cortex, left',
        'Left Temporal cortex and straight gyrus',
        'Right temporal cortex, bullet tract',
        'Left temporal cortex, bullet tract',
        'Right temporal cortex',
    ),
    'Parietal cortex': (
        'Parietal cortex',
        'Left Parietal cortex',
        'Parietal cortex, left',
    ),
    'Occipital cortex': (
        'Occipital cortex',
        'Right occipital cortex',
        'Left Occipital cortex',
        'Occipital cortex with hemorrhage',
        'Occipital cortex with greenish lesion in white matter',
        'Occipital cortex, left',
        'Right Occipital cortex and subcortical white matter',
        'Occipital cortex with white matter degeneration',
        'Occipital cortex with hemorrhagic infarct',
        'Parieto-occipital cortex with subarachnoid hemorrhage',
        'Parieto-occipital cortex with area of infarction',
        'Possible area of infection, lateral occipital cortex',
    ),
    'Cingulate cortex': (
        'Cingulate cortex',
        'Anterior cingulate cortex',
        'Posterior cingulate cortex',
        'Left Cingulate cortex',
        'Cingulate cortex, left',
    ),
    'Insular cortex': (
        'Insular cortex',
        'Anterior basal ganglia and insular cortex',
        'Insular cortex and anterior basal ganglia',
        'Anterior basal ganglia and Insular cortex',
        'Posterior basal ganglia and insular cortex',
        'Mid-level basal ganglia and insular cortex',
        'Insular cortex and anterior basal ganglia with nucleus accumbens',
        'Insular cortex and anterior basal o',
        'Insular cortex and posterior basal ganglia',
        'Basal ganglia and Insular cortex',
        'Mid-level basal ganglia and Insular cortex',
        'Posterior basal ganglia and Insular cortex',
        'Left Insular cortex',
        'Right insular cortex',
        'Left Cingulate cortex and left insular cortex',
    ),
    'Hippocampus': (
        'Hippocampus',
        'Left hippocampus',
        'Right hippocampus',
        'Left Hippocampus',
        'Right Hippocampus',
        'Hippocampus, left',
        'Posterior hippocampus',
    ),
    'Amygdala': (
        'Amygdala',
        'Left Amygdala',
        'Right Amygdala',
        'Left amygdala',
        'Amygdala, left',
        'Right amygdala',
    ),
    'Cerebellum': (
        'Cerebellum and dentate nucleus',
        'Cerebellum',
        'Anterior cerebellum',
        'Medulla, cerebellum',
    ),
}

# Invert the table to raw name -> grouped name so each row needs one lookup.
region_lookup = {
    raw_name: grouped_name
    for grouped_name, raw_names in region_groups.items()
    for raw_name in raw_names
}

# Raw names that are not listed map to NaN.
grouped_region = item_df.region.map(region_lookup)
matched = grouped_region.notna()

# Only matched rows are written; all other rows keep their current 'Region'.
item_df.loc[matched, 'Region'] = grouped_region[matched]

In [None]:
# Print every non-empty grouped Region with the number of rows assigned to it.
grouped_tally = item_df.Region.value_counts()

for label in grouped_tally.index:
    if label:
        print(f'{label} (n={grouped_tally[label]})')

In [None]:
# Now do a count by case, then keep the cases that have an aBeta slide in
# every valid region and at least one non-empty value for each score column.
# The selected rows end up in `df`.
# NOTE(review): items without a caseID all share the case value '' and are
# evaluated together as a single case here - confirm that is intended.
cases = item_df.case.unique()

print(f'{len(cases)} number of cases.')

# Identify cases that have all brain regions.
region_counts = {}  # '<n> regions' -> number of cases with n valid regions
good_cases = 0

valid_regions = (
    'Hippocampus', 'Amygdala', 'Frontal cortex', 'Occipital cortex',
    'Parietal cortex', 'Insular cortex', 'Cingulate cortex', 'Cerebellum'
)
score_columns = ('CERAD', 'Braak Stage', 'Thal', 'ABC')
good_items = []

for case in cases:
    case_df = item_df[
        (item_df.case == case)
        & (item_df.stain == 'aBeta')
        & item_df.Region.isin(valid_regions)
    ]

    n = len(case_df.Region.unique())

    count_key = f'{n} regions'
    region_counts[count_key] = region_counts.get(count_key, 0) + 1

    # Derived from the tuple so the check follows any change to valid_regions.
    if n != len(valid_regions):
        continue

    # All regions present; each score needs at least one non-empty value.
    has_all_scores = all(
        any(score for score in case_df[column].unique())
        for column in score_columns
    )
    if not has_all_scores:
        continue

    good_cases += 1

    # Remove duplicate regions: after sorting by name, keep the first row of
    # each region, in valid_regions order.
    case_df = case_df.sort_values(by='name')

    if len(case_df) > len(valid_regions):
        case_df = DataFrame([
            case_df[case_df.Region == region].iloc[0]
            for region in valid_regions
        ])

    good_items.append(case_df)

# concat() raises ValueError on an empty list, so fall back to an empty frame
# with the same columns when no case qualifies.
if good_items:
    df = concat(good_items, ignore_index=True)
else:
    df = item_df.iloc[0:0].copy()

    

In [None]:
# Remap some values: each column gets its own old -> new replacement table.
value_remaps = (
    ('ABC', {'-2': 'n/a'}),
    ('Braak Stage', {
        '(II)': 'II', '(IV)': 'IV', '(VI)': 'VI', '(V)': 'V'
    }),
    ('Race', {
        'w': 'Caucasian', 'b': 'Black / African American', 'h (b?)': 'na',
        '': 'na', 'hw': 'other', 'h': 'other'
    }),
)

for column, mapping in value_remaps:
    df[column] = df[column].replace(mapping)

In [None]:
# Be very careful - this rewrites the npSchema and npClinical metadata dicts.
for _, row in tqdm(df.iterrows(), total=len(df)):
    meta = row['item'].get('meta', {})
    schema = meta.get('npSchema', {})
    clinical = meta.get('npClinical', {})

    # Store the grouped region name under the 'region' key of the schema.
    schema['region'] = row.Region

    # Overwrite the clinical entries with the values from the data frame.
    clinical.update({
        'ABC': row.ABC,
        'Braak Stage': row['Braak Stage'],
        'Race': row.Race,
    })

    # You don't have to do this any more; the upload call stays disabled.
    # _ = gc.addMetadataToItem(row._id, {'npSchema': schema, 'npClinical': clinical})


In [None]:
# Create neuroTK dataset item: one entry per selected row, holding the item
# id, its name and a fixed subset of its npSchema / npClinical metadata.
schema_keys = ('stainID', 'caseID', 'region')
clinical_keys = (
    'ABC', 'CERAD', 'Braak Stage', 'Thal', 'Age at Death/Bx', 'Age at Onset',
    'Clin Dx', 'Duration (years)', 'Race', 'Sex',
)

data = []

for _id in tqdm(df._id.tolist()):
    # Fetch the item again through the client rather than reusing the copy
    # stored in the data frame.
    item = gc.getItem(_id)
    schema = item['meta']['npSchema']
    clinical = item['meta']['npClinical']

    # Direct indexing on purpose: a missing key raises KeyError.
    entry = {
        '_id': _id,
        'name': item['name'],
        'npSchema': {key: schema[key] for key in schema_keys},
        'npClinical': {key: clinical[key] for key in clinical_keys},
    }
    data.append(entry)