# Spectralis P.Pole Thickness Map
v1.9

----
### Add File Paths Here

In [55]:
# Participants table: a CSV-formatted export whose name ends with `.mer`.
# segment() reads its `rc_researchid` and `rc_ohts_id` columns.
# NOTE: absolute, machine-specific path -- edit before running elsewhere.
participants  = '/Users/gary/Desktop/GaryPPole/20160815RCPts.mer'

# Quality-control table (also a CSV-formatted `.mer` export) that each scan
# row is linked to by exam date and exam time inside segment().
quality_control = '/Users/gary/Desktop/GaryPPole/FMPQC22Aug2016.mer'

# Folder holding the exported thickness-map files. segment() picks up the
# tab-separated `*.txt` files directly inside it and writes its CSV output
# to a `PPOLE_OUTPUT` sub-folder of this folder.
folder = '/Users/gary/Desktop/GaryPPole/20160823OHTS_SpectralisThickMap/'

---
### Segment Function

In [56]:
import glob, os
import numpy as np
import pandas as pd
import io

# Layer names recognised in export file names, in the order the combined
# OHTS outputs are written.
_LAYERS = ('IPL', 'GCL', 'RNFL')

# Width that Lastname / PatientID values are zero-padded to.
_ID_WIDTH = 5

# Columns of the QC table that are linked onto every scan row.
_QC_COLUMNS = [
    'ExamDate', 'ExamTimeRAW',
    'ExaminedStruct', 'NumBScans', 'OCTHIRES', 'QCExt1CorrectScanType',
    'QCQualityScore', 'QCSignalStrength', 'ArtMeanImages', 'QCOCTArtImageMode',
    'QCScanPatternCentered', 'QCScanClipped', 'QCSegAlgFail', 'QCFloaters',
    'QCArtifacts', 'QCIllumination', 'QCPossiblePathology', 'QCScanComplete',
    'QCSettings', 'QCDataAvailable', 'QCImageUsable',
]


def _clean_id(series):
    """Return ``series`` as strings, zero-padded to 5 characters and upper-cased."""
    return series.astype(str).str.zfill(_ID_WIDTH).str.upper()


def _patients_seen_in_2016(frame):
    """Return the set of PatientID values on rows whose ExamDate string ends in '16'."""
    in_2016 = frame['ExamDate'].str.slice(-2) == '16'
    return set(frame.loc[in_2016, 'PatientID'])


def _layer_of(file_name):
    """Return the first of 'IPL', 'GCL', 'RNFL' contained in ``file_name``, else None."""
    for layer in _LAYERS:
        if layer in file_name:
            return layer
    return None


def _write_all_and_usable(frame, save_path, stem):
    """Write ``<stem>_ALL.csv`` and ``<stem>_USABLE.csv`` (QCImageUsable == 'Yes') to ``save_path``."""
    frame.to_csv(os.path.join(save_path, stem + '_ALL.csv'), index=False)
    usable = frame[frame.QCImageUsable == 'Yes']
    usable.to_csv(os.path.join(save_path, stem + '_USABLE.csv'), index=False)


def segment(folder, qc, participants):
    """Link exported thickness-map files to the QC table and write CSV outputs.

    Every tab-separated ``*.txt`` file directly inside ``folder`` is read, its
    ``Lastname`` / ``PatientID`` / ``ExamDate`` values are normalised, and its
    rows are left-joined to the QC table on exam date and exam time.  The file
    name decides what happens next:

    * the layer is the first of ``IPL``, ``GCL``, ``RNFL`` found in the
      upper-cased file name; files naming none of them are reported and skipped;
    * names containing ``OHTS`` are pooled into the combined OHTS output for
      their layer;
    * names containing ``DSPEC`` are written out as
      ``<prefix>_HGC_Spectralis_ThMap<layer>_ALL.csv`` / ``_USABLE.csv``, and
      their rows whose ``Lastname`` matches a participant that has an
      ``rc_ohts_id`` are relabelled with that id and pooled into the combined
      OHTS output as well.

    ``<prefix>`` is the part of the file name before the first underscore.
    Outputs go to ``<folder>/PPOLE_OUTPUT`` (created on demand).  Rows without
    a QC match are collected in ``ThMapNOTlinkedtoQC.csv``.

    Parameters
    ----------
    folder : str
        Directory containing the exported ``*.txt`` files (with or without a
        trailing separator).
    qc : str
        Path to the QC table (CSV).  Must provide ``ID``, ``ExamDate`` and the
        columns listed in ``_QC_COLUMNS``.
    participants : str
        Path to the participants table (CSV) with ``rc_researchid`` and
        ``rc_ohts_id`` columns.

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    """
    # Files are processed in sorted order so that the prefix used for the
    # combined OHTS file names (taken from the last file processed) does not
    # depend on directory listing order.
    data_files = sorted(glob.glob(os.path.join(folder, '*.txt')))
    if not data_files:
        print('No .txt files found in ' + str(folder))
        return

    # Participants that have an OHTS id, reduced to the two join columns.
    # NOTE(review): rc_researchid is only upper-cased, not zero-padded, while
    # scan Lastname values are padded to 5 characters; research ids shorter
    # than 5 characters will therefore never match -- confirm this is intended.
    pts = pd.read_csv(participants)
    pts['rc_researchid'] = pts['rc_researchid'].astype(str).str.upper()
    ohts_ids = pts.loc[pts.rc_ohts_id.notnull(), ['rc_researchid', 'rc_ohts_id']]

    # QC table, with the id column renamed to match the scan files.
    qc_df = pd.read_csv(qc, parse_dates=True, dayfirst=True, low_memory=False)
    qc_df = qc_df.rename(columns={'ID': 'PatientID'})
    qc_df['ExamDate'] = qc_df['ExamDate'].astype(str)
    qc_df['PatientID'] = qc_df['PatientID'].astype(str).str.zfill(_ID_WIDTH)
    qc_link = qc_df[_QC_COLUMNS]

    # First pass: every patient with an exam dated '..16' in the QC table or
    # in any scan file.  Only the two columns needed are read here.
    ids_2016 = _patients_seen_in_2016(qc_df)
    for fi in data_files:
        visits = pd.read_csv(fi, sep='\t', encoding='iso-8859-1', low_memory=False,
                             usecols=['PatientID', 'ExamDate'])
        visits['PatientID'] = _clean_id(visits['PatientID'])
        visits['ExamDate'] = visits['ExamDate'].astype(str)
        ids_2016 |= _patients_seen_in_2016(visits)

    save_path = os.path.join(folder, 'PPOLE_OUTPUT')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    timey = None                                   # prefix of the last file processed
    unlinked_frames = []                           # rows with no QC match, per file
    ohts_frames = {layer: [] for layer in _LAYERS}  # pooled OHTS rows, per layer

    # Second pass: link, classify and write each file.
    for fi in data_files:
        base_name = os.path.basename(fi)
        input_file = base_name.upper()

        fi_type = _layer_of(input_file)
        if fi_type is None:
            print("Not a supported file type: " + input_file)
            continue

        timey = base_name.split('_')[0]
        df = pd.read_csv(fi, sep='\t', encoding='iso-8859-1', low_memory=False)

        df['Lastname'] = _clean_id(df['Lastname'])
        df['PatientID'] = _clean_id(df['PatientID'])

        # Rebuild ExamDate as zero-padded MM/DD/YYYY so it can be joined to QC.
        date_parts = df['ExamDate'].astype(str).str.split('/')
        month = date_parts.str[0].str.zfill(2).map(str)
        day = date_parts.str[1].str.zfill(2).map(str)
        year = date_parts.str[2].str.zfill(4).map(str)
        df['ExamDate'] = month + '/' + day + '/' + year

        df_all = pd.merge(df, qc_link, how='left',
                          left_on=['ExamDate', 'ExamTime'],
                          right_on=['ExamDate', 'ExamTimeRAW'])
        df_all = df_all.drop(['ExamTimeRAW'], axis=1)
        # Added after the merge so that it ends up as the last column.
        df_all['2016 OHTS visit'] = np.where(df_all['PatientID'].isin(ids_2016), 'Yes', 'No')

        # Rows the QC join could not match (taken before Firstname is recoded).
        df_noqc = df_all[df_all.ExaminedStruct.isnull()]
        if not df_noqc.empty:
            unlinked_frames.append(df_noqc)

        # Recode Firstname values.
        df_all = df_all.replace({'Firstname': {'SD': 'S1', 'NY': 'M1'}})

        if 'OHTS' in input_file:
            if not df_all.empty:
                ohts_frames[fi_type].append(df_all)

        elif 'DSPEC' in input_file:
            # Keep only rows of participants with an OHTS id and relabel them
            # with that id; the inner merge performs the filtering.
            ohts = pd.merge(df_all, ohts_ids, left_on='Lastname', right_on='rc_researchid')
            ohts['Lastname'] = ohts['rc_ohts_id']
            ohts['PatientID'] = ohts['rc_ohts_id']
            ohts = ohts.drop(['rc_researchid', 'rc_ohts_id'], axis=1)
            if not ohts.empty:
                # Only the relabelled OHTS rows are pooled, for every layer.
                ohts_frames[fi_type].append(ohts)

            segey = timey + '_HGC_Spectralis_ThMap' + fi_type
            _write_all_and_usable(df_all, save_path, segey)
            print(segey + '_ALL.csv' + ' Created')
            print(segey + '_USABLE.csv' + ' Created')

    # Combined OHTS outputs, one ALL/USABLE pair per layer that has rows.
    for layer in _LAYERS:
        if ohts_frames[layer]:
            combined = pd.concat(ohts_frames[layer])
            _write_all_and_usable(combined, save_path,
                                  timey + '_OHTS_Spectralis_ThMap' + layer)
            print('OHTS ' + layer + ' USABLE & ALL files created.')

    # Rows that never linked to QC, if there are any.
    if unlinked_frames:
        df_unlinked = pd.concat(unlinked_frames)
        df_unlinked.to_csv(os.path.join(save_path, 'ThMapNOTlinkedtoQC.csv'), index=False)
        print('ThMapNOTlinkedtoQC.csv Created')
    else:
        print('All files linked to QC! \'No ThMapNOTlinkedtoQC.csv\' created.')

    print('-------------------------------------')
    print('| OMG y\'all, your files are ready!  |')
    print('-------------------------------------')

In [57]:
# Run segment() on the paths configured in the first cell. It writes its CSV
# outputs to the PPOLE_OUTPUT sub-folder of `folder` and prints progress
# messages as files are created.
segment(folder, quality_control, participants)

20160822_HGC_Spectralis_ThMapRNFL_ALL.csv Created
20160822_HGC_Spectralis_ThMapRNFL_USABLE.csv Created
20160823_HGC_Spectralis_ThMapGCL_ALL.csv Created
20160823_HGC_Spectralis_ThMapGCL_USABLE.csv Created
20160823_HGC_Spectralis_ThMapIPL_ALL.csv Created
20160823_HGC_Spectralis_ThMapIPL_USABLE.csv Created
OHTS IPL USABLE & ALL files created.
OHTS GCL USABLE & ALL files created.
OHTS RNFL USABLE & ALL files created.
ThMapNOTlinkedtoQC.csv Created
-------------------------------------
| OMG y'all, your files are ready!  |
-------------------------------------
