# Cirrus Data Exports By Study
v1.2

---
### Add File Paths Here

In [None]:
# Participants file (read with pandas.read_csv inside segment())
participants = '/Users/gary/Desktop/PatientFMPDB05May2016.mer'

# Directory containing the .mer files exported from FM that will be segmented
folder = '/Users/gary/Desktop/cirrus_test/'

---
### Segment Function w/ Clock Fix

In [5]:
import glob, os
import numpy as np
import pandas as pd

def _rotate_clock_hours(df):
    """Apply the clock-hour fix to version 1.1 rows of *df*, in place.

    For every row whose EXPORT_VERSION equals 1.1, the columns
    OPTICDISC_CLOCKHOUR_1..8 receive the values previously stored in hours
    5..12, and OPTICDISC_CLOCKHOUR_9..12 receive the values previously stored
    in hours 1..4. Rows with any other EXPORT_VERSION are left untouched.
    """
    clock_cols = ['OPTICDISC_CLOCKHOUR_' + str(i) for i in range(1, 13)]
    needs_fix = df['EXPORT_VERSION'] == 1.1
    if not needs_fix.any():
        return

    # Source columns in the order they must land in hours 1..12.
    shifted = clock_cols[4:] + clock_cols[:4]
    # .values drops the column labels so the assignment is positional; without
    # it pandas would re-align on column names and undo the rotation.
    df.loc[needs_fix, clock_cols] = df.loc[needs_fix, shifted].values


def _ohts_id_strings(ids):
    """Return *ids* as strings with any decimal part removed, zero-padded to 5."""
    return ids.astype(str).str.split('.').str.get(0).str.zfill(5)


def _write_study_csv(frame, out_dir, file_parts):
    """Save *frame* as ``<out_dir>/<'_'.join(file_parts)>`` with a .csv extension.

    Creates *out_dir* when it does not exist yet and prints the name of the
    file that was written.
    """
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    stem, _ext = os.path.splitext(os.path.join(out_dir, '_'.join(file_parts)))
    out_file = stem + '.csv'
    frame.to_csv(out_file, index=False)
    print(os.path.basename(out_file) + ' Created')


def segment(folder, fi_parts):
    """Split every ``.mer`` export in *folder* into OHTS and non-OHTS CSV files.

    For each ``*.mer`` file found directly inside *folder*:

    * if the second-to-last '_'-separated piece of the file name is
      ``OpticDisc``, the clock-hour columns of EXPORT_VERSION 1.1 rows are
      rotated (see ``_rotate_clock_hours``);
    * LAST_NAME and PATIENT_ID are upper-cased;
    * rows whose LAST_NAME matches a participant with a non-null
      ``rc_ohts_id`` are written to ``<prefix>_Cirrus_OHTS/`` with LAST_NAME
      and PATIENT_ID replaced by the zero-padded OHTS ID and FIRST_NAME values
      'SD'/'NY' mapped to 'S1'/'M1';
    * rows whose LAST_NAME matches a participant whose ``rc_study`` is not
      'OHTS' are written unchanged to ``<prefix>_Cirrus_ADAGES_DIGS/``.

    ``<prefix>`` is the first '_'-separated piece of the source file name; the
    second piece is replaced by 'OHTS' or 'HGC' in the output file name. Both
    sub-directories are created next to the source file.

    Parameters
    ----------
    folder : str
        Directory holding the ``.mer`` files. A trailing path separator is
        optional.
    fi_parts : str
        Path to the participants file (CSV-formatted) providing the
        ``rc_researchid``, ``rc_ohts_id`` and ``rc_study`` columns.

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    """
    # Grab Participants File
    parts = pd.read_csv(fi_parts)

    # Case fix and study rosters depend only on the participants table, so
    # compute them once instead of once per export file.
    parts['rc_researchid'] = parts['rc_researchid'].str.upper()
    ohts_ids = parts[parts.rc_ohts_id.notnull()]
    digs_ids = parts[parts.rc_study != 'OHTS']

    # os.path.join tolerates *folder* with or without a trailing separator.
    for fi in glob.iglob(os.path.join(folder, '*.mer')):
        # Create DataFrame
        df = pd.read_csv(fi, low_memory=False)

        # Create file pieces
        file_parts = os.path.basename(fi).split('_')
        path = os.path.dirname(fi)

        # Clock fix for Optic Disc Data
        if file_parts[-2] == 'OpticDisc':
            _rotate_clock_hours(df)

        # Case Fix (matching against rc_researchid is done in upper case)
        df['LAST_NAME'] = df['LAST_NAME'].str.upper()
        df['PATIENT_ID'] = df['PATIENT_ID'].str.upper()

        ########
        # OHTS #
        ########

        # Filter out the OHTS Participant Data
        ohts = df[df['LAST_NAME'].isin(ohts_ids.rc_researchid)]

        # Append Participant Info to end of row
        ohts = pd.merge(ohts, ohts_ids,
                        left_on='LAST_NAME', right_on='rc_researchid')

        # Clean FIRST_NAME Types
        ohts_clean = ohts.replace({'FIRST_NAME': {'SD': 'S1', 'NY': 'M1'}})

        # Set Research ID to OHTS ID in two columns, as decimal-free strings
        ohts_id_text = _ohts_id_strings(ohts_clean['rc_ohts_id'])
        ohts_clean['LAST_NAME'] = ohts_id_text
        ohts_clean['PATIENT_ID'] = ohts_id_text

        # Drop join columns from Participants Table.
        # NOTE(review): this removes the last three columns by position, which
        # presumably assumes the participants file has exactly three columns
        # -- confirm against the real export before adding columns to it.
        ohts_clean = ohts_clean.drop(ohts_clean.columns[[-1, -2, -3]], axis=1)

        # Save OHTS Participants
        file_parts[1] = 'OHTS'
        _write_study_csv(
            ohts_clean,
            os.path.join(path, file_parts[0] + '_Cirrus_OHTS'),
            file_parts,
        )

        ###############
        # DIGS/ADAGES #
        ###############

        # Filter out the Non-OHTS Participant Data
        digs = df[df['LAST_NAME'].isin(digs_ids.rc_researchid)]

        # Save DIGS/ADAGES Participants
        file_parts[1] = 'HGC'
        _write_study_csv(
            digs,
            os.path.join(path, file_parts[0] + '_Cirrus_ADAGES_DIGS'),
            file_parts,
        )

    print('-------------------------------------')
    print('| OMG y\'all, your files are ready!  |')
    print('-------------------------------------')

In [7]:
# Segment every export in `folder` using the participants file configured above
segment(folder=folder, fi_parts=participants)

20160429_OHTS_CirrusOCT_OpticDisc_USABLE.csv Created
20160429_ADAGES-DIGS_CirrusOCT_OpticDisc_USABLE.csv Created
20160429_OHTS_CirrusOCT_GCA_USABLE.csv Created
20160429_ADAGES-DIGS_CirrusOCT_GCA_USABLE.csv Created
20160429_OHTS_CirrusOCT_MTA_USABLE.csv Created
20160429_ADAGES-DIGS_CirrusOCT_MTA_USABLE.csv Created
20160429_OHTS_CirrusOCT_OpticDisc_ALL.csv Created
20160429_ADAGES-DIGS_CirrusOCT_OpticDisc_ALL.csv Created
20160429_OHTS_CirrusOCT_GCA_ALL.csv Created
20160429_ADAGES-DIGS_CirrusOCT_GCA_ALL.csv Created
20160429_OHTS_CirrusOCT_MTA_ALL.csv Created
20160429_ADAGES-DIGS_CirrusOCT_MTA_ALL.csv Created
20160429_OHTS_CirrusOCT_AdvancedRPE_ALL.csv Created
20160429_ADAGES-DIGS_CirrusOCT_AdvancedRPE_ALL.csv Created
20160429_OHTS_CirrusOCT_ScanData_ALL.csv Created
20160429_ADAGES-DIGS_CirrusOCT_ScanData_ALL.csv Created
-------------------------------------
| OMG y'all, your files are ready!  |
-------------------------------------
