### Segment Function

In [36]:
import glob, os
import numpy as np
import pandas as pd

def _format_ohts_ids(ids):
    """Return ``ids`` as zero-padded, 5-character strings without decimals.

    Each value is converted to text, everything from the first '.' onward is
    discarded (so a float such as 1234.0 becomes '1234'), and the result is
    left-padded with zeros to five characters ('01234').
    """
    return ids.astype(str).str.split('.').str.get(0).str.zfill(5)


def _save_segment(frame, src_dir, file_parts, study_label, dir_suffix):
    """Write one study's rows to a CSV inside a study-specific sub-directory.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to write (the index is not written).
    src_dir : str
        Directory that holds the source .mer file.
    file_parts : list of str
        Source file name split on '_'. Element 0 prefixes the sub-directory
        name; element 1 is replaced by ``study_label`` in the output name.
    study_label : str
        Text placed in the second '_'-separated slot of the output file name.
    dir_suffix : str
        Text appended after '<file_parts[0]>_Spectralis_' to name the
        sub-directory.

    Raises
    ------
    IndexError
        If ``file_parts`` has fewer than two elements, i.e. the source file
        name contains no '_'.
    """
    # Work on a copy so the caller's list is left untouched between studies.
    out_parts = list(file_parts)
    out_parts[1] = study_label

    out_dir = os.path.join(src_dir, file_parts[0] + '_Spectralis_' + dir_suffix)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Swap the source extension (carried by the last name part) for '.csv'.
    stem = os.path.splitext('_'.join(out_parts))[0]
    out_file = os.path.join(out_dir, stem + '.csv')
    frame.to_csv(out_file, index=False)
    print(os.path.basename(out_file) + ' Created')


def segment(folder, fi_parts):
    """Split every .mer export in ``folder`` into OHTS and ADAGES/DIGS CSV files.

    Each '*.mer' file directly inside ``folder`` is read as CSV and matched
    against the participants file on upper-cased 'Lastname' (export) versus
    upper-cased 'rc_researchid' (participants). Two files are written per
    export, each in its own sub-directory next to the export:

    * ``<prefix>_Spectralis_OHTS/`` - rows whose participant has a non-null
      'rc_ohts_id'. 'Lastname' and 'PatientID' are replaced by that ID as a
      zero-padded 5-character string, 'Firstname' values 'SD'/'NY' are
      recoded to 'S1'/'M1', and the participant columns are removed again.
    * ``<prefix>_Spectralis_ADAGES_DIGS/`` - rows whose participant has an
      'rc_study' other than 'OHTS', written unchanged apart from the
      upper-cased 'Lastname' and 'PatientID'.

    ``<prefix>`` is the part of the export's file name before the first '_';
    the second '_'-separated part is replaced by 'OHTS' / 'ADAGES-DIGS' in
    the output file names.

    Parameters
    ----------
    folder : str
        Directory containing the .mer exports; a trailing separator is
        optional.
    fi_parts : str
        Path to the participants CSV. Must provide the columns
        'rc_researchid', 'rc_ohts_id' and 'rc_study'.

    Raises
    ------
    IndexError
        If an export's file name contains no '_'.
    """
    # Participants are the same for every export, so prepare them once.
    parts = pd.read_csv(fi_parts)
    parts['rc_researchid'] = parts['rc_researchid'].str.upper()
    ohts_ids = parts[parts['rc_ohts_id'].notnull()]
    digs_ids = parts[parts['rc_study'] != 'OHTS']

    # The merge below appends every participants column to the right of the
    # export's columns; remember how many so exactly those can be removed.
    n_part_cols = len(parts.columns)

    firstname_fixes = {'SD': 'S1', 'NY': 'M1'}

    # os.path.join makes the pattern correct with or without a trailing
    # separator on `folder`.
    for fi in glob.iglob(os.path.join(folder, '*.mer')):
        df = pd.read_csv(fi, low_memory=False)

        file_parts = os.path.basename(fi).split('_')
        src_dir = os.path.dirname(fi)

        # Case fix so IDs compare equal regardless of how they were typed.
        df['Lastname'] = df['Lastname'].str.upper()
        df['PatientID'] = df['PatientID'].str.upper()

        ########
        # OHTS #
        ########

        # Keep the OHTS participants' rows and attach their participant info.
        ohts = df[df['Lastname'].isin(ohts_ids['rc_researchid'])]
        ohts = pd.merge(ohts, ohts_ids,
                        left_on='Lastname', right_on='rc_researchid')

        # Clean Firstname codes.
        ohts_clean = ohts.replace({'Firstname': firstname_fixes})

        # Replace the research ID with the formatted OHTS ID in both columns.
        formatted_ids = _format_ohts_ids(ohts_clean['rc_ohts_id'])
        ohts_clean['Lastname'] = formatted_ids
        ohts_clean['PatientID'] = formatted_ids

        # Drop the participant columns that the merge appended.
        ohts_clean = ohts_clean.iloc[:, :-n_part_cols]

        _save_segment(ohts_clean, src_dir, file_parts, 'OHTS', 'OHTS')

        ###############
        # DIGS/ADAGES #
        ###############

        digs = df[df['Lastname'].isin(digs_ids['rc_researchid'])]
        _save_segment(digs, src_dir, file_parts, 'ADAGES-DIGS', 'ADAGES_DIGS')

    print('-------------------------------------')
    print('| OMG y\'all, your files are ready!  |')
    print('-------------------------------------')


# Add File Paths Here
-----

In [37]:
# Directory holding the .mer exports to split
folder = '/Users/gary/Desktop/spectralis_test/'

# Participants lookup file (research ID, OHTS ID, study)
participants = '/Users/gary/Desktop/PatientFMPDB05May2016.mer'

# Write the per-study CSVs into sub-directories of `folder`
segment(folder=folder, fi_parts=participants)

20160505_OHTS_Spectralis_RNFL_Usable.csv Created
20160505_ADAGES-DIGS_Spectralis_RNFL_Usable.csv Created
20160505_OHTS_Spectralis_RNFL_ALL.csv Created
20160505_ADAGES-DIGS_Spectralis_RNFL_ALL.csv Created
-------------------------------------
| OMG y'all, your files are ready!  |
-------------------------------------
