In [None]:
def update_stats(df):
    """Fold the 'acc' columns of ``df`` into the running min/max in ``stats``.

    The columns whose name ends in 'acc' are flattened into one array and
    their extrema are merged into the module-level ``stats['acc']`` mapping,
    so that after several calls ``stats['acc']['min']`` / ``['max']`` describe
    all data seen so far rather than only the most recent frame.

    A missing or ``None`` 'min'/'max' entry is treated as "not set yet".
    NOTE(review): assumes ``stats['acc']`` is a plain dict that is either
    empty or holds ``None`` placeholders before the first call -- confirm
    against where ``stats`` is initialised.

    Args:
        df: pandas DataFrame; only columns matching the regex 'acc$' are used.
    """
    acc_data = df.filter(regex='acc$', axis=1).values.flatten()
    # An empty array has no min/max (the reduction would raise), and it has
    # nothing to contribute to the running stats anyway.
    if acc_data.size == 0:
        return

    acc_stats = stats['acc']
    batch_min = acc_data.min()
    batch_max = acc_data.max()

    # Merge with what previous calls recorded instead of overwriting it.
    prev_min = acc_stats.get('min')
    prev_max = acc_stats.get('max')
    acc_stats['min'] = batch_min if prev_min is None else min(prev_min, batch_min)
    acc_stats['max'] = batch_max if prev_max is None else max(prev_max, batch_max)


def get_normalization_stats(path):
    """Walk ``path`` and fill in the global 'acc' normalization stats.

    Every file of each directory that contains an 's01.txt' entry is read
    with ``read_data`` and handed to ``update_stats``. After the walk, the
    peak-to-peak range (max - min) is stored in ``stats['acc']['ptp']``.

    Args:
        path: root directory to walk.
    """
    for current_dir, _sub_dirs, filenames in os.walk(path):
        # Directories without the 's01.txt' marker file are skipped entirely.
        if 's01.txt' not in filenames:
            continue
        for filename in filenames:
            # read_data keeps only the columns that contain 'acc'
            acc_frame = read_data(os.path.join(current_dir, filename))
            update_stats(acc_frame)

    # Peak to peak (ptp) of everything recorded by update_stats
    acc_stats = stats['acc']
    acc_stats['ptp'] = acc_stats['max'] - acc_stats['min']
            
def create_images(data_path, save_path):
    """Create one image per file found under ``data_path``.

    Every file of each directory that contains a 'segment_1.txt' entry is
    normalized with ``create_normalized_df``, reduced to its 'acc$' columns
    and passed to ``save_image``.

    Args:
        data_path: root directory to walk for input files.
        save_path: directory handed to ``save_image`` as the output location.
    """
    for current_dir, _sub_dirs, filenames in os.walk(data_path):
        # Directories without the 'segment_1.txt' marker file are skipped.
        if 'segment_1.txt' not in filenames:
            continue
        for filename in filenames:
            source_path = os.path.join(current_dir, filename)
            normalized = create_normalized_df(source_path)
            # Only care about acc right now.
            acc_only = normalized.filter(regex='acc$', axis=1)
            # The image name is derived from the source path inside save_image
            save_image(acc_only, source_path, save_path)
                
            
def save_image(df, original_path, save_path):
    """Save ``df`` as an image file inside ``save_path``.

    The file name comes from ``create_image_name(original_path)``
    (e.g., 'eating_sheep_58_segment_101.png'). The frame's values are cast
    to uint8 and transposed before being turned into an image.

    Args:
        df: DataFrame holding the values to render.
        original_path: path of the source file, used to build the image name.
        save_path: directory the image is written to.
    """
    target_name = create_image_name(original_path)
    # Cast to 8-bit values, then transpose the resulting array
    pixel_values = df.to_numpy().astype(np.uint8)
    picture = Image.fromarray(pixel_values.T)
    destination = os.path.join(save_path, target_name)
    picture.save(destination)
    
def create_image_name(original_path):
    """Build the image file name for a source file path.

    The path is split into components and the first two are dropped
    (e.g., the '.' and 'datap' of './datap/b_1/sheep_58/segment_101.txt').
    The first remaining component is looked up in ``predicted_labels_map``
    (e.g., 'b_1' -> 'eating') with spaces turned into '_'. The components are
    then joined with '_' and '.txt' is replaced with '.png', giving e.g.
    'eating_sheep_58_segment_101.png'.

    Args:
        original_path: path of the source file, as built with os.path.join.

    Returns:
        The image file name as a string.

    Raises:
        IndexError: if the path has fewer than three components.
        KeyError: if the third component is not in ``predicted_labels_map``.
    """
    # Local import so this function does not depend on a file-level import.
    import os

    # Callers build paths with os.path.join / os.walk, which use os.sep.
    # Normalise to '/' so the split below also works where os.sep is not '/'
    # (this is a no-op on POSIX).
    normalized_path = original_path.replace(os.sep, '/')
    # [2:] keeps from the 3rd component on, i.e. leaves out '.' and 'datap'
    unlabeled_data = normalized_path.split('/')[2:]
    # Replace the directory name with its predicted label
    unlabeled_data[0] = predicted_labels_map[unlabeled_data[0]].replace(' ', '_')
    return '_'.join(unlabeled_data).replace('.txt', '.png')
            
def read_data(path):
    """Read a CSV file and keep only the columns whose name contains 'acc'.

    Args:
        path: location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        A DataFrame restricted to the columns matching the regex 'acc'.
    """
    # The keyword is `header` (row 0 holds the column names); `head` is not
    # a read_csv parameter and raises TypeError.
    return pd.read_csv(path, header=0).filter(regex='acc', axis=1)
      
def create_normalized_df(path):
    """Read the file at ``path`` and rescale its 'acc$' columns.

    Each column is mapped through ``255 * (x - min) / ptp`` using the values
    in the global ``stats['acc']`` and then rounded.

    Args:
        path: location of the file, passed to ``read_data``.

    Returns:
        A DataFrame holding the rescaled, rounded 'acc$' columns.
    """
    ## TODO reject column_regex if it is not one of
    # acc$, gyro$, mag$, reject

    def _rescale(column):
        # Shift by the recorded minimum, then scale by the peak-to-peak range
        shifted = column - stats['acc']['min']
        return round(255 * shifted / stats['acc']['ptp'])

    raw_frame = read_data(path)
    acc_columns = raw_frame.filter(regex='acc$', axis=1)
    return acc_columns.apply(_rescale)
         