In [2]:
# data-sorter.py: Sorts data into directories

import os
import pandas as pd
from shutil import copyfile

In [3]:
### By Date

# HELPER FUNCTIONS
def get_dataframe(sensor, filename):
    """Read the 'Date' and 'Time' columns of one sensor recording.

    The file is looked up at ``./input/Sensor <sensor>/<filename>`` relative
    to the current working directory. The first line of the file is skipped,
    so the column names are taken from the second line.

    Args:
        sensor: Sensor identifier used to build the folder name.
        filename: Name of the file inside that sensor's folder.

    Returns:
        A pandas DataFrame holding only the 'Date' and 'Time' columns.
    """
    csv_path = f'./input/Sensor {sensor}/{filename}'
    wanted_columns = ['Date', 'Time']
    return pd.read_csv(csv_path, skiprows=1, usecols=wanted_columns)

def get_time_label(timestamp):
    """Label a clock reading as 'AM' or 'PM' after shifting it by two hours.

    Args:
        timestamp: Numeric time of day; presumably encoded as HHMMSS
            (e.g. 93000 for 09:30:00) -- confirm against the sensor format.
            NOTE(review): the +20000 shift looks like a +2 h timezone
            correction -- confirm.

    Returns:
        'AM' when the shifted time is before 120000 (before lunch),
        'PM' when it is 120000 or later (after lunch).
        None only when ``timestamp`` is NaN, because NaN compares false
        against both ranges.
    """
    # Wrap past midnight: without the modulo, readings of 220000 or later
    # shift to >= 240000, match neither range and yield None, which the
    # caller would turn into file names such as '1-None.csv'.
    # The calendar date is not adjusted here.
    shifted = (timestamp + 20000) % 240000

    if 0 <= shifted < 120000:
        return 'AM'  # before lunch
    if 120000 <= shifted < 240000:
        return 'PM'  # after lunch
    return None
    
def get_date_label(date):
    """Format a compact date value as a dash-separated label.

    The value is converted to text and cut into three pieces: the first four
    characters, the next two, and whatever remains. The pieces are joined
    with '-', so 20210903 becomes '2021-09-03' and 0 becomes '0--'.

    Args:
        date: Date value; anything accepted by ``str()``.

    Returns:
        The dash-separated label as a string.
    """
    text = str(date)
    pieces = (text[:4], text[4:6], text[6:])
    return '-'.join(pieces)

def get_duplicates(folder, base_name):
    """Count the entries in ``folder`` whose names begin with ``base_name``.

    Args:
        folder: Path of the directory to scan (not recursive).
        base_name: Prefix that an entry name must start with to be counted.

    Returns:
        The number of matching directory entries as an int.
    """
    return sum(1 for entry in os.listdir(folder) if entry.startswith(base_name))

# Sensors in input to go through (one './input/Sensor <id>' folder each).
# Alternative, letter-named sensor set:
#sensors = ['A', 'B', 'C', 'D', 'E']
sensors = ['1', '2', '3', '4', '5', '6']

# Copy every recording into ./by-date/<date>/<sensor>-<AM|PM>[n].csv
for current_sensor in sensors:
    print(f'Extracting data from Sensor {current_sensor}...')

    sensor_dir = f'./input/Sensor {current_sensor}'

    # Sorted so repeated recordings receive their numeric suffix in a
    # deterministic (file-name) order.
    for filename in sorted(os.listdir(sensor_dir)):
        src_path = f'{sensor_dir}/{filename}'

        # Sub-directories cannot be parsed by read_csv; skip anything that
        # is not a regular file instead of crashing the whole run.
        if not os.path.isfile(src_path):
            continue

        # Load the Date/Time columns; files without data rows are ignored.
        df = get_dataframe(current_sensor, filename)
        if len(df) == 0:
            continue

        # The first row decides which day the recording belongs to.
        date_label = get_date_label(df['Date'].iloc[0])

        # A date of 0 is rendered as '0--' by get_date_label; skip those.
        if date_label == '0--':
            continue

        # The first row's time decides the AM/PM part of the name.
        time_label = get_time_label(df['Time'].iloc[0])

        # Create the day folder; exist_ok avoids the check-then-create race.
        folder_path = f'./by-date/{date_label}'
        os.makedirs(folder_path, exist_ok=True)

        # Label the file (format: <sensor>-<AM|PM>, e.g. 1-AM)
        new_filename = f'{current_sensor}-{time_label}'

        # Recordings that share a label get a running number appended:
        # 1-AM.csv, 1-AM2.csv, 1-AM3.csv, ...
        duplicates = get_duplicates(folder_path, new_filename)
        if duplicates > 0:
            new_filename += str(duplicates + 1)

        new_filename += '.csv'

        # Copy file from input to sorted file structure
        dst_path = f'{folder_path}/{new_filename}'
        copyfile(src_path, dst_path)
        print(f'\tCopied {src_path} --> {dst_path}')

# Inspect the sorted output. In a notebook cell only the value of the last
# expression is displayed, so the first listing is computed but not shown;
# the displayed list is the content of the 2021-10-12 folder.
os.listdir('./by-date')
os.listdir('./by-date/2021-10-12')

Extracting data from Sensor 1...
	Copied ./input/Sensor 1/B1F0008.CSV --> ./by-date/2021-09-03/1-PM.csv
	Copied ./input/Sensor 1/B1F0009.CSV --> ./by-date/2021-09-04/1-PM.csv
	Copied ./input/Sensor 1/B1F0010.CSV --> ./by-date/2021-09-06/1-AM.csv
	Copied ./input/Sensor 1/B1F0011.CSV --> ./by-date/2021-09-07/1-AM.csv
	Copied ./input/Sensor 1/B1F0012.CSV --> ./by-date/2021-09-07/1-AM2.csv
	Copied ./input/Sensor 1/B1F0013.CSV --> ./by-date/2021-09-07/1-PM.csv
	Copied ./input/Sensor 1/B1F0014.CSV --> ./by-date/2021-09-07/1-PM2.csv
	Copied ./input/Sensor 1/B1F0015.CSV --> ./by-date/2021-09-07/1-PM3.csv
	Copied ./input/Sensor 1/B1F0016.CSV --> ./by-date/2021-09-08/1-AM.csv
	Copied ./input/Sensor 1/B1F0017.CSV --> ./by-date/2021-09-08/1-PM.csv
	Copied ./input/Sensor 1/B1F0018.CSV --> ./by-date/2021-09-09/1-AM.csv
	Copied ./input/Sensor 1/B1F0019.CSV --> ./by-date/2021-09-09/1-PM.csv
	Copied ./input/Sensor 1/B1F0020.CSV --> ./by-date/2021-09-09/1-PM2.csv
	Copied ./input/Sensor 1/B1F0021.CSV -->

	Copied ./input/Sensor 4/1ED0005.CSV --> ./by-date/2021-09-08/4-PM.csv
	Copied ./input/Sensor 4/1ED0006.CSV --> ./by-date/2021-09-09/4-AM.csv
	Copied ./input/Sensor 4/1ED0007.CSV --> ./by-date/2021-09-09/4-PM.csv
	Copied ./input/Sensor 4/1ED0008.CSV --> ./by-date/2021-09-10/4-AM.csv
	Copied ./input/Sensor 4/1ED0009.CSV --> ./by-date/2021-09-10/4-AM2.csv
	Copied ./input/Sensor 4/1ED0010.CSV --> ./by-date/2021-09-15/4-PM.csv
	Copied ./input/Sensor 4/1ED0011.CSV --> ./by-date/2021-09-16/4-AM.csv
	Copied ./input/Sensor 4/1ED0012.CSV --> ./by-date/2021-09-16/4-PM.csv
	Copied ./input/Sensor 4/1ED0013.CSV --> ./by-date/2021-09-16/4-PM2.csv
	Copied ./input/Sensor 4/1ED0014.CSV --> ./by-date/2021-09-23/4-AM.csv
	Copied ./input/Sensor 4/1ED0016.CSV --> ./by-date/2021-09-26/4-PM.csv
	Copied ./input/Sensor 4/1ED0017.CSV --> ./by-date/2021-09-27/4-AM.csv
	Copied ./input/Sensor 4/1ED0018.CSV --> ./by-date/2021-09-28/4-AM.csv
	Copied ./input/Sensor 4/1ED0019.CSV --> ./by-date/2021-09-30/4-AM.csv
	Cop

	Copied ./input/Sensor 6/2E40141.CSV --> ./by-date/2021-10-12/6-PM.csv
	Copied ./input/Sensor 6/2E40142.CSV --> ./by-date/2021-10-13/6-AM.csv
	Copied ./input/Sensor 6/2E40143.CSV --> ./by-date/2021-10-13/6-AM2.csv
	Copied ./input/Sensor 6/2E40144.CSV --> ./by-date/2021-10-13/6-AM3.csv
	Copied ./input/Sensor 6/2E40145.CSV --> ./by-date/2021-10-13/6-AM4.csv
	Copied ./input/Sensor 6/2E40146.CSV --> ./by-date/2021-10-13/6-AM5.csv
	Copied ./input/Sensor 6/2E40147.CSV --> ./by-date/2021-10-13/6-AM6.csv
	Copied ./input/Sensor 6/2E40148.CSV --> ./by-date/2021-10-13/6-AM7.csv
	Copied ./input/Sensor 6/2E40149.CSV --> ./by-date/2021-10-13/6-AM8.csv
	Copied ./input/Sensor 6/2E40150.CSV --> ./by-date/2021-10-13/6-AM9.csv
	Copied ./input/Sensor 6/2E40151.CSV --> ./by-date/2021-10-13/6-AM10.csv
	Copied ./input/Sensor 6/2E40152.CSV --> ./by-date/2021-10-13/6-AM11.csv
	Copied ./input/Sensor 6/2E40153.CSV --> ./by-date/2021-10-13/6-AM12.csv
	Copied ./input/Sensor 6/2E40154.CSV --> ./by-date/2021-10-13/6

['stations.txt',
 '1-PM.csv',
 'C-PM.csv',
 'B-AM.csv',
 'A-PM.csv',
 '2-AM.csv',
 '3-PM.csv',
 'D-AM.csv',
 'E-PM.csv',
 '6-AM.csv',
 '5-PM.csv',
 '4-AM.csv',
 'C-AM.csv',
 'B-PM.csv',
 '1-AM.csv',
 '2-PM.csv',
 '3-AM.csv',
 'A-AM.csv',
 '6-PM.csv',
 'D-PM.csv',
 'E-AM.csv',
 '5-AM.csv',
 '4-PM.csv']