## this notebook has been productionized in ./tensor_hero/preprocessing/data.py

In [23]:
# To process the training data, we will use populate_processed_folder()
from pathlib import Path
import os
from tensor_hero.preprocessing.data import populate_processed_folder, populate_with_simplified_notes, __get_audio_file_name
import shutil

# Both the raw and the processed training data live under the same root in the current working directory
training_data_root = Path.cwd() / 'Training Data - Copy'
unprocessed_path = training_data_root / 'Unprocessed'
processed_path = training_data_root / 'Processed'

def check_for_sub_packs(unprocessed_path):
    '''
    Checks whether the track packs in unprocessed_path link directly to songs or if they are organized
    into sub-packs. Returns list of paths of track packs with sub-packs

    Each track pack is classified by sampling: the first directory listed inside the track pack is
    opened, and if the first entry listed inside *that* directory is itself a directory, the layout is
    taken to be track pack / sub-pack / song rather than track pack / song / files.

    Stray files (readme, archives, 'Thumbs.db', '.DS_Store') at the top level or inside a track pack
    are skipped instead of being listed as if they were directories.
    
    ~~~~ ARGUMENTS ~~~~
    - unprocessed_path (Path or str): Path to unprocessed training data
    
    ~~~~ RETURNS ~~~~
    - list of Paths: Paths to track pack directories that contain sub-packs
    '''
    ignored = ('Thumbs.db', '.DS_Store')
    unprocessed_path = Path(unprocessed_path)  # allow plain strings as well as Path objects

    sub_packs = []
    for track_pack in os.listdir(unprocessed_path):
        track_pack_path = unprocessed_path / track_pack
        # Only directories can be track packs; listing a stray file would raise NotADirectoryError
        if track_pack in ignored or not track_pack_path.is_dir():
            continue
        for candidate in os.listdir(track_pack_path):
            candidate_path = track_pack_path / candidate
            # Skip loose files so that the sample is always a song folder or a sub-pack folder
            if candidate in ignored or not candidate_path.is_dir():
                continue
            for entry in os.listdir(candidate_path):
                if entry in ignored:
                    continue
                # A directory here means the candidate holds song folders, i.e. it is a sub-pack
                if (candidate_path / entry).is_dir():
                    sub_packs.append(track_pack_path)
                # Only the first real entry is sampled
                break
            # Only the first directory of each track pack is sampled
            break
    return sub_packs

# Collect every track pack whose songs sit one level too deep (inside sub-packs) and show them
sub_packs = check_for_sub_packs(unprocessed_path=unprocessed_path)
print(sub_packs)

def _holds_directories(path):
    '''Returns True if path is a directory that contains at least one directory (i.e. looks like a sub-pack)'''
    return path.is_dir() and any(child.is_dir() for child in path.iterdir())


def pop_sub_packs(sub_packs):
    '''
    Takes the songs within the sub-packs and moves them outside their sub-pack directory to the track pack
    directory, then deletes the emptied sub-pack directories.

    - Only directories that themselves contain directories are treated as sub-packs. Loose files and
      song folders that already sit directly in the track pack are left where they are, so running this
      twice on the same track pack is harmless.
    - Songs keep their full folder name (names containing a '.', such as 'Song feat. Guest', are not cut
      short).
    - If the track pack already holds an entry with the same name as a song being moved, that entry is
      replaced by the song from the sub-pack.
    - 'Thumbs.db' and '.DS_Store' inside a sub-pack are not moved; they are deleted with the sub-pack.
    
    ~~~~ ARGUMENTS ~~~~
    - sub_packs (list of Paths): Path to track pack directories that contain sub-packs
                                 Should be the output of check_for_sub_packs
    '''
    import tempfile  # local import so this block does not depend on the file's import section

    ignored = ('Thumbs.db', '.DS_Store')
    for track_pack in sub_packs:
        track_pack = Path(track_pack)
        nested = [track_pack / name for name in os.listdir(track_pack)
                  if name not in ignored and _holds_directories(track_pack / name)]
        if not nested:
            continue

        # Park every sub-pack in a scratch directory first. Otherwise a song that shares its name with a
        # sub-pack (e.g. a title track) would make us delete that sub-pack before its songs were moved.
        staging = Path(tempfile.mkdtemp(prefix='.popping-', dir=str(track_pack)))
        staged = []
        for sub_pack in nested:
            parked = staging / sub_pack.name
            shutil.move(str(sub_pack), str(parked))
            staged.append(parked)

        for sub_pack in staged:
            for name in os.listdir(sub_pack):
                if name in ignored:
                    continue
                destination = track_pack / name
                # Replace whatever already occupies the song's place in the track pack
                if destination.is_dir() and not destination.is_symlink():
                    shutil.rmtree(destination)
                elif os.path.lexists(destination):
                    os.remove(destination)
                # Move song directory outside sub_pack
                shutil.move(str(sub_pack / name), str(destination))

        # Delete the (now empty) sub_pack folders together with the scratch directory
        shutil.rmtree(staging)

# Flatten the track packs found above so that every song sits directly inside its track pack
pop_sub_packs(sub_packs=sub_packs)

[WindowsPath('c:/Users/ewais/Documents/GitHub/tensor-hero/Training Data - Copy/Unprocessed/Guitar Hero X')]
