# Create LST files
## These will be the input to our image classifier algorithm

### Change directory (if necessary)

In [22]:
import os
import boto3

# S3 resource handle. No credentials are passed here, so boto3 is expected to
# pick them up from its default configuration (TODO confirm for this environment).
s3r = boto3.resource('s3')
# Bucket that the upload cells at the end of this notebook write the LST files to.
bucket = s3r.Bucket('audio-classifier-data')

In [16]:
# Create the output folder for the training LST file.
# makedirs also creates any missing parent folders, and exist_ok=True makes the
# cell safe to re-run without a separate (racy) isdir check.
os.makedirs('../data/mel-spectrogram/train_lst', exist_ok=True)

In [17]:
# Create the output folder for the validation LST file.
# makedirs also creates any missing parent folders, and exist_ok=True makes the
# cell safe to re-run without a separate (racy) isdir check.
os.makedirs('../data/mel-spectrogram/validate_lst', exist_ok=True)

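### Save the following code as a script file - create-lst-train.sh

Then save a copy as create-lst-val.sh, pointing the three `*_file_path` variables at the validation image folders and setting `output_file_name` to `../data/mel-spectrogram/validate_lst/bird-audio-validate.lst`.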

#!/bin/bash
# Build the LST file for the training images.
#
# Every output line has three tab-separated fields:
#   <index> <class identifier> <class-folder>/<file-name>
# where <index> counts records from 0 across all classes.

# Image folder and numeric class identifier for each class.
A_file_path="../data/mel-spectrogram/train/eastern-whipbird"
A_identifier="0"
B_file_path="../data/mel-spectrogram/train/kookaburra"
B_identifier="1"
C_file_path="../data/mel-spectrogram/train/willie-wagtail"
C_identifier="2"


output_file_name="../data/mel-spectrogram/train_lst/bird-audio-train.lst"

# Records are collected here as "<identifier><TAB><folder>/<file>"; the index
# is prepended when the file is written.
declare -a array

# append_class IDENTIFIER DIRECTORY
# Appends one record to `array` for every regular file found under DIRECTORY.
append_class() {
    local identifier="$1"
    local file_path="$2"
    local file_name

    # rev | cut | rev keeps only the last two path components
    # (<class-folder>/<file-name>). Reading line by line, instead of looping
    # over an unquoted variable, keeps names containing spaces or glob
    # characters intact.
    while IFS= read -r file_name; do
        array+=("${identifier}"$'\t'"${file_name}")
    done < <(find "$file_path" -type f | rev | cut -d'/' -f-2 | rev | sort -n)
}

append_class "$A_identifier" "$A_file_path"
append_class "$B_identifier" "$B_file_path"
append_class "$C_identifier" "$C_file_path"

# Prefix every record with its running index and write the LST file.
# The tabs are emitted directly, so no `tr` pass over the output is needed.
i=0
for record in "${array[@]}"; do
    printf '%s\t%s\n' "$i" "$record"
    i=$((i + 1))
done > "$output_file_name"

### Give executable permissions to the script files

In [18]:
!chmod 777 create-lst-train.sh
!chmod 777 create-lst-val.sh

### Run the script files to get LST files

In [19]:
# Both scripts are invoked with ./ so they must sit in the notebook's working directory.
# create-lst-train.sh uses paths relative to that directory and writes
# ../data/mel-spectrogram/train_lst/bird-audio-train.lst.
!./create-lst-train.sh
!./create-lst-val.sh

### Count the records in the LST files

In [20]:
# The training script writes one line per image, so the line count is the record count.
!wc -l ../data/mel-spectrogram/train_lst/bird-audio-train.lst

578 ../data/mel-spectrogram/train_lst/bird-audio-train.lst


In [21]:
# Line count of the validation LST file (one line per record, assuming
# create-lst-val.sh mirrors the training script).
!wc -l ../data/mel-spectrogram/validate_lst/bird-audio-validate.lst

144 ../data/mel-spectrogram/validate_lst/bird-audio-validate.lst


### Upload the LST files to S3

In [23]:
# Upload every file under the training LST folder to the S3 bucket
# (`bucket` is created in the setup cell at the top of the notebook).
path = '../data/mel-spectrogram/train_lst'
for subdir, dirs, files in os.walk(path):
    for file in files:
        full_path = os.path.join(subdir, file)
        # The object key is the file's path relative to ../data, e.g.
        # mel-spectrogram/train_lst/<file>. relpath avoids depending on a fixed
        # number of leading path components, and the replace keeps the key
        # '/'-separated even where os.sep is not '/'.
        s3_path = os.path.relpath(full_path, '../data').replace(os.sep, '/')
        with open(full_path, 'rb') as data:
            bucket.put_object(Key=s3_path, Body=data)

In [24]:
# Upload every file under the validation LST folder to the S3 bucket
# (`bucket` is created in the setup cell at the top of the notebook).
path = '../data/mel-spectrogram/validate_lst'
for subdir, dirs, files in os.walk(path):
    for file in files:
        full_path = os.path.join(subdir, file)
        # The object key is the file's path relative to ../data, e.g.
        # mel-spectrogram/validate_lst/<file>. relpath avoids depending on a
        # fixed number of leading path components, and the replace keeps the
        # key '/'-separated even where os.sep is not '/'.
        s3_path = os.path.relpath(full_path, '../data').replace(os.sep, '/')
        with open(full_path, 'rb') as data:
            bucket.put_object(Key=s3_path, Body=data)