# Read in files from GCP Bucket

## pickle files

In [95]:
def readGcpPkl(auth_json, bucket, gcp_filepath):
    """
    Reads in a pkl file from GCP bucket storage

    Inputs:
    auth_json = str path to service account json auth file
    bucket = str bucket name
    gcp_filepath = filepath to file in bucket

    Outputs:
    data = unpickled contents of the file

    Raises:
    FileNotFoundError if gcp_filepath does not exist in the bucket
    """
    from google.cloud import storage
    import pickle as pkl

    # create storage client
    storage_client = storage.Client.from_service_account_json(auth_json)
    # get bucket with name (own variable so the str name stays available for messages)
    gcp_bucket = storage_client.get_bucket(bucket)
    # get bucket data as blob; get_blob returns None when the object is missing
    blob = gcp_bucket.get_blob(gcp_filepath)
    if blob is None:
        raise FileNotFoundError(
            'No file {} found in bucket {}'.format(gcp_filepath, bucket)
        )
    # download the raw pickle payload (bytes, despite the method name)
    pkl_data = blob.download_as_string()

    # NOTE: unpickling can execute arbitrary code - only read from trusted buckets
    data = pkl.loads(pkl_data)
    return data

## wav files

In [96]:
def readGcpWav(auth_json, bucket, gcp_filepath, sr):
    """
    Reads in a wav file from GCP bucket storage

    Inputs:
    auth_json = str path to service account json auth file
    bucket = str bucket name
    gcp_filepath = filepath to file in bucket
    sr = sampling rate the caller expects; the audio is NOT resampled, a
         warning is issued if the file's own rate differs (None skips the check)

    Outputs:
    data = wav file samples at the file's native sampling rate

    Raises:
    FileNotFoundError if gcp_filepath does not exist in the bucket
    """
    from google.cloud import storage
    import soundfile as sf
    import io
    import warnings

    # create storage client
    storage_client = storage.Client.from_service_account_json(auth_json)
    # get bucket with name (own variable so the str name stays available for messages)
    gcp_bucket = storage_client.get_bucket(bucket)
    # get bucket data as blob; get_blob returns None when the object is missing
    blob = gcp_bucket.get_blob(gcp_filepath)
    if blob is None:
        raise FileNotFoundError(
            'No file {} found in bucket {}'.format(gcp_filepath, bucket)
        )
    # download the raw wav payload (bytes, despite the method name)
    wav_data = blob.download_as_string()

    # decode from memory; keep the file's rate separate from the requested one
    data, native_sr = sf.read(io.BytesIO(wav_data))
    if sr is not None and native_sr != sr:
        # make the mismatch visible instead of silently discarding `sr`
        warnings.warn(
            '{} has sampling rate {} but {} was requested; '
            'returning data at the native rate'.format(gcp_filepath, native_sr, sr)
        )
    return data

## midi files

In [97]:
def readGcpMidi(auth_json, bucket, gcp_filepath):
    """
    Reads in a midi file from GCP bucket storage

    Inputs:
    auth_json = str path to service account json auth file
    bucket = str bucket name
    gcp_filepath = filepath to file in bucket

    Outputs:
    data = pretty_midi.PrettyMIDI object built from the midi file

    Raises:
    FileNotFoundError if gcp_filepath does not exist in the bucket
    """
    from google.cloud import storage
    import pretty_midi
    import os
    import tempfile

    # create storage client
    storage_client = storage.Client.from_service_account_json(auth_json)
    # get bucket with name (own variable so the str name stays available for messages)
    gcp_bucket = storage_client.get_bucket(bucket)
    # get bucket data as blob; get_blob returns None when the object is missing
    blob = gcp_bucket.get_blob(gcp_filepath)
    if blob is None:
        raise FileNotFoundError(
            'No file {} found in bucket {}'.format(gcp_filepath, bucket)
        )

    # download into a private temporary directory so concurrent calls cannot
    # clobber each other and the file is removed even if parsing fails
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, 'tempFile.midi')
        blob.download_to_filename(temp_path)
        data = pretty_midi.PrettyMIDI(temp_path)
    return data

## numpy files

In [123]:
def readGcpNpy(auth_json, bucket, gcp_filepath):
    """
    Reads in a npy file from GCP bucket storage

    Inputs:
    auth_json = str path to service account json auth file
    bucket = str bucket name
    gcp_filepath = filepath to file in bucket

    Outputs:
    data = npy file contents

    Raises:
    FileNotFoundError if gcp_filepath does not exist in the bucket
    """
    from google.cloud import storage
    import numpy as np
    import os
    import tempfile

    # create storage client
    storage_client = storage.Client.from_service_account_json(auth_json)
    # get bucket with name (own variable so the str name stays available for messages)
    gcp_bucket = storage_client.get_bucket(bucket)
    # get bucket data as blob; get_blob returns None when the object is missing
    blob = gcp_bucket.get_blob(gcp_filepath)
    if blob is None:
        raise FileNotFoundError(
            'No file {} found in bucket {}'.format(gcp_filepath, bucket)
        )

    # download into a private temporary directory so concurrent calls cannot
    # clobber each other and the file is removed even if loading fails
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, 'tempFile.npy')
        blob.download_to_filename(temp_path)
        data = np.load(temp_path)
    return data

In [98]:
# Testing pkl

In [99]:
# Service-account key file: it must be saved locally, so point this path at your own copy.
# (It has been uploaded to Google Drive for anyone who needs it.)
auth_json = 'C:/Users/andre/Downloads/Find Your Rhythm-57160f79f377.json'
# name of the GCP bucket and the path of the pickle file inside it
bucket = 'fyr-audio-data'
gcp_filepath = 'raw-audio/IDMTdict.pkl'

In [100]:
# load the pickle from the bucket and show what kind of object came back
pkl_file = readGcpPkl(
    auth_json=auth_json,
    bucket=bucket,
    gcp_filepath=gcp_filepath,
)
type(pkl_file)

dict

In [88]:
# Testing wav

In [101]:
# wav file inside the bucket, and the sampling rate value handed to readGcpWav
gcp_filepath = 'raw-audio/IDMT/RealDrum01_00#MIX.wav'
sr = 22050

In [103]:
# load the wav from the bucket and show what kind of object came back
wav_file = readGcpWav(
    auth_json=auth_json,
    bucket=bucket,
    gcp_filepath=gcp_filepath,
    sr=sr,
)
type(wav_file)

numpy.ndarray

In [104]:
# Testing midi: path of one eGMD midi file inside the bucket
gcp_filepath = 'raw-audio/eGMD/e-gmd-v1.0.0-midi/drummer1/session1/101_dance-disco_120_beat_4-4_1.midi'

In [106]:
# load the midi from the bucket and show what kind of object came back
midi_file = readGcpMidi(
    auth_json=auth_json,
    bucket=bucket,
    gcp_filepath=gcp_filepath,
)
type(midi_file)

pretty_midi.pretty_midi.PrettyMIDI

## Leverage GCP bucket directories to iterate through files

In [107]:
# `storage` is otherwise only imported inside the helper functions, so it must
# be imported at notebook level before a client can be built here
from google.cloud import storage

storage_client = storage.Client.from_service_account_json(auth_json)

In [122]:
# names of the .midi objects among the first 10 blobs listed under the eGMD
# prefix (max_results caps the listing before the extension filter is applied)
[
    item.name
    for item in storage_client.list_blobs(
        bucket,
        prefix='raw-audio/eGMD/',
        max_results=10,
    )
    if item.name.endswith('.midi')
]

['raw-audio/eGMD/e-gmd-v1.0.0-midi/drummer1/eval_session/10_soul-groove10_102_beat_4-4_1.midi',
 'raw-audio/eGMD/e-gmd-v1.0.0-midi/drummer1/eval_session/10_soul-groove10_102_beat_4-4_10.midi',
 'raw-audio/eGMD/e-gmd-v1.0.0-midi/drummer1/eval_session/10_soul-groove10_102_beat_4-4_11.midi',
 'raw-audio/eGMD/e-gmd-v1.0.0-midi/drummer1/eval_session/10_soul-groove10_102_beat_4-4_12.midi',
 'raw-audio/eGMD/e-gmd-v1.0.0-midi/drummer1/eval_session/10_soul-groove10_102_beat_4-4_13.midi',
 'raw-audio/eGMD/e-gmd-v1.0.0-midi/drummer1/eval_session/10_soul-groove10_102_beat_4-4_14.midi']

## Upload files to GCP

**Note: it is SIGNIFICANTLY faster to upload with `gsutil`, whose `-m` flag copies using multi-threading:**

gsutil -m cp {local_filepath} gs://{bucket}/{gcp_filepath}

In [155]:
def writeGcpFile(auth_json, bucket, gcp_filepath, local_filepath):
    """
    Writes a file to GCP bucket storage

    NOTE: for large or many files the multi-threaded gsutil command is faster:
    gsutil -m cp {local_filepath} gs://{bucket}/{gcp_filepath}

    Inputs:
    auth_json = str path to service account json auth file
    bucket = str bucket name
    gcp_filepath = str filepath to file in bucket
    local_filepath = str filepath to local file

    Outputs:
    prints a statement showing successful file upload to gcp_filepath
    """
    from google.cloud import storage
    import os

    # create storage client
    storage_client = storage.Client.from_service_account_json(auth_json)
    # get bucket with name (own variable so the str argument is not shadowed)
    gcp_bucket = storage_client.bucket(bucket)
    # write local file to GCP
    blob = gcp_bucket.blob(gcp_filepath)
    blob.upload_from_filename(local_filepath)

    # basename handles the platform's path separators (e.g. back-slashes on
    # Windows), where splitting on '/' would print the whole path
    print('File {} uploaded to {}.'.format(
            os.path.basename(local_filepath),
            gcp_filepath)
         )

In [157]:
# upload example
#file = 'val_x.npy'
#local_filepath = 'C:/Users/andre/Downloads/' + file
#gcp_filepath = 'model_inputs/mdb_idmt_only/' + file


#writeGcpFile(auth_json, bucket, gcp_filepath, local_filepath)

File val_x.npy uploaded to model_inputs/mdb_idmt_only/val_x.npy.
