# 5 - Uploading to S.A.M. bucket

## 1. Load .env variables



In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

GITHUB_PROFILE_NAME = os.getenv('GITHUB_PROFILE_NAME')
genre = os.getenv('genre')
file_count = os.getenv('file_count')

## 2. Move manually your .JSON credential
  - In SAM Google Drive, go in data/sam-gcp
  - Put your json-credential.json in /sam_files
  - Open the .env for sam_files (sam_files/notebooks/dataset for fine-tuning/.env)
  - insert this line :
    - `gcp_api_json_path={json-credential.json}` (it's already in sam_files)

## 3. Uploading to Google Cloud Bucket

### 3.1. You need to be in :
  - `../stable-audio-tools-sam/sam_files`

In [2]:
%cd ..
%cd ..

/home/arthurcornelio/code/arthurcornelio88/stable-audio-tools-sam/sam_files/notebooks
/home/arthurcornelio/code/arthurcornelio88/stable-audio-tools-sam/sam_files


### 3.2. Copying folders to bucket

In [3]:
%%bash

# Replace with your actual bucket name
bucket_name="sam-dataset"

# Get the current timestamp
timestamp=$(date +"%Y-%m-%d_%H-%M-%S")

# Create the folder name within the bucket
folder_name="${file_count}_${genre}_files_${timestamp}"

#capture variables
echo "bucket_name=$bucket_name" >> "notebooks/dataset for fine-tuning/.env"
echo "timestamp=$timestamp" >> "notebooks/dataset for fine-tuning/.env"
echo "folder_name=$folder_name" >> "notebooks/dataset for fine-tuning/.env"

# List of source folders you want to upload
source_folders=("json" "dataframes" "audio_files")

# Upload each folder to the bucket
for source_folder in "${source_folders[@]}"
do
    gsutil -m cp -r -L "upload_log_${timestamp}.txt" "$source_folder" "gs://$bucket_name/$folder_name/"
done


Copying file://json/json_classical/361_Epic background music for short video Dramatic orchestral instrumental.json [Content-Type=application/json]...
Copying file://json/json_classical/334_ÏïÑÍ∏∞Îì§Ïùò ÏûêÏû•Í∞Ä2.json [Content-Type=application/json]...
Copying file://json/json_classical/274_U·ªëng N∆∞·ªõc Nh·ªõ Ngu·ªìn - Piano BGM.json [Content-Type=application/json]...
Copying file://json/json_classical/350_Legacy of Chopin. Nocturne No. 20 Hip-Hop version. Background music.json [Content-Type=application/json]...
Copying file://json/json_classical/271_DuÃõoÃõÃÅi AÃÅnh TraÃÜng YeÃÇu ThuÃõoÃõng - Nh·∫°c N·ªÅn Video.json [Content-Type=application/json]...
Copying file://json/json_classical/260_Epic Motivational Trailer.json [Content-Type=application/json]...
Copying file://json/json_classical/46_Cinematic Fairy Tale Story (Main).json [Content-Type=application/json]...
Copying file://json/json_classical/113_Cinematic Action Intro.json [Content-Type=application/json]...
Copying file://json

### 3.3. Verify if uploading operation is successful

In [4]:
from google.cloud import storage
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv(dotenv_path='notebooks/dataset for fine-tuning/.env')

# Access the variables from the .env file
bucket_name = os.getenv('bucket_name')
folder_name = os.getenv('folder_name')

def verify_upload(bucket_name, folder_name, source_folders):
    """Verifies if the specified folder and its subfolders exist in the given GCS bucket."""

    # Get the path to your service account credentials JSON file from the environment variable
    credentials_path = os.getenv('gcp_api_json_path')

    # Explicitly create the storage client using the service account credentials
    storage_client = storage.Client.from_service_account_json(credentials_path)

    bucket = storage_client.bucket(bucket_name)

    # Check if the main folder exists
    blobs = list(bucket.list_blobs(prefix=folder_name + '/'))
    if not blobs:
        print(f"Upload failed or incomplete. Main folder '{folder_name}' not found in bucket '{bucket_name}'.")
        return  # Stop further checks if the main folder is missing

    # Check for the existence of each subfolder within the main folder
    for source_folder in source_folders:
        blobs = list(bucket.list_blobs(prefix=f"{folder_name}/{source_folder}/"))

        if blobs:
            print(f"Upload successful! Subfolder '{source_folder}' and its contents found in bucket '{bucket_name}' under '{folder_name}'.")
            # (Optional) You can iterate through 'blobs' to list individual files if needed
            # for blob in blobs:
            #   print(f"  - {blob.name}")
        else:
            print(f"Upload failed or incomplete. Subfolder '{source_folder}' not found in bucket '{bucket_name}' under '{folder_name}'.")

# List of source folders you want to upload
source_folders = [
    'json',
    'dataframes/checked',
    'dataframes/filtered_by_genre',
    'audio_files/by_genre',
    'audio_files/final_backup',
]

verify_upload(bucket_name, folder_name, source_folders)


Upload successful! Subfolder 'json' and its contents found in bucket 'sam-dataset' under '498_classical_files_2024-08-14_23-31-04'.
Upload successful! Subfolder 'dataframes/checked' and its contents found in bucket 'sam-dataset' under '498_classical_files_2024-08-14_23-31-04'.
Upload successful! Subfolder 'dataframes/filtered_by_genre' and its contents found in bucket 'sam-dataset' under '498_classical_files_2024-08-14_23-31-04'.
Upload successful! Subfolder 'audio_files/by_genre' and its contents found in bucket 'sam-dataset' under '498_classical_files_2024-08-14_23-31-04'.
Upload successful! Subfolder 'audio_files/final_backup' and its contents found in bucket 'sam-dataset' under '498_classical_files_2024-08-14_23-31-04'.


## 4. Delete final operation folders 

In [5]:
%%bash

# Delete the folders and their contents
rm -rf audio_files/final_backup audio_files/by_genre json/*

# All done, bravo ! (by Arthur Corn√©lio, 12th August 2024)