# Copy from S3 to EFS (SageMaker)

In [2]:
!pwd

/root


In [7]:
import os
import boto3

# Set the working directory to the EFS home folder
efs_home_dir = '/root'
os.chdir(efs_home_dir)

# S3 bucket and folder details
s3_bucket = 'w210-capstone'
s3_prefix = '01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/'

# Initialize S3 client
s3 = boto3.client('s3')

# List all objects in the specified S3 folder
response = s3.list_objects_v2(Bucket=s3_bucket, Prefix=s3_prefix)

# Check if the response contains 'Contents' (i.e., there are files in the folder)
if 'Contents' in response:
    for obj in response['Contents']:
        s3_key = obj['Key']
        
        # Get the relative path by removing the prefix from the key
        relative_path = os.path.relpath(s3_key, s3_prefix)
        local_filepath = os.path.join(efs_home_dir, relative_path)
        
        # Ensure the local directory structure is created correctly
        local_file_dir = os.path.dirname(local_filepath)
        os.makedirs(local_file_dir, exist_ok=True)
        
        # Download the file from S3 and save it locally
        try:
            temp_file_path = local_filepath + ".tmp"
            s3.download_file(s3_bucket, s3_key, temp_file_path)
            os.rename(temp_file_path, local_filepath)
            print(f"Downloaded {s3_key} to {local_filepath}")
        except OSError as e:
            print(f"Skipping {s3_key} due to OSError: {e}")

print("All files downloaded successfully.")

Skipping 01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/ due to OSError: [Errno 16] Device or resource busy: '/root/..tmp' -> '/root/.'
Downloaded 01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/.DS_Store to /root/.DS_Store
Downloaded 01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/06.Evaluations/.DS_Store to /root/06.Evaluations/.DS_Store
Downloaded 01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/07.Scripts/20240530_auto_backup_nightly.sh to /root/07.Scripts/20240530_auto_backup_nightly.sh
Downloaded 01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/07.Scripts/20240530_bgl_parser_v1.10.py to /root/07.Scripts/20240530_bgl_parser_v1.10.py
Downloaded 01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/07.Scripts/20240530_manual_backup_full.sh to /root/07.Scripts/20240530_manual_backup_full.sh
Downloaded 01.Backups/01.Manual/20240604-EFS-Recovery-Primary-GOLD/07.Scripts/20240531_bgl_sample_v1.50.py to /root/07.Scripts/20240531_bgl_sample_v1.50.py

In [5]:
import os
import boto3

# Set the working directory to root
os.chdir('/root')

# S3 bucket details
s3_bucket = 'w210-capstone'
s3_key = '11.Data/01.BlueGene/02.Raw_Parquet/part-00000-2f0ccaac-9d99-40b3-a6c0-e95514a07a1e-c000.snappy.parquet'

# Local directory and file path in EFS
local_dir = '11.Data/01.BGL/02.Raw_Parquet_CFDR'
local_filename = 'part-00000-2f0ccaac-9d99-40b3-a6c0-e95514a07a1e-c000.snappy.parquet'
local_filepath = os.path.join(local_dir, local_filename)

# Create the local directory if it doesn't exist
os.makedirs(local_dir, exist_ok=True)

# Download the file from S3 and save it locally
s3 = boto3.client('s3')
s3.download_file(s3_bucket, s3_key, local_filepath)

print(f"Downloaded {s3_key} to {local_filepath}")

Downloaded 11.Data/01.BlueGene/02.Raw_Parquet/part-00000-2f0ccaac-9d99-40b3-a6c0-e95514a07a1e-c000.snappy.parquet to 11.Data/01.BGL/02.Raw_Parquet_CFDR/part-00000-2f0ccaac-9d99-40b3-a6c0-e95514a07a1e-c000.snappy.parquet
