**Christine Sako, DATASCI W210, SPRING 2026**

# Retrieving and Saving the Free Music Archive (FMA) from AWS


## Installs, Imports, and Mounting GDrive

In [None]:
import sys
!{sys.executable} -m pip install boto3
import boto3
from google.colab import userdata
import os
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Accessing AWS, Defining Filepaths, and Confirming Files

In [None]:
# Retrieving AWS credentials through google.colab's userdata helper
# (keeps the keys out of the notebook source)
aws_access_key_id = userdata.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = userdata.get('AWS_SECRET_ACCESS_KEY')

# Creating S3 client
s3 = boto3.client(
    's3',
    aws_access_key_id = aws_access_key_id,
    aws_secret_access_key = aws_secret_access_key,
    region_name = 'us-west-2'
)

# Accessing S3 FMA bucket
bucket_name = 'mids-capstone-music-ad-matching-2026'
prefix = 'raw-data/fma/'

# Defining GDrive filepath
local_dir = '/content/drive/MyDrive/SPRING 2026/DATASCI 210/Capstone/Data/FMA'

# Ensuring the GDrive destination directory exists (created if missing)
os.makedirs(local_dir, exist_ok = True)

# Listing objects in the FMA folder.
# A single list_objects_v2 call returns at most 1,000 keys, so the paginator
# is used to walk every page instead of silently stopping at the first one.
paginator = s3.get_paginator('list_objects_v2')
pages = paginator.paginate(
    Bucket = bucket_name,
    Prefix = prefix
)

# `response` keeps the {'Contents': [...]} shape so code that reads
# response.get('Contents', []) sees the complete listing
response = {
    'Contents': [obj for page in pages for obj in page.get('Contents', [])]
}

# Printing keys to confirm contents
for obj in response['Contents']:
    print(obj['Key'])



raw-data/fma/
raw-data/fma/echonest.csv
raw-data/fma/features.csv
raw-data/fma/genres.csv
raw-data/fma/tracks.csv


## Retrieving and Storing Data

In [None]:
# Downloading each CSV listed under the FMA prefix into the GDrive folder
for obj in response.get('Contents', []):
    key = obj['Key']

    # Skipping anything that is not a CSV; this also skips the folder
    # placeholder key, which ends with '/'
    if not key.endswith('.csv'):
        continue

    filename = os.path.basename(key)
    local_path = os.path.join(local_dir, filename)

    # Skipping files that already exist locally with the same byte size as
    # the S3 object, so re-running the notebook does not re-transfer them.
    # NOTE: size is the only check; delete the local file to force a refresh
    # if the S3 object was replaced by one of identical size.
    if os.path.isfile(local_path) and os.path.getsize(local_path) == obj.get('Size'):
        print(f"Skipped {filename} (already downloaded)")
        continue

    s3.download_file(bucket_name, key, local_path)
    print(f"Downloaded {filename}")

Downloaded echonest.csv
Downloaded features.csv
Downloaded genres.csv
Downloaded tracks.csv
