In [27]:
from minio import Minio
import os

# -----------------------------
# 1. Connect to MinIO
# -----------------------------
# Connection settings are read from the environment when present so that real
# credentials never have to be committed with the notebook. The fallbacks are
# the local-development values this notebook originally hard-coded, so an
# unconfigured local run behaves exactly as before.
client = Minio(
    os.environ.get("MINIO_ENDPOINT", "localhost:9000"),
    access_key=os.environ.get("MINIO_ACCESS_KEY", "admin"),
    secret_key=os.environ.get("MINIO_SECRET_KEY", "password123"),
    secure=False,  # plain HTTP, as in the original local setup
)

# Name of the bucket used as the temporal landing zone; later cells reuse it.
bucket_name = "landing-temporal"

# Only create the bucket when it is missing, which keeps this cell re-runnable.
if not client.bucket_exists(bucket_name):
    client.make_bucket(bucket_name)
    print(f"Created bucket: {bucket_name}")
    



In [19]:
from datasets import load_dataset
import kagglehub
import os
# -----------------------------
# 2. Download Animal Dataset via KaggleHub
# -----------------------------

# Local landing-zone layout: the raw dataset lives under <base_dir>/animal_raw.
base_dir = "landing_zone/temporal"
raw_dir = os.path.join(base_dir, "animal_raw")
os.makedirs(raw_dir, exist_ok=True)
print("📁 Local folders ready at", raw_dir)

# Fetch the dataset through KaggleHub and remember the path it reports;
# the next cell relocates that folder into raw_dir.
dataset_path = kagglehub.dataset_download(
    "iamsouravbanerjee/animal-image-dataset-90-different-animals"
)
print("✅ Dataset downloaded to:", dataset_path)


📁 Local folders ready at landing_zone/temporal\animal_raw
Downloading from https://www.kaggle.com/api/v1/datasets/download/iamsouravbanerjee/animal-image-dataset-90-different-animals?dataset_version_number=5...


100%|███████████████████████████████████████████████████████████████████████████████| 656M/656M [00:34<00:00, 20.0MB/s]

Extracting files...





✅ Dataset downloaded to: C:\Users\Arman\.cache\kagglehub\datasets\iamsouravbanerjee\animal-image-dataset-90-different-animals\versions\5


In [20]:
# -----------------------------
# 3. Move images into project folder
# -----------------------------

import os
import shutil

os.makedirs(raw_dir, exist_ok=True)

# shutil.move places the source directory *inside* an existing destination
# directory, so the dataset ends up at raw_dir/<last component of dataset_path>.
moved_path = os.path.join(raw_dir, os.path.basename(os.path.normpath(dataset_path)))

if os.path.exists(moved_path):
    # Re-running the cell: the folder is already in place. Calling shutil.move
    # again would raise because the destination path already exists.
    print(f"ℹ️ Dataset already present at {moved_path}, skipping move")
else:
    # Move the entire downloaded folder into the project tree.
    shutil.move(dataset_path, raw_dir)
    print(f"✅ 'animals' folder moved to {raw_dir}")






✅ 'animals' folder moved to landing_zone/temporal\animal_raw


In [29]:
# -----------------------------
# 3. Generate CSV with image paths & names
# -----------------------------

import os

import pandas as pd  # needed for pd.DataFrame below; no cell in this notebook imported it


# Class folders sit under the moved dataset folder: <raw_dir>/5/animals/animals/<animal>/...
animals_dir = os.path.join(raw_dir, "5/animals/animals")

# One record per image file: path relative to animals_dir plus its class label.
records = []
for animal_name in sorted(os.listdir(animals_dir)):  # sorted -> reproducible row order
    animal_folder = os.path.join(animals_dir, animal_name)
    if not os.path.isdir(animal_folder):
        # Ignore stray files next to the class folders.
        continue
    for img_file in sorted(os.listdir(animal_folder)):
        # Always use "/" so the value is identical on every OS and matches the
        # "<animal>/<file>" object names used when the images are uploaded.
        img_path = f"{animal_name}/{img_file}"  # relative path
        records.append({"image_path": img_path, "Animal Name": animal_name})

csv_path = os.path.join(raw_dir, "animal_images_metadata.csv")
# Explicit columns keep the CSV header present even when no images were found.
df = pd.DataFrame(records, columns=["image_path", "Animal Name"])
df.to_csv(csv_path, index=False)
print(f"✅ CSV saved at {csv_path}")



✅ CSV saved at landing_zone/temporal\animal_raw\animal_images_metadata.csv


In [30]:
# -----------------------------
# 4. Upload data to MinIO
# -----------------------------

from minio import Minio
from minio.error import S3Error



def upload_if_not_exists(bucket_name, object_name, local_path):
    """Upload a local file to MinIO unless the object is already stored.

    Uses the notebook-level ``client`` for all calls.

    Args:
        bucket_name: Bucket to check and upload into.
        object_name: Object key inside the bucket.
        local_path: Path of the local file to upload.

    Raises:
        S3Error: Any error from the existence check other than ``NoSuchKey``,
            or any error raised by the upload itself.
    """
    try:
        # stat_object either returns the object's metadata or raises; a
        # successful call therefore means the object already exists.
        client.stat_object(bucket_name, object_name)
    except S3Error as e:
        # Only "object not found" means we should upload; anything else
        # (missing bucket, access denied, ...) is a real failure.
        if e.code != "NoSuchKey":
            raise
    else:
        return

    # Upload outside the except block so an upload failure is reported on its
    # own rather than chained to the expected NoSuchKey error.
    client.fput_object(bucket_name, object_name, local_path)
    print(f"Uploaded {object_name} to {bucket_name}")

# Upload the metadata CSV into the landing bucket (bucket_name is set in the
# connection cell, so the bucket is named in one place only).
upload_if_not_exists(bucket_name, "animal_images_metadata.csv", csv_path)

# Upload every image, keyed as "<animal>/<file>".
for animal_name in sorted(os.listdir(animals_dir)):
    animal_folder = os.path.join(animals_dir, animal_name)
    if not os.path.isdir(animal_folder):
        # Same guard as the CSV-generation cell: a stray file next to the
        # class folders would otherwise make os.listdir fail.
        continue
    for img_file in sorted(os.listdir(animal_folder)):
        local_path = os.path.join(animal_folder, img_file)
        object_name = f"{animal_name}/{img_file}"
        upload_if_not_exists(bucket_name, object_name, local_path)

        

Uploaded animal_images_metadata.csv to landing-temporal
Uploaded antelope/02f4b3be2d.jpg to landing-temporal
Uploaded antelope/03d7fc0888.jpg to landing-temporal
Uploaded antelope/058fa9a60f.jpg to landing-temporal
Uploaded antelope/0a37838e99.jpg to landing-temporal
Uploaded antelope/0b1a3af197.jpg to landing-temporal
Uploaded antelope/0b688923b0.jpg to landing-temporal
Uploaded antelope/0c16ef86c0.jpg to landing-temporal
Uploaded antelope/0e17715606.jpg to landing-temporal
Uploaded antelope/0ee903ea13.jpg to landing-temporal
Uploaded antelope/0fb2e9aa81.jpg to landing-temporal
Uploaded antelope/0fe7076f06.jpg to landing-temporal
Uploaded antelope/17a07512e9.jpg to landing-temporal
Uploaded antelope/17ddf07f85.jpg to landing-temporal
Uploaded antelope/1a3f59ee02.jpg to landing-temporal
Uploaded antelope/1b0b0b614b.jpg to landing-temporal
Uploaded antelope/1d3c6105c9.jpg to landing-temporal
Uploaded antelope/1d556456dc.jpg to landing-temporal
Uploaded antelope/1db1ad56e5.jpg to landing

In [31]:
# -----------------------------
# 5. Verify objects in Temporal Zone
# -----------------------------
# Print the name of every entry the bucket listing returns, one per line.
print("\nObjects currently in Temporal Landing Zone:")
bucket_entries = client.list_objects(bucket_name)
for entry in bucket_entries:
    print("-", entry.object_name)



Objects currently in Temporal Landing Zone:
- animal_images_metadata.csv
- antelope/
- badger/
- bat/
- bear/
- bee/
- beetle/
- bison/
- boar/
- butterfly/
- cat/
- caterpillar/
- chimpanzee/
- cockroach/
- cow/
- coyote/
- crab/
- crow/
- deer/
- dog/
- dolphin/
- donkey/
- dragonfly/
- duck/
- eagle/
- elephant/
- flamingo/
- fly/
- fox/
- goat/
- goldfish/
- goose/
- gorilla/
- grasshopper/
- hamster/
- hare/
- hedgehog/
- hippopotamus/
- hornbill/
- horse/
- hummingbird/
- hyena/
- jellyfish/
- kangaroo/
- koala/
- ladybugs/
- leopard/
- lion/
- lizard/
- lobster/
- mosquito/
- moth/
- mouse/
- octopus/
- okapi/
- orangutan/
- otter/
- owl/
- ox/
- oyster/
- panda/
- parrot/
- pelecaniformes/
- penguin/
- pig/
- pigeon/
- porcupine/
- possum/
- raccoon/
- rat/
- reindeer/
- rhinoceros/
- sandpiper/
- seahorse/
- seal/
- shark/
- sheep/
- snake/
- sparrow/
- squid/
- squirrel/
- starfish/
- swan/
- tiger/
- turkey/
- turtle/
- whale/
- wolf/
- wombat/
- woodpecker/
- zebra/
