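# Data Preparation for AWS SageMaker Ground Truth
This notebook prepares the inputs for an AWS SageMaker Ground Truth labeling job: it stacks the item images of each outfit into a single image and builds the manifest file that pairs each stacked image with its prompt.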

In [None]:
import pandas as pd
import numpy as np

import os
import random
from PIL import Image as PIL_Image
import tempfile
import io
from pathlib import Path
from IPython.display import Image
from PIL import ImageEnhance
import json
from typing import Union, Dict, List
import re

In [None]:
# Read both evaluation result files (the main one and the "errored" one),
# append them in that order, and renumber the rows 0..n-1.
eval_sources = [
    '/content/drive/MyDrive/MIDS Capstone/recommendation_evals.json',
    '/content/drive/MyDrive/MIDS Capstone/recommendation_evals_errored.json',
]
evals_df = pd.concat([pd.read_json(src) for src in eval_sources]).reset_index(drop=True)
evals_df

Unnamed: 0,outfit,explanation,styling,prompt,image_paths
0,"[42, 36, 2, 18]","""This outfit combines classic style with moder...","""Complete the look with a crisp white dress sh...",Men's outfit for a formal wedding,"[/content/final_dataset/17554.jpg, /content/fi..."
1,"[6, 41, 56, 81]","""This outfit combines classic formal wear with...","""Complete the look with a crisp white shirt co...",Men's outfit for a formal wedding,"[/content/final_dataset/12260.jpg, /content/fi..."
2,"[40, 57, 42, 43, 7]","""The combination of a tailored blazer, a slim-...","""Complete the look with a crisp white pocket s...",Men's outfit for a formal wedding,[/content/final_dataset/hm_fashion_images_imag...
3,"[15, 11, 16, 30]","""This outfit combines classic formal wear with...","""Pair the outfit with a crisp white pocket squ...",Men's outfit for a formal wedding,"[/content/final_dataset/14524.jpg, /content/fi..."
4,"[70, 71, 32, 30, 11]","""This outfit combines classic style with moder...","""Complete the look with a crisp white shirt an...",Men's outfit for a formal wedding,"[/content/final_dataset/11267.jpg, /content/fi..."
...,...,...,...,...,...
495,"[15, 16, 2, 77]","""The outfit combines complementary green and p...","""Pair the outfit with a statement necklace or ...",Women's outfit featuring complementary green a...,[/content/final_dataset/hm_fashion_images_imag...
496,"[12, 34, 30]","""This outfit combines the complementary colors...","""Pair the dress with a statement necklace or a...",Women's outfit featuring complementary green a...,[/content/final_dataset/hm_fashion_images_imag...
497,"[1, 22, 0, 71]","""The outfit combines a marled gray hoodie (Ite...","""Pair the outfit with a sleek backpack and a s...",Men's practical outfit for a rainy day,[/content/final_dataset/hm_fashion_images_imag...
498,"[62, 45, 39, 23, 67, 37, 87]","""This outfit combines warmth, comfort, and pra...","""Pair the outfit with a stylish beanie and glo...",Women's outfit for protection during a heavy s...,[/content/final_dataset/hm_fashion_images_imag...


In [None]:
# Persist the full combined frame to Drive as CSV (the row index is written too).
evals_csv_path = "/content/drive/MyDrive/MIDS Capstone/evals_df.csv"
evals_df.to_csv(evals_csv_path)

In [None]:
# Re-point every image path from the original dataset location to the
# Google Drive folder that holds the eval-set images.
def _relocate_to_drive(paths):
    """Return `paths` with the dataset prefix swapped for the Drive image folder."""
    return [
        p.replace("/content/final_dataset", "/content/drive/MyDrive/MIDS Capstone/eval set images")
        for p in paths
    ]

evals_df["image_paths"] = evals_df["image_paths"].apply(_relocate_to_drive)
# Show the first row as a sanity check of the rewritten paths.
evals_df["image_paths"][0]

['/content/drive/MyDrive/MIDS Capstone/eval set images/17554.jpg',
 '/content/drive/MyDrive/MIDS Capstone/eval set images/hm_fashion_images_image_3757.jpg',
 '/content/drive/MyDrive/MIDS Capstone/eval set images/hm_fashion_images_image_13185.jpg',
 '/content/drive/MyDrive/MIDS Capstone/eval set images/6502.jpg']

In [None]:
import cv2
import numpy as np

def stack_images_vertically(image_paths, output_path=None):
    """
    Stack multiple images vertically (on top of each other).

    Every image is scaled to the width of the widest input, keeping its
    aspect ratio, and the results are concatenated top to bottom in the
    order given.

    Args:
        image_paths (list): List of paths to input images. Paths that cannot
            be read are skipped with a warning.
        output_path (str, optional): Path to save the stacked image. If None, image is not saved.

    Returns:
        numpy.ndarray: The stacked image

    Raises:
        ValueError: If none of the paths could be loaded as an image.
        OSError: If output_path is given but the image could not be written.
    """
    import warnings  # local import so this cell does not depend on the imports cell

    # cv2.imread reports failure by returning None instead of raising,
    # so keep the path next to each result to be able to name the bad ones.
    loaded = [(path, cv2.imread(path)) for path in image_paths]
    unreadable = [path for path, img in loaded if img is None]
    images = [img for _, img in loaded if img is not None]

    if not images:
        raise ValueError("No valid images found in the provided paths")

    if unreadable:
        # Best-effort behaviour is kept (the rest are still stacked), but the
        # caller is told which items are missing from the result.
        warnings.warn(f"Skipping {len(unreadable)} unreadable image(s): {unreadable}")

    # Find the maximum width among all images
    max_width = max(img.shape[1] for img in images)

    # Resize all images to have the same width (maintaining aspect ratio)
    resized_images = []
    for img in images:
        height, width = img.shape[:2]
        # Integer arithmetic avoids the float round trip width/height -> max_width/ratio,
        # which can land just below a whole number and lose a pixel when truncated.
        # The max(1, ...) guard keeps cv2.resize from receiving a zero height.
        new_height = max(1, (height * max_width) // width)
        resized_images.append(cv2.resize(img, (max_width, new_height)))

    # Stack images vertically
    stacked_image = np.vstack(resized_images)

    # Save if output path is provided
    if output_path:
        # cv2.imwrite returns False (rather than raising) when the file cannot
        # be written, e.g. because the target directory does not exist.
        if not cv2.imwrite(output_path, stacked_image):
            raise OSError(f"Could not write stacked image to {output_path!r}")

    return stacked_image

In [None]:
from google.colab.patches import cv2_imshow

# Example usage: build the stacked image for the first outfit (row 0).
image_paths = evals_df["image_paths"][0]
stacked_img = stack_images_vertically(image_paths)

# Render the result inline with Colab's cv2_imshow helper imported above.
# No output_path was passed, so nothing is written to disk here.
cv2_imshow(stacked_img)

In [None]:
def add_manifest_line(prompt, image_path, bucket="dresssense-bucket-brian"):
  """Return one manifest record as a single-line JSON object string.

  The record has two keys: "source-ref", the S3 URI of the image, and
  "text-content", the prompt shown alongside it.

  Args:
      prompt: Text stored under "text-content".
      image_path: Object key of the image inside the bucket (e.g. "0.jpg").
      bucket: Name of the S3 bucket; defaults to the project bucket.

  Returns:
      str: The JSON object, without a trailing newline.
  """
  record = {
      "source-ref": f"s3://{bucket}/{image_path}",
      "text-content": prompt,
  }
  # json.dumps escapes quotes, backslashes and newlines, so any prompt text
  # still yields exactly one valid JSON line. Non-ASCII characters are
  # emitted as \uXXXX escapes, which is equivalent JSON.
  return json.dumps(record)

In [None]:
# Write one stacked image per outfit and collect the matching manifest lines
# (one JSON object per line, each terminated by a newline).
OUTFIT_DIR = "/content/drive/MyDrive/MIDS Capstone/eval_outfits"
NUM_OUTFITS = 10  # number of outfits to export; set to len(evals_df) for the full set

# cv2.imwrite does not create missing directories and does not raise when the
# write fails, so make sure the target folder exists before the loop.
os.makedirs(OUTFIT_DIR, exist_ok=True)

manifest_lines = []
for i in range(min(NUM_OUTFITS, len(evals_df))):
  image_name = f"{i}.jpg"
  stack_images_vertically(evals_df.loc[i, "image_paths"], os.path.join(OUTFIT_DIR, image_name))
  # The manifest references the image by file name only; the bucket prefix
  # is added inside add_manifest_line.
  manifest_lines.append(add_manifest_line(evals_df.loc[i, "prompt"], image_name))

manifest = "".join(line + "\n" for line in manifest_lines)
manifest

'{"source-ref": "s3://dresssense-bucket-brian/0.jpg", "text-content": "Men\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/1.jpg", "text-content": "Men\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/2.jpg", "text-content": "Men\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/3.jpg", "text-content": "Men\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/4.jpg", "text-content": "Men\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/5.jpg", "text-content": "Women\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/6.jpg", "text-content": "Women\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/7.jpg", "text-content": "Women\'s outfit for a formal wedding"}\n{"source-ref": "s3://dresssense-bucket-brian/8.jpg", "text-content": "Women\'s outfit for a formal wedding"}\n{"source-ref": "s3:

In [None]:
# Save the manifest to Drive. The encoding is pinned to UTF-8 so the file does
# not depend on the platform's default text encoding.
with open("/content/drive/MyDrive/MIDS Capstone/manifest.txt", "w", encoding="utf-8") as text_file:
    text_file.write(manifest)