In [10]:
# This requires two dependencies: langchain and langchain_aws
# You also need datasets to load the wikiart-subjects dataset
# pip install langchain langchain_aws datasets

# Available models:
# amazon.titan-embed-text-v1
# amazon.titan-embed-image-v1
# anthropic.claude-3-5-sonnet-20240620-v1:0
# cohere.embed-multilingual-v3
# meta.llama3-70b-instruct-v1:0

import boto3
from langchain_aws.chat_models.bedrock import ChatBedrock
from langchain_aws.embeddings.bedrock import BedrockEmbeddings
from datasets import load_dataset
import os
import pandas as pd

# Fetch the WikiArt subjects dataset from the Hugging Face Hub:
# https://huggingface.co/datasets/jlbaker361/wikiart-subjects
full_dataset = load_dataset("jlbaker361/wikiart-subjects")

# The full dataset is quite large (815MB), so development works on a 5% random
# sample carved out of the "train" split. Passing a fixed seed yields the same
# subset on every run, which keeps testing & debugging consistent; omit the
# `seed` argument to draw a different random sample each time.
split_parts = full_dataset["train"].train_test_split(test_size=0.05, seed=42)
small_dataset = split_parts["test"]

# Sanity check that the data is there: convert the sample to a DataFrame and
# print its first 5 rows.
small_dataset_df = small_dataset.to_pandas()
print(small_dataset_df.head(5))

                                               image  \
0  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
1  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
2  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
3  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
4  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   

                                             text               style  
0   the cover of the book the magician's daughter  art-nouveau-modern  
1               the adoration of the holy trinity             baroque  
2  a painting of a man on horseback with two dogs  art-nouveau-modern  
3          a painting of a woman in a white dress             baroque  
4           a painting of a woman laying on a bed       expressionism  


In [7]:
# Start a session with AWS via the Boto3 Python SDK.
#
# Credentials are intentionally NOT passed here: access keys written into a
# notebook leak through version control and shared copies. With no explicit
# keys, boto3 resolves credentials through its default provider chain (for
# example the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables,
# the shared ~/.aws/credentials file, or an attached IAM role).
#
# The region is taken from AWS_DEFAULT_REGION when it is set and non-empty;
# otherwise it falls back to us-east-1.
session = boto3.Session(
  region_name=os.environ.get('AWS_DEFAULT_REGION') or 'us-east-1'
)

# Client for the 'bedrock-runtime' service, created from the session above.
client = session.client('bedrock-runtime')