In [3]:
import os
import boto3
import json
import base64
# Shared Bedrock runtime client, passed to every model invocation below.
bedrock_runtime = boto3.client('bedrock-runtime')

In [4]:
# Bedrock model identifier passed to invoke_model for post generation.
model_id = 'anthropic.claude-3-sonnet-20240229-v1:0'

# Media type to declare for the image, keyed by lower-case file extension.
# Anything not listed falls back to "image/jpeg".
_IMAGE_MEDIA_TYPES = {
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
}


def _parse_json_reply(text):
    """Parse the model reply as JSON, tolerating text wrapped around the object.

    Raises json.JSONDecodeError when no JSON value can be recovered.
    """
    try:
        # strict=False accepts literal newlines/tabs inside string values,
        # which generated multi-line posts frequently contain.
        return json.loads(text, strict=False)
    except json.JSONDecodeError:
        # The reply may be wrapped in a markdown fence or preceded by a
        # sentence; retry on the outermost {...} span only.
        start, end = text.find('{'), text.rfind('}')
        if start == -1 or end < start:
            raise
        return json.loads(text[start:end + 1], strict=False)


def generate_synthetic_post(file_path, model_id, bedrock_runtime):
    """Ask the model to write a social media post for one image.

    Args:
        file_path: Path of the image file to read and send to the model.
        model_id: Identifier passed as ``modelId`` to ``invoke_model``.
        bedrock_runtime: Client object exposing ``invoke_model(modelId=..., body=...)``
            whose result has a readable ``'body'`` stream.

    Returns:
        The JSON value parsed from the first content item of the model reply.
        When that value is a dict, its ``file_name`` key is set to the base
        name of ``file_path`` so it always matches the image on disk.

    Raises:
        OSError: If the image cannot be read.
        json.JSONDecodeError: If the reply contains no parseable JSON.
    """
    image_name = os.path.basename(file_path)

    with open(file_path, "rb") as img_file:
        image_b64 = base64.b64encode(img_file.read()).decode("utf-8")

    # Declare the media type that matches the file instead of always claiming JPEG.
    extension = os.path.splitext(image_name)[1].lower()
    media_type = _IMAGE_MEDIA_TYPES.get(extension, "image/jpeg")

    prompt = f"""
    You are a social media expert. Based on the provided image, generate the post content. 
    The tone and style can be diversified. Include emojis and tags.
    Provide the response in JSON formate, with key: text, file_name 
    for file_name, use value provided from {image_name}
    Avoid using any brand names or people's names.
    Keep the response within 100 words.
    """

    messages = {
        "role": "user",
        "content": [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": image_b64,
                },
            },
            {
                "type": "text",
                "text": prompt,
            },
        ],
    }

    claude_config = {
        'max_tokens': 1500,
        'temperature': 0,
        # Bedrock documents this field as required with this exact value.
        'anthropic_version': 'bedrock-2023-05-31',
        'top_p': 0.2,
        'stop_sequences': ['Human:'],
    }

    body = {'messages': [messages], **claude_config}
    raw_response = bedrock_runtime.invoke_model(modelId=model_id, body=json.dumps(body))
    response = json.loads(raw_response['body'].read().decode('utf-8'))

    post = _parse_json_reply(response['content'][0]['text'])
    if isinstance(post, dict):
        # Do not trust the model to echo the file name back verbatim.
        post['file_name'] = image_name
    return post

In [5]:
%%time
directory = './images'
metadata_path = './metadata/metadata.jsonl'
# Only files with these extensions are sent to the model; everything else
# found under `directory` (e.g. .DS_Store, checkpoint files) is skipped.
image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp')

# The output folder may not exist on a fresh checkout.
os.makedirs(os.path.dirname(metadata_path), exist_ok=True)

if os.path.exists(metadata_path):
    os.remove(metadata_path)
    print("Deleted previous metadata.jsonl file.")
else:
    print(" metadata.jsonl does not exist.")

# Open the output once; flush after every record so a failure part-way
# through a long run still leaves the completed records on disk.
with open(metadata_path, 'a') as f:
    for root, dirs, files in os.walk(directory):
        # Prune hidden folders in place so os.walk does not descend into them,
        # and sort for a reproducible processing order.
        dirs[:] = sorted(d for d in dirs if not d.startswith('.'))
        for file_name in sorted(files):
            file_path = os.path.join(root, file_name)
            if not file_name.lower().endswith(image_extensions):
                print(f"Skipping non-image file: {file_path}")
                continue

            synthetic_post = generate_synthetic_post(file_path, model_id, bedrock_runtime)

            json.dump(synthetic_post, f)
            f.write('\n')
            f.flush()

Deleted previous metadata.jsonl file.
CPU times: user 2.25 s, sys: 758 ms, total: 3.01 s
Wall time: 5min 17s


## Create mapping table

In [6]:
import pandas as pd

jsonl_file = './metadata/metadata.jsonl'

# Parse one JSON record per line. Blank lines (e.g. a stray trailing newline
# after manual edits) are skipped instead of raising JSONDecodeError.
with open(jsonl_file, 'r') as file:
    data = [json.loads(line) for line in file if line.strip()]

# One row per record, one column per JSON key.
df = pd.DataFrame(data)


In [7]:
# Build each image's path as ./images/<text before the first '-'>/<file_name>.
# NOTE(review): assumes images are stored in a sub-folder named after that prefix - confirm.
file_names = df['file_name']
df['local_path'] = file_names.map(lambda name: f"./images/{name.split('-')[0]}/{name}")

In [8]:
# Write the mapping table to CSV without the DataFrame index column.
csv_file = 'data_mapping.csv'
df.to_csv(path_or_buf=csv_file, index=False)

In [9]:
# Look up the region name of a default boto3 session.
my_session = boto3.Session()
my_region = my_session.region_name