In [1]:
import os

# Move the working directory up to the notebook's parent directory so that
# relative paths used later resolve from there.
# The target is resolved to an absolute path once and remembered: a plain
# `os.chdir("../")` climbs one more level every time the cell is re-run,
# whereas this version is idempotent within a kernel session.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath(os.pardir)
os.chdir(PROJECT_ROOT)

In [4]:
# !pip install ultralytics pytesseract

import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from PIL import Image
from ultralytics import YOLO
import os
import pytesseract

# --- Configuration -----------------------------------------------------------
PERFORMANCE_CSV = 'data/performance_data.csv'
ASSETS_DIR = 'data/Assets'
DEMO_ROWS = 1  # only the first row(s) are processed, for demonstration purposes


def extract_image_features(image_path, model, n_colors=5):
    """Extract objects, dominant colours, box positions and text from one image.

    Args:
        image_path: Path of the image file to analyse.
        model: A loaded YOLO model; it is called on ``image_path`` and its
            ``names`` mapping is used to translate class ids into labels.
        n_colors: Number of KMeans clusters whose centres are reported as the
            dominant colours.

    Returns:
        dict with the keys
        ``'objects'``   - label of every detected box,
        ``'colors'``    - ``n_colors`` integer RGB cluster centres,
        ``'positions'`` - one ``[x1, y1, x2, y2]`` list per detected box,
                          divided by the image width/height,
        ``'text'``      - the string returned by pytesseract for the image.
        ``'objects'`` and ``'positions'`` are empty lists when nothing is
        detected.
    """
    detections = model(image_path)

    # The context manager closes the underlying file handle once we are done.
    with Image.open(image_path) as img:
        width, height = img.size

        # Object identification + position extraction.
        # Walk over *every* box of every result: indexing `[0]` would keep only
        # the first detection and raise IndexError on images with none.
        # `.tolist()` yields plain Python numbers instead of tensor scalars.
        objects = []
        positions = []
        for detection in detections:
            boxes = detection.boxes
            objects.extend(model.names[int(cls_id)] for cls_id in boxes.cls.tolist())
            positions.extend(
                [x1 / width, y1 / height, x2 / width, y2 / height]
                for x1, y1, x2, y2 in boxes.xyxy.tolist()
            )

        # Colour identification: cluster all RGB pixels and report the centres.
        # n_init is pinned to 10 (the default named in the sklearn warning) so
        # results stay the same and the warning is no longer emitted.
        pixels = np.array(img.convert('RGB')).reshape(-1, 3)
        kmeans = KMeans(n_clusters=n_colors, n_init=10, random_state=0).fit(pixels)
        colors = kmeans.cluster_centers_.astype(int).tolist()

        # Character recognition
        text = pytesseract.image_to_string(img)

    return {'objects': objects, 'colors': colors, 'positions': positions, 'text': text}


# Load the performance data
df_performance = pd.read_csv(PERFORMANCE_CSV)
df_performance = df_performance.head(DEMO_ROWS)

# Load the YOLOv8 model once; it does not depend on the image being processed
# (the weights file may need to be downloaded first).
model = YOLO('yolov8n.pt')

# Extract features for every game that has a preview image on disk.
# Games without an image are skipped and end up with NaN after the left merge.
results = {}
for game_id in df_performance['game_id']:
    image_path = f'{ASSETS_DIR}/{game_id}/_preview.png'
    if not os.path.exists(image_path):
        continue
    results[game_id] = extract_image_features(image_path, model)

# Create a new dataframe from the results dictionary (one row per game_id)
df_features = pd.DataFrame.from_dict(results, orient='index')

# Merge the new dataframe with the original dataframe on the `game_id` column
merged_df = df_performance.merge(df_features, left_on='game_id', right_index=True, how='left')

# Display the first 5 rows of the merged dataframe
print(merged_df.head().to_markdown(index=False, numalign="left", stralign="left"))





image 1/1 /home/hillary_kipkemoi/Automated-Storyboard-Synthesis-Digital-Advertising/data/Assets/bfbf7a2b7ac635e67877b1ab87fd6629/_preview.png: 640x448 2 kites, 225.9ms
Speed: 3.0ms preprocess, 225.9ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 448)


  super()._check_params_vs_input(X, default_n_init=10)


| game_id                          | preview_link                                                                                                                                            | ER       | CTR       | objects   | colors                                                                        | positions                                                          | text                                 |
|:---------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------|:---------|:----------|:----------|:------------------------------------------------------------------------------|:-------------------------------------------------------------------|:-------------------------------------|
| bfbf7a2b7ac635e67877b1ab87fd6629 | https://s3.us-east-1.amazonaws.com/a.futureadlabs.com-us-east-1-backup/us-east-1/games/bfbf7a2b7ac635e67877b1ab87fd6629/b7a604b3b08f086