<a href="https://colab.research.google.com/github/jessmiramontes/instagram_sotries_views/blob/imagesdataset/instagram_stories_step1v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Analyze images of my IG stories, create a Dataframe and export it to a csv file so I can combine this dataset with the Analytics given by Meta.

**First attempt:** All values on the "dominant color" column are Unknown
Not sure that the objects recognized are correct.

**Second attempt:** Instead of translating colors to a name, I decided to let them as HEX values.

**Next step:** See if the object recognition can be improved because on the first attempt objects were recognized but not all of them were correct. I used ResNet50 for object recognition, next I will try YOLOv5.



In [2]:
# Install requerid libraries
!pip install tensorflow
!pip install opencv-python-headless
!pip install deepface
!pip install pytesseract
!pip install pandas
!pip install Pillow




In [3]:
# Import libraries
import os
import pandas as pd
import cv2
from PIL import Image as PILImage, UnidentifiedImageError
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from deepface import DeepFace
import pytesseract
from google.colab import drive


In [4]:
# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Initialize the ResNet50 model
#model = ResNet50(weights='imagenet')

In [6]:
# Function to extract date from filename
def extract_date(filename):
    date_str = filename.split('_')[1]
    return pd.to_datetime(date_str, format='%Y%m%d')

In [6]:
# Function to detect objects with ResNet50
""" def detect_object(img_path):
    try:
        img = Image.open(img_path)
        img = img.resize((224, 224))  # Resize image to 224x224
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        preds = model.predict(x)
        return decode_predictions(preds, top=1)[0][0][1]  # Object name
    except UnidentifiedImageError as e:
        print(f"Error identifying image {img_path}: {e}")
        return 'Unknown'
        """


In [43]:
!pip install yolov5

Collecting yolov5
  Downloading yolov5-7.0.14-py37.py38.py39.py310-none-any.whl.metadata (10 kB)
Collecting thop>=0.1.1 (from yolov5)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.0.100 (from yolov5)
  Downloading ultralytics-8.3.66-py3-none-any.whl.metadata (35 kB)
Collecting boto3>=1.19.1 (from yolov5)
  Downloading boto3-1.36.5-py3-none-any.whl.metadata (6.6 kB)
Collecting sahi>=0.11.10 (from yolov5)
  Downloading sahi-0.11.20-py3-none-any.whl.metadata (17 kB)
Collecting huggingface-hub<0.25.0,>=0.12.0 (from yolov5)
  Downloading huggingface_hub-0.24.7-py3-none-any.whl.metadata (13 kB)
Collecting roboflow>=0.2.29 (from yolov5)
  Downloading roboflow-1.1.51-py3-none-any.whl.metadata (9.7 kB)
Collecting botocore<1.37.0,>=1.36.5 (from boto3>=1.19.1->yolov5)
  Downloading botocore-1.36.5-py3-none-any.whl.metadata (5.7 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from boto3>=1.19.1->yolov5)
  Downloading jmespath-1.0.1-py3-none-any.whl.met

In [7]:
import torch

# Load pre-trained YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

def detect_object_yolo(img_path):
    """Detect the primary object in an image using YOLOv5.

    Args:
        img_path (str): Path to the image.

    Returns:
        str: Name of the primary object detected.
    """
    results = model(img_path)
    if results:
        labels = results.names
        coords = results.xyxy[0][:, :4]
        obj = labels[0] if labels else "Unknown"
        return obj
    return 'Unknown'


Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2025-1-24 Python-3.11.11 torch-2.5.1+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [8]:
# Function to convert RGB to hex color code
def rgb_to_hex(rgb_array):
    return '#%02x%02x%02x' % tuple(rgb_array)

# Updated function to analyze dominant color with error handling and returning hex values
def dominant_color(img_path):
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error: Unable to read image file {img_path}")
        return 'Unknown'

    data = np.reshape(img, (-1, 3))
    data = np.float32(data)
    _, _, centers = cv2.kmeans(data, 1, None, (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2), 10, cv2.KMEANS_RANDOM_CENTERS)
    dominant_color_rgb = centers[0].astype(int)
    dominant_color_hex = rgb_to_hex(dominant_color_rgb)
   # print(f"Dominant RGB values for {img_path}: {dominant_color_rgb}")  # Debugging statement
   # print(f"Dominant hex value for {img_path}: {dominant_color_hex}")  # Debugging statement
    return dominant_color_hex


In [9]:
# Function to detect emotions, will return "no face detected" if there are no faces on the image
def analyze_emotion(img_path):
    try:
        result = DeepFace.analyze(img_path, actions=['emotion'], enforce_detection=False)
        #print(result)  # Print the result to understand its structure
        if isinstance(result, list) and len(result) > 0:
            return result[0]['dominant_emotion'] if 'dominant_emotion' in result[0] else None
        return None
    except ValueError as e:
        print(f"Error analyzing emotion in {img_path}: {e}")
        return 'No face detected'  # Or any other default value you prefer



In [14]:
# Directory of images in Google Drive
image_directory = '/content/drive/MyDrive/Colab Notebooks/stories_archive'

In [15]:
# List to store results
image_data = []

In [16]:
# Process each image in the directory
for filename in os.listdir(image_directory):
    if filename.endswith('.jpg') or filename.endswith('.webp'):  # Adjust for your image extensions
        # print(f"Processing {filename}")
        img_path = os.path.join(image_directory, filename)

        date = extract_date(filename)
        obj = detect_object_yolo(img_path)
        color = dominant_color(img_path)
        emotion = analyze_emotion(img_path)

        image_data.append({
            'nombre_archivo': filename,
            'fecha': date,
            'objeto_principal': obj,
            'color_dominante': color,
            'emocion_primaria': emotion
        })

print("Finished processing all images.")

UnidentifiedImageError: cannot identify image file '/content/drive/MyDrive/Colab Notebooks/stories_archive/IMG_20240902_094755_286.jpg'

In [None]:
# Create DataFrame
df = pd.DataFrame(image_data)

# Export DataFrame to CSV
df.to_csv('/content/drive/MyDrive/Colab Notebooks/stories_archive/imagedatajan23.csv', index=False)

print("Data saved successfully.")

Data saved successfully.
