<a href="https://colab.research.google.com/github/jessmiramontes/instagram_sotries_views/blob/imagesdataset/instagram_stories_step1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Analyze images of my IG stories, create a Dataframe and export it to a csv file so I can combine this dataset with the Analytics given by Meta.

First attempt: All values on the "dominant color" column are Unknown
Not sure that the objects recognized are correct

In [5]:
# Install requerid libraries
!pip install tensorflow
!pip install opencv-python-headless
!pip install deepface
!pip install pytesseract
!pip install pandas
!pip install Pillow


Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting lz4>=4.3.3 (from mtcnn>=0.1.0->deepface)
  Downloading lz4-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading deepface-0.0.93-py3-none-any.whl (108 kB)
[2K   [90m━

In [43]:
# Import libraries
import os
import pandas as pd
import cv2
from PIL import Image as PILImage, UnidentifiedImageError
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from deepface import DeepFace
import pytesseract
from google.colab import drive


In [25]:
# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [26]:
# Initialize the ResNet50 model
model = ResNet50(weights='imagenet')

In [27]:
# Function to extract date from filename
def extract_date(filename):
    date_str = filename.split('_')[1]
    return pd.to_datetime(date_str, format='%Y%m%d')

In [41]:
# Function to detect objects with ResNet50
def detect_object(img_path):
    try:
        img = Image.open(img_path)
        img = img.resize((224, 224))  # Resize image to 224x224
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        preds = model.predict(x)
        return decode_predictions(preds, top=1)[0][0][1]  # Object name
    except UnidentifiedImageError as e:
        print(f"Error identifying image {img_path}: {e}")
        return 'Unknown'


In [46]:
# Function to return the dominant color
from webcolors import rgb_to_name

def rgb_to_color_name(rgb_array):
    try:
        return rgb_to_name(rgb_array)
    except ValueError:
        # If the color name is not found, create a custom name or return a placeholder
        return 'Unknown'

# Updated function to analyze dominant color with error handling and color name conversion
def dominant_color(img_path):
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error: Unable to read image file {img_path}")
        return 'Unknown'

    data = np.reshape(img, (-1, 3))
    data = np.float32(data)
    _, _, centers = cv2.kmeans(data, 1, None, (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2), 10, cv2.KMEANS_RANDOM_CENTERS)
    dominant_color_rgb = centers[0].astype(int)
    return rgb_to_color_name(tuple(dominant_color_rgb))

In [40]:
# Function to detect emotions, will return "no face detected" if there are no faces on the image
def analyze_emotion(img_path):
    try:
        result = DeepFace.analyze(img_path, actions=['emotion'], enforce_detection=False)
        #print(result)  # Print the result to understand its structure
        if isinstance(result, list) and len(result) > 0:
            return result[0]['dominant_emotion'] if 'dominant_emotion' in result[0] else None
        return None
    except ValueError as e:
        print(f"Error analyzing emotion in {img_path}: {e}")
        return 'No face detected'  # Or any other default value you prefer



In [31]:
# Directory of images in Google Drive
image_directory = '/content/drive/MyDrive/Colab Notebooks/stories_archive'

In [32]:
# List to store results
image_data = []

In [47]:
# Process each image in the directory
for filename in os.listdir(image_directory):
    if filename.endswith('.jpg') or filename.endswith('.webp'):  # Adjust for your image extensions
        # print(f"Processing {filename}")
        img_path = os.path.join(image_directory, filename)

        date = extract_date(filename)
        obj = detect_object(img_path)
        color = dominant_color(img_path)
        emotion = analyze_emotion(img_path)

        image_data.append({
            'nombre_archivo': filename,
            'fecha': date,
            'objeto_principal': obj,
            'color_dominante': color,
            'emocion_primaria': emotion
        })

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 196ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 189ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 353ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [48]:
# Create DataFrame
df = pd.DataFrame(image_data)

# Export DataFrame to CSV
df.to_csv('/content/drive/MyDrive/Colab Notebooks/stories_archive/imagedata.csv', index=False)

print("Data saved successfully.")

Data saved successfully.
