<a href="https://colab.research.google.com/github/ivanmchavez/Analisisdedatos/blob/main/CodigoCIIIA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
import tensorflow as tf
import numpy as np
from PIL import Image
import os

class DataLoader:
    def __init__(self, csv_file_path='/content/drive/MyDrive/instagram_data.csv', image_dir='/content/drive/MyDrive/instagram_data.csv'): # Use './' to denote the current directory
        """
        Initialize the DataLoader.
        Args:
            csv_file_path (str): Path to the CSV file containing image URLs and likes.
            image_dir (str): Path to the directory containing the images.
        """
        # If __file__ is not defined, assume the CSV file is in the current working directory
        try:
            csv_file_path_full = os.path.join(os.path.dirname(__file__), csv_file_path)
        except NameError:
            csv_file_path_full = csv_file_path
        self.data = pd.read_csv(csv_file_path_full)
        self.image_dir = image_dir
        self.images_path = self.data['imageUrl'].tolist()
        self.labels = self.data['likes'].apply(self._classify_likes).tolist()

    def _classify_likes(self, likes):
        # Clasificar en base a "likes"
        return 1 if likes > 10000 else 0

    def _load_image(self, image_path, image_size=(224, 224)):
        # Cargar y preprocesar imagen usando Pillow
        try:
            img = Image.open(f"{self.image_dir}/{image_path}").convert("RGB")
            img = img.resize(image_size)
            img_array = np.array(img) / 255.0  # Normalizar entre 0 y 1
            return img_array
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            return np.zeros((*image_size, 3))  # Devolver imagen vacía en caso de error

    def get_image_dataset(self, batch_size=32, image_size=(224, 224)):
        # Crear dataset de imágenes y etiquetas
        images = []
        for image_path in self.images_path:
            img = self._load_image(image_path, image_size)
            images.append(img)

        # Convertir listas a tensores
        images_tensor = tf.convert_to_tensor(images, dtype=tf.float32)
        labels_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int32)

        # Crear dataset
        dataset = tf.data.Dataset.from_tensor_slices((images_tensor, labels_tensor))
        dataset = dataset.batch(batch_size).shuffle(buffer_size=100)
        return dataset




In [15]:
 def summary(self):
        # Resumen del conjunto de datos
        print("Dataset Summary:")
        print(f"Total records: {len(self.data)}")
        print(f"High engagement posts: {(self.data['likes'] > 10000).sum()}")
        print(f"Low engagement posts: {(self.data['likes'] <= 10000).sum()}")
        print(self.data.describe())
        print("\n")


In [17]:
import pandas as pd
import tensorflow as tf
import numpy as np
from PIL import Image
import os
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Concatenate, Embedding, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [26]:
class DataLoader:
    def __init__(self, csv_file_path='/content/drive/MyDrive/instagram_data.csv', image_dir='/content/drive/MyDrive/instagram_data.csv'):
        self.data = pd.read_csv(csv_file_path)
        self.image_dir = image_dir
        self.images_path = self.data['imageUrl'].tolist()
        self.captions = self.data['caption'].tolist()
        # Convert the 'likes' column to numeric before applying the classification function
        self.data['likes'] = pd.to_numeric(self.data['likes'], errors='coerce')
        self.labels = self.data['likes'].apply(self._classify_likes).tolist()
        self.tokenizer = None

    def _classify_likes(self, likes):
        return 1 if likes > 10000 else 0

    def _load_image(self, image_path, image_size=(224, 224)):
        try:
            img = Image.open(f"{self.image_dir}/{image_path}").convert("RGB")
            img = img.resize(image_size)
            img_array = np.array(img) / 255.0
            return img_array
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            return np.zeros((*image_size, 3))

    def prepare_text_data(self, max_words=10000, max_length=100):
        self.tokenizer = Tokenizer(num_words=max_words)
        self.tokenizer.fit_on_texts(self.captions)
        sequences = self.tokenizer.texts_to_sequences(self.captions)
        padded_sequences = pad_sequences(sequences, maxlen=max_length)
        return padded_sequences

    def get_image_text_dataset(self, batch_size=32, image_size=(224, 224), max_words=10000, max_length=100):
        images = [self._load_image(img_path, image_size) for img_path in self.images_path]
        padded_sequences = self.prepare_text_data(max_words, max_length)

        images_tensor = tf.convert_to_tensor(images, dtype=tf.float32)
        captions_tensor = tf.convert_to_tensor(padded_sequences, dtype=tf.int32)
        labels_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int32)

        dataset = tf.data.Dataset.from_tensor_slices(((images_tensor, captions_tensor), labels_tensor))
        dataset = dataset.batch(batch_size).shuffle(buffer_size=100)
        return dataset


In [20]:
def create_model(image_shape=(224, 224, 3), max_words=10000, max_length=100):
    # Image input
    image_input = Input(shape=image_shape, name="image_input")
    x = tf.keras.applications.MobileNetV2(input_shape=image_shape, include_top=False, weights="imagenet")(image_input)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Text input
    text_input = Input(shape=(max_length,), name="text_input")
    embedding = Embedding(input_dim=max_words, output_dim=50, input_length=max_length)(text_input)
    y = LSTM(64)(embedding)
    y = Dense(64, activation='relu')(y)

    # Concatenate image and text features
    combined = Concatenate()([x, y])
    z = Dense(64, activation='relu')(combined)
    z = Dropout(0.5)(z)
    output = Dense(1, activation='sigmoid')(z)

    model = Model(inputs=[image_input, text_input], outputs=output)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [21]:
def predict_success(model, tokenizer, image_path, caption, image_size=(224, 224), max_length=100):
    # Preprocess the image
    img = Image.open(image_path).convert("RGB")
    img = img.resize(image_size)
    img_array = np.array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    # Preprocess the caption
    sequence = tokenizer.texts_to_sequences([caption])
    padded_sequence = pad_sequences(sequence, maxlen=max_length)

    # Make the prediction
    prediction = model.predict([img_array, padded_sequence])[0][0]
    if prediction > 0.5:
        print(f"The post is likely to get more than 10,000 likes ({prediction * 100:.2f}% probability).")
    else:
        print(f"The post is likely to get less than 10,000 likes ({(1 - prediction) * 100:.2f}% probability).")


In [28]:
if __name__ == "__main__":
    # Load data and create dataset
    data_loader = DataLoader()
    dataset = data_loader.get_image_text_dataset()

    # Build and train the model
    model = create_model()
    model.fit(dataset, epochs=5)

    # Predict on a new image and caption
    image_path = input("Please upload your image file path: ")
    caption = input("Please enter the caption (including hashtags): ")
    predict_success(model, data_loader.tokenizer, image_path, caption)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Error loading image https://instagram.flba2-1.fna.fbcdn.net/v/t51.2885-15/428429672_942836980774796_812393592432183790_n.jpg?stp=dst-jpg_e15&_nc_ht=instagram.flba2-1.fna.fbcdn.net&_nc_cat=108&_nc_ohc=WNWZb8ZWCZYAX9n3sR4&edm=AOQ1c0wBAAAA&ccb=7-5&oh=00_AfC54ifMBrz7erxby6S9LIGFuyxLXSglCwPCMc29gmmteA&oe=65E756ED&_nc_sid=8b3546: [Errno 20] Not a directory: '/content/drive/MyDrive/instagram_data.csv/https:/instagram.flba2-1.fna.fbcdn.net/v/t51.2885-15/428429672_942836980774796_812393592432183790_n.jpg?stp=dst-jpg_e15&_nc_ht=instagram.flba2-1.fna.fbcdn.net&_nc_cat=108&_nc_ohc=WNWZb8ZWCZYAX9n3sR4&edm=AOQ1c0wBAAAA&ccb=7-5&oh=00_AfC54ifMBrz7erxby6S9LIGFuyxLXSglCwPCMc29gmmteA&oe=65E756ED&_nc_sid=8b3546'
Error loading image https://instagram.flba2-1.fna.fbcdn.net/v/t51.2885-15/428299369_1818219725290887_8753604251804594968_n.jpg?stp=dst-jpg_e15&_nc_ht=instagram.flba2-1.fna.fbcdn.net&_nc_cat=103&_nc_ohc=gqX0mQnghCoAX8Ja3s7&edm=AOQ1c0w

KeyboardInterrupt: 