# Step 1: Transfer Learning

In [33]:
import os
import random
import tensorflow as tf
import pandas as pd  # Import pandas for DataFrame operations
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model

# Define paths to your image and text data directories
image_dir = "dataset2"

# Function to get filenames within a directory
def get_files_in_directory(directory):
    return [os.path.join(directory, file) for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]

# Create lists to store selected filenames
train_files = []
val_files = []

# Iterate through subdirectories
for subdir in os.listdir(image_dir):
    subdir_path = os.path.join(image_dir, subdir)
    if os.path.isdir(subdir_path):
        # Get all image files in the subdirectory
        files = get_files_in_directory(subdir_path)
        # Sort files
        files.sort()
        # Select 5 files from serial number 55 to 60
        selected_files = files[55:60]
        # Split selected files for training and validation
        random.shuffle(selected_files)
        train_files.extend(selected_files[:3])  # Select 3 for training
        val_files.extend(selected_files[3:])   # Select 2 for validation

# Create an ImageDataGenerator for efficient image loading and augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Splitting data into train and validation
)

# Load images using the ImageDataGenerator
train_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame(train_files, columns=["filename"]),
    directory=None,
    x_col="filename",
    y_col="filename",
    target_size=(224, 224),
    batch_size=32,
    class_mode='input',  # Return images as both input and target
    shuffle=True,
)

val_generator = train_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame(val_files, columns=["filename"]),
    directory=None,
    x_col="filename",
    y_col="filename",
    target_size=(224, 224),
    batch_size=32,
    class_mode='input',  # Return images as both input and target
    shuffle=False,
)

# Load the pre-trained ResNet50V2 model
resnet50v2_model = ResNet50V2(weights="imagenet", include_top=False)

# Create a custom model by freezing all layers except the top 2
x = resnet50v2_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(1024, activation="relu")(x)
output_layer = tf.keras.layers.Dense(224 * 224 * 3, activation="sigmoid")(x)  # Output layer for the same size as input

# Reshape the output to match the input shape
output_layer = tf.keras.layers.Reshape((224, 224, 3))(output_layer)

# Create the final model by adding the top layers to the pre-trained model
custom_model = Model(inputs=resnet50v2_model.input, outputs=output_layer)

# Freeze all layers except the top layers
for layer in resnet50v2_model.layers:
    layer.trainable = False

# Compile the custom model with appropriate optimizer and loss function
custom_model.compile(optimizer='adam', loss='mse')

# Train the custom model
history = custom_model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

# Save the feature extraction model
custom_model.save('feature_extraction_model.h5')

Found 1224 validated image filenames.
Found 816 validated image filenames.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [34]:
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

# Define paths to your image and text data directories
dataset_dir = "dataset1"

# Function to get subdirectories within a directory
def get_subdirectories(directory):
    return [os.path.join(directory, subdir) for subdir in os.listdir(directory) if os.path.isdir(os.path.join(directory, subdir))]

# Create lists to store selected filenames
train_files = []
val_files = []

# Iterate through subdirectories
subdirs = get_subdirectories(dataset_dir)
for subdir in subdirs:
    # Get all image files in the subdirectory
    files = [os.path.join(subdir, file) for file in os.listdir(subdir) if file.endswith(('.jpg', '.jpeg', '.png'))]
    # Sort files
    files.sort()
    # Select 5 files
    selected_files = files[:5]
    # Split selected files for training and validation
    random.shuffle(selected_files)
    train_files.extend(selected_files[:3])  # Select 3 for training
    val_files.extend(selected_files[3:])   # Select 2 for validation

# Define a function to load and preprocess images
def load_image(file_path):
    img = load_img(file_path, target_size=(224, 224))
    img = img_to_array(img)
    img = tf.keras.applications.resnet_v2.preprocess_input(img)
    return img

# Load images and preprocess them
train_images = np.array([load_image(img_path) for img_path in train_files])
val_images = np.array([load_image(img_path) for img_path in val_files])

# Load the pre-trained ResNet50V2 model
resnet50v2_model = ResNet50V2(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Create a custom model on top of ResNet50V2 for image feature extraction
x = resnet50v2_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
image_features = tf.keras.layers.Dense(128, activation='relu')(x)  # Image features of size 128

# Define inputs for the model
image_input = resnet50v2_model.input

# Create a model that outputs image features
image_model = Model(inputs=image_input, outputs=image_features)

# Compile the image model
image_model.compile(optimizer='adam', loss='mse')

# Get image features for training and validation images
train_image_features = image_model.predict(train_images)
val_image_features = image_model.predict(val_images)

# Define a simple model for understanding relationships between images
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(128,)),  # Image features size
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')  # Output for 3 classes (example)
])

# Compile the model with appropriate optimizer and loss function
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# Define labels (example: 0, 1, 2 for different relationships)
labels = np.array([0, 1, 2] * (len(train_files) // 3))

# Split the data into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(train_image_features, labels, test_size=0.2, random_state=42)

# Train the model
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_val, y_val)
)

# Save the trained image model and relationship model
image_model.save('image_model.h5')
model.save('relationship_model.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [28]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the pre-trained ResNet50V2 model with modified output shape
resnet50v2_model = tf.keras.applications.ResNet50V2(weights='imagenet', include_top=False, pooling='avg')
resnet50v2_model_output = resnet50v2_model.output
resnet50v2_model_output = tf.keras.layers.Dense(128, activation='relu')(resnet50v2_model_output)
resnet50v2_model = tf.keras.Model(inputs=resnet50v2_model.input, outputs=resnet50v2_model_output)

# Load the trained relationship model
relationship_model = tf.keras.models.load_model('relationship_model.h5')

# Define the local directory containing GPT-2 model files
gpt2_dir = "gpt2-small"

# Manually load GPT-2 tokenizer with pad_token as '<pad>'
tokenizer_gpt2 = GPT2Tokenizer.from_pretrained(gpt2_dir, pad_token='<pad>')

# Manually load GPT-2 model
model_gpt2 = GPT2LMHeadModel.from_pretrained(gpt2_dir)

# Define the image directory and example image paths
image_dir = "dataset1"
image_paths = ["1.png", "2.png", "3.png"]  # Example image paths

# Define a function to extract image features using ResNet50V2
def extract_image_features(image_paths):
    images = [load_img(os.path.join(image_dir, img_path), target_size=(224, 224)) for img_path in image_paths]
    images = np.array([img_to_array(img) for img in images])
    images = tf.keras.applications.resnet50.preprocess_input(images)
    features = resnet50v2_model.predict(images)
    return features

# Define a function to predict relationships using the relationship model
def predict_relationship(image_features):
    predictions = relationship_model.predict(image_features)
    return predictions

# Define a function to generate text with GPT-2 in chunks
def generate_text(relationship, script):
    # Combine the input prompt with the script
    input_text = f"{relationship}: {script}\n"
    
    # Tokenize the input text
    input_ids = tokenizer_gpt2.encode(input_text, return_tensors='pt')
    
    # Ensure input_ids is within range of model's vocabulary
    input_ids = input_ids[:, :model_gpt2.config.max_position_embeddings]
    
    # Clip input_ids to the maximum vocabulary size
    input_ids = input_ids.clip(0, model_gpt2.config.vocab_size - 1)
    
    # Initialize generated text
    generated_text = ""
    
    # Generate text in chunks
    while len(generated_text) < 1000:
        with torch.no_grad():
            output = model_gpt2.generate(
                input_ids=input_ids,
                max_length=200,  # Set a smaller max_length for chunks
                pad_token_id=tokenizer_gpt2.eos_token_id,
                do_sample=True,
                top_p=0.95,
                top_k=50,
                max_new_tokens=100  # Limit the new tokens added
            )
        
        # Decode the generated text for this chunk
        chunk_text = tokenizer_gpt2.decode(output[0], skip_special_tokens=True)
        
        # Add the chunk to the generated text
        generated_text += chunk_text + "\n"
        
        # Update input_ids for the next chunk
        input_ids = tokenizer_gpt2.encode(generated_text, return_tensors='pt')
    
    return generated_text

# Define a function to generate the story
def generate_story(image_paths):
    # Extract image features
    image_features = extract_image_features(image_paths)
    
    # Predict relationships
    relationship_predictions = predict_relationship(image_features)
    
    # Define the relationships
    relationships = ["Beginning", "Middle", "End"]
    
    # Load script sample from textdir.xlsx (assuming you have this file)
    script_sample = """
    This is a sample script. You can replace this with your actual script.
    """
    
    # Generate story
    story = ""
    for i, relationship_prob in enumerate(relationship_predictions):
        relationship_idx = np.argmax(relationship_prob)
        selected_relationship = relationships[relationship_idx]
        generated_text = generate_text(selected_relationship, script_sample)
        story += f"{selected_relationship}:\n{generated_text}\n\n"
    
    return story

# Generate story for the given image paths
generated_story = generate_story(image_paths)

print("Generated Story:")
print(generated_story)




Both `max_new_tokens` (=100) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=100) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=100) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=100) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Generated Story:
Beginning:
Beginning: 
    This is a sample script. You can replace this with your actual script.
    
, the the, the, the the the. the the rise and the the. in., of, to and. the the the,. in the. the and.Settings,,, the the the and the to the to▬,▬ and the of the the in the the the,, the the of the rise the the the the the in the the in in, the the warrant the the of the the the the, Beef the the the the the the. the, the
Beginning: 
    This is a sample script. You can replace this with your actual script.
    
, the the, the, the the the. the the rise and the the. in., of, to and. the the the,. in the. the and.Settings,,, the the the and the to the to▬,▬ and the of the the in the the the,, the the of the rise the the the the the in the the in in, the the warrant the the of the the the the, Beef the the the the the the. the, the
 the and and the the the. the the the the the the the to, the the, the to to the the the, the the, the the. in the the,. the, the to the,, t

In [5]:
import os
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Example image paths
image_dir = "dataset1"
image_paths = [os.path.join(image_dir, "1.png"), os.path.join(image_dir, "2.png"), os.path.join(image_dir, "3.png")]

# Example script
script_sample = """
This is a sample script. You can replace this with your actual script.
"""

# Define the relationships
relationships = ["Beginning", "Middle", "End"]

# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

def generate_story(image_path, script_sample):
    # Combine the image path and script into a prompt
    prompt = f"Image: {image_path}\nScript: {script_sample}\n"
    
    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    
    # Generate the story using GPT-2
    with torch.no_grad():
        output = model.generate(input_ids, max_length=300, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
    
    # Decode and return the generated text
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text.strip()

# Generate stories for all images
generated_stories = []
for image_path in image_paths:
    for relationship in relationships:
        generated_story = generate_story(image_path, f"{relationship}: {script_sample}")
        generated_stories.append(f"{relationship}:\n{generated_story}\n\n")

# Combine all stories into a single narrative
final_story = "\n".join(generated_stories)

# Print the final generated story
print("Generated Story:")
print(final_story)


Generated Story:
Beginning:
Image: dataset1\1.png
Script: Beginning: 
This is a sample script. You can replace this with your actual script.


This script is a sample script. You can replace this with your actual script.

Script: Beginning: 

This is a sample script. You can replace this with your actual script.


This script is a sample script. You can replace this with your actual script.

Script: Beginning: 

This is a sample script. You can replace this with your actual script.


This script is a sample script. You can replace this with your actual script.

Script: Beginning: 

This is a sample script. You can replace this with your actual script.


This script is a sample script. You can replace this with your actual script.

Script: Beginning: 

This is a sample script. You can replace this with your actual script.


This script is a sample script. You can replace this with your actual script.

Script: Beginning: 

This is a sample script. You can replace this with your actual sc