In [32]:

# Record the TensorFlow version in the cell output so the run environment is documented.
import tensorflow
print('tensorflow: %s' % tensorflow.__version__)

# Same for Keras.
import keras
print('keras: %s' % keras.__version__)

tensorflow: 2.18.0
keras: 3.8.0


In [33]:
from os import listdir
from pickle import dump
from keras.applications.vgg16 import VGG16
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer


In [34]:
def extract_features(directory):
    """Run every file in ``directory`` through a headless VGG16 and collect the outputs.

    VGG16 is re-wired so that its second-to-last layer becomes the output.
    Each file is loaded at 224x224, converted to an array, given a leading
    batch axis, passed through the VGG16 ``preprocess_input`` and predicted.

    Args:
        directory: Path of a folder; every entry returned by ``listdir`` is
            treated as an image file.

    Returns:
        dict mapping image id (the file name up to its first ``.``) to the
        array returned by ``model.predict`` for that image.
    """
    from os.path import join

    base_model = VGG16()
    model = Model(inputs=base_model.inputs, outputs=base_model.layers[-2].output)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()
    features = dict()
    for name in listdir(directory):
        # join() instead of manual '/' concatenation keeps the path valid
        # whether or not `directory` already ends with a separator.
        filename = join(directory, name)
        image = load_img(filename, target_size=(224, 224))
        image = img_to_array(image)
        # Add the batch axis expected by predict(): (h, w, c) -> (1, h, w, c).
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        image = preprocess_input(image)
        feature = model.predict(image, verbose=0)
        image_id = name.split('.')[0]
        features[image_id] = feature
        print('>%s' % name)
    return features

In [35]:
# Print the version string exposed by PIL's Image module.
from PIL import Image
print(Image.__version__)

11.1.0


In [36]:
def load_doc(filename):
    """Return the complete contents of ``filename``, decoded as UTF-8 text."""
    handle = open(filename, 'r', encoding='utf-8')
    try:
        return handle.read()
    finally:
        # Always release the handle, even if read() raises.
        handle.close()


In [37]:
# NOTE(review): absolute, machine-specific Windows path; this cell only runs
# unchanged on a machine with exactly this directory layout.
filename = r'C:\Users\geets\Desktop\code\new_ml\Flickr8k_text\Flickr8k.token.txt'
# Read the whole caption token file into one string.
doc = load_doc(filename)

print("✅ Document loaded successfully!")
print("First 200 characters:", doc[:200])  # Show only a short prefix to verify the contents


✅ Document loaded successfully!
First 200 characters: 1000268201_693b08cb0e.jpg#0	A child in a pink dress is climbing up a set of stairs in an entry way .
1000268201_693b08cb0e.jpg#1	A girl going into a wooden building .
1000268201_693b08cb0e.jpg#2	A lit


In [38]:
def load_doc(filename):
    """Read ``filename`` as UTF-8 text and hand back everything in it as one string."""
    with open(filename, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return contents

# Path of the caption token file.
# NOTE(review): same absolute, machine-specific path as the previous cell; this
# cell repeats that cell's load and adds a type check.
filename = r'C:\Users\geets\Desktop\code\new_ml\Flickr8k_text\Flickr8k.token.txt'

# Load the document into a single string
doc = load_doc(filename)

# Check that it loaded: print its type and a short prefix
print("✅ Document loaded successfully!")
print("Type of doc:", type(doc))
print("First 200 characters:", doc[:200])  # Print a sample to verify


✅ Document loaded successfully!
Type of doc: <class 'str'>
First 200 characters: 1000268201_693b08cb0e.jpg#0	A child in a pink dress is climbing up a set of stairs in an entry way .
1000268201_693b08cb0e.jpg#1	A girl going into a wooden building .
1000268201_693b08cb0e.jpg#2	A lit


In [39]:

def load_descriptions(doc):
    """Group the captions of a token document by image id.

    Every usable line consists of an image token followed by the caption
    words, separated by whitespace. The image id is the image token up to its
    first ``.``; the caption is the remaining words re-joined with single
    spaces.

    Args:
        doc: The full token document as one string, one caption per line.

    Returns:
        dict mapping image id to the list of its captions, in file order.
    """
    mapping = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        # Test the token count, not the raw line length: a whitespace-only
        # line has len(line) >= 2 but no tokens and used to crash on
        # tokens[0]. Lines carrying an id but no caption are skipped as well.
        if len(tokens) < 2:
            continue
        image_id = tokens[0].split('.')[0]
        image_desc = ' '.join(tokens[1:])
        mapping.setdefault(image_id, []).append(image_desc)
    return mapping


# Parse the token document held in `doc` and report how many distinct image ids it contains.
descriptions = load_descriptions(doc)
print('Loaded: %d ' % len(descriptions))

Loaded: 8092 


In [40]:
# NOTE(review): re-runs the same load_descriptions(doc) call as the previous cell;
# only the wording of the printed message differs.
descriptions = load_descriptions(doc)
print('✅ Descriptions loaded:', len(descriptions))


✅ Descriptions loaded: 8092


In [41]:

import string
 
def clean_descriptions(descriptions):
    """Normalise every caption in ``descriptions`` in place.

    Each caption is split on whitespace; every word is lower-cased and has
    ASCII punctuation removed. Words that end up one character or shorter,
    or that are not purely alphabetic, are dropped. The surviving words are
    re-joined with single spaces and written back into the same list slot.

    Args:
        descriptions: dict mapping image id to a list of caption strings.
            The lists are modified in place; nothing is returned.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    for captions in descriptions.values():
        for position, caption in enumerate(captions):
            kept = []
            for raw_word in caption.split():
                word = raw_word.lower().translate(strip_punctuation)
                if len(word) > 1 and word.isalpha():
                    kept.append(word)
            captions[position] = ' '.join(kept)
 
# Clean the captions in place: clean_descriptions returns nothing and rewrites
# the lists inside `descriptions`.
clean_descriptions(descriptions)

In [42]:

def to_vocabulary(descriptions):
    """Collect the distinct whitespace-separated words used across all captions.

    Args:
        descriptions: dict mapping image id to a list of caption strings.

    Returns:
        set of every word that appears in at least one caption.
    """
    all_desc = set()
    # A plain loop replaces the original list comprehension, which was run
    # only for its side effect and built a throwaway list of None values.
    for desc_list in descriptions.values():
        for desc in desc_list:
            all_desc.update(desc.split())
    return all_desc
 

# Build the set of distinct words over the current `descriptions` and report its size.
vocabulary = to_vocabulary(descriptions)
print('Vocabulary Size: %d' % len(vocabulary))

Vocabulary Size: 8763


In [43]:


def save_descriptions(descriptions, filename):
    """Write every caption to ``filename``, one ``<image_id> <caption>`` per line.

    Lines are separated by ``\\n`` with no trailing newline after the last one.

    Args:
        descriptions: dict mapping image id to a list of caption strings.
        filename: Path of the text file to create or overwrite.
    """
    lines = [
        key + ' ' + desc
        for key, desc_list in descriptions.items()
        for desc in desc_list
    ]
    data = '\n'.join(lines)
    # The context manager closes the file even if write() raises. The explicit
    # UTF-8 encoding matches how this notebook reads text files back
    # (load_doc opens with encoding='utf-8'); the platform default may differ.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(data)
 

# Persist the cleaned captions. The relative name means the file is written to
# the kernel's current working directory.
save_descriptions(descriptions, 'descriptions.txt')

In [44]:
 
# NOTE(review): this cell repeats the load -> parse -> clean -> vocabulary -> save
# steps of the preceding cells, but with a path relative to the working
# directory instead of the absolute path used earlier.
filename = 'Flickr8k_text/Flickr8k.token.txt'

# Read the raw token document.
doc = load_doc(filename)

# Group captions by image id.
descriptions = load_descriptions(doc)
print('Loaded: %d ' % len(descriptions))

# Normalise the captions in place.
clean_descriptions(descriptions)

# Distinct words across the cleaned captions.
vocabulary = to_vocabulary(descriptions)
print('Vocabulary Size: %d' % len(vocabulary))

# Overwrite descriptions.txt in the working directory with the cleaned captions.
save_descriptions(descriptions, 'descriptions.txt')

Loaded: 8092 
Vocabulary Size: 8763


In [45]:

def load_set(filename):
    """Read a file of image file names and return the set of their ids.

    The file is read with ``load_doc``. Empty lines are ignored; for every
    other line the id is the text before the first ``.``.

    Args:
        filename: Path of a text file with one image file name per line.

    Returns:
        set of image id strings.
    """
    rows = load_doc(filename).split('\n')
    return {row.split('.')[0] for row in rows if len(row) >= 1}

In [46]:
def load_clean_descriptions(filename, dataset):
    """Load the cleaned captions of the images in ``dataset``, wrapped in sequence markers.

    The file is read with ``load_doc``. Each usable line is an image id
    followed by the caption words; captions of ids found in ``dataset`` are
    stored as ``'startseq <caption> endseq'``.

    Args:
        filename: Path of the cleaned descriptions file.
        dataset: Collection of image ids to keep (membership is tested with ``in``).

    Returns:
        dict mapping image id to the list of its wrapped captions.
    """
    doc = load_doc(filename)
    descriptions = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        # Blank lines (e.g. from a trailing newline) have no tokens and used
        # to raise IndexError on tokens[0]. Lines without any caption words
        # are skipped too, matching the later redefinition in this notebook.
        if len(tokens) < 2:
            continue
        image_id, image_desc = tokens[0], tokens[1:]
        if image_id in dataset:
            desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
            descriptions.setdefault(image_id, list()).append(desc)
    return descriptions

In [47]:
def load_photo_features(filename, dataset):
    """Load pickled photo features and keep only the entries for ``dataset``.

    Args:
        filename: Path of a pickle file holding a mapping of image id to features.
        dataset: Iterable of image ids to keep.

    Returns:
        dict mapping each id of ``dataset`` that is present in the pickle to
        its stored features. Ids without stored features are left out instead
        of raising KeyError, matching the later redefinition in this notebook.
    """
    # Only `dump` is imported from pickle before this cell, so import `load`
    # locally to keep the function usable on a fresh kernel.
    from pickle import load

    # NOTE(security): unpickling can execute arbitrary code; only load
    # feature files you created yourself.
    with open(filename, 'rb') as handle:
        all_features = load(handle)
    features = {k: all_features[k] for k in dataset if k in all_features}
    return features
    

In [48]:
import os

# NOTE(review): absolute, machine-specific Windows path.
directory = r'C:\Users\geets\Desktop\code\new_ml\Flickr8k_Dataset\Flicker8k_Dataset'
# Keep entries whose lower-cased name ends in one of the image suffixes.
# The suffixes carry no leading dot, so a name such as "notjpg" would match too.
images = [f for f in os.listdir(directory) if f.lower().endswith(('jpg', 'jpeg', 'png'))]
print(f"Number of images in directory: {len(images)}")


Number of images in directory: 8091


In [49]:
import os

# NOTE(review): this checks ...\new_ml\Flickr8k_text\descriptions.txt, while the
# following cells read ...\new_ml\descriptions.txt. Confirm which file is the
# intended one.
descriptions_file = r"C:\Users\geets\Desktop\code\new_ml\Flickr8k_text\descriptions.txt"

print("File exists?", os.path.exists(descriptions_file))  # Should print True if the file is present


File exists? True


In [50]:
# NOTE(review): absolute, machine-specific path; it differs from the path whose
# existence was checked in the previous cell.
descriptions_file = r"C:\Users\geets\Desktop\code\new_ml\descriptions.txt"

# Test if the file can be read, and preview its first five lines
try:
    with open(descriptions_file, "r", encoding="utf-8") as file:
        content = file.readlines()

    print("✅ File loaded successfully!")
    print(f"📜 First 5 lines:\n{''.join(content[:5])}")

except FileNotFoundError:
    print("❌ ERROR: File not found!")
# NOTE(review): any other failure is only printed, so later cells still run
# even when the file could not be read.
except Exception as e:
    print(f"⚠️ ERROR: {e}")




✅ File loaded successfully!
📜 First 5 lines:
1000268201_693b08cb0e child in pink dress is climbing up set of stairs in an entry way
1000268201_693b08cb0e girl going into wooden building
1000268201_693b08cb0e little girl climbing into wooden playhouse
1000268201_693b08cb0e little girl climbing the stairs to her playhouse
1000268201_693b08cb0e little girl in pink dress going into wooden cabin



In [51]:
# NOTE(review): absolute, machine-specific path.
descriptions_file = r"C:\Users\geets\Desktop\code\new_ml\descriptions.txt"

# Extract all unique image IDs from descriptions.txt
# NOTE(review): every id in the file goes into `train_dataset`, so no images are
# held out here; a later cell loads Flickr_8k.trainImages.txt for the train ids.
train_dataset = set()

with open(descriptions_file, "r", encoding="utf-8") as file:
    for line in file:
        tokens = line.split()
        if len(tokens) > 1:  # Ensure there's an ID and description
            image_id = tokens[0]  # First token is the image ID
            train_dataset.add(image_id)

print(f"✅ Total images for training: {len(train_dataset)}")
# Sets are unordered, so this sample can differ between runs.
print("🔍 Sample image IDs:", list(train_dataset)[:5])  # Show first 5 IDs


✅ Total images for training: 8092
🔍 Sample image IDs: ['523249012_a0a25f487e', '1295698260_e10c53c137', '3547368652_0d85c665d3', '2955099064_1815b00825', '3673970325_4e025069e9']


In [52]:
# Read an entire text file into one string
def load_doc(filename):
    """Open ``filename`` as UTF-8 text and return its full contents."""
    with open(filename, 'r', encoding='utf-8') as stream:
        return stream.read()

In [53]:
# Load the captions for the ids in `train_dataset`, wrapped in startseq/endseq
# markers, from the file named by `descriptions_file` (both set in earlier cells).
train_descriptions = load_clean_descriptions(descriptions_file, train_dataset)
print("✅ Total descriptions loaded:", len(train_descriptions))


✅ Total descriptions loaded: 8092


In [None]:
# Flatten a dictionary of clean descriptions into a single list of captions
def to_lines(descriptions):
    """Return every caption of every image as one flat list.

    Args:
        descriptions: dict mapping image id to a list of caption strings.

    Returns:
        list of all captions, in dictionary order and then in list order.
    """
    return [caption for captions in descriptions.values() for caption in captions]

# Fit a tokenizer on the given caption descriptions
from tensorflow.keras.preprocessing.text import Tokenizer

def create_tokenizer(descriptions):
    """Fit a ``Tokenizer`` on every caption in ``descriptions``.

    Args:
        descriptions: dict mapping image id to a list of caption strings.

    Returns:
        A ``Tokenizer`` instance after ``fit_on_texts`` has been called with
        the flattened caption list produced by ``to_lines``.
    """
    fitted = Tokenizer()
    fitted.fit_on_texts(to_lines(descriptions))
    return fitted

# Prepare tokenizer from the training captions
tokenizer = create_tokenizer(train_descriptions)
# One more than the number of distinct words in the tokenizer's word_index
# (presumably so that index 0 stays free for padding; confirm).
vocab_size = len(tokenizer.word_index) + 1

print("✅ Tokenizer is working! Vocabulary Size:", vocab_size)



In [None]:
def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size):
    """Build next-word training samples for ONE image.

    Every caption is encoded with ``tokenizer`` and expanded into one sample
    per prefix: the first ``i`` tokens (padded to ``max_length``) are the
    input and token ``i`` (one-hot over ``vocab_size``) is the target. The
    same ``photo`` value is stored alongside every sample.

    Args:
        tokenizer: Object whose ``texts_to_sequences`` encodes a list of strings.
        max_length: Length every input sequence is padded to.
        desc_list: Iterable of caption strings for a single image (not the
            whole id -> captions dict; iterating a dict would yield its keys).
        photo: The feature value of that single image.
        vocab_size: Number of classes for the one-hot encoded target word.

    Returns:
        Tuple ``(X1, X2, y)`` of numpy arrays: photo features, padded input
        sequences and one-hot targets, one row per sample.
    """
    # No bare `array` name is imported anywhere in the visible notebook, so the
    # original `array(...)` calls would raise NameError; use numpy explicitly.
    import numpy as np

    X1, X2, y = list(), list(), list()
    # walk through each description for the image
    for desc in desc_list:
        # encode the sequence
        seq = tokenizer.texts_to_sequences([desc])[0]
        # split one sequence into multiple X,y pairs
        for i in range(1, len(seq)):
            # split into input and output pair
            in_seq, out_seq = seq[:i], seq[i]
            # pad input sequence
            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
            # encode output sequence
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            # store
            X1.append(photo)
            X2.append(in_seq)
            y.append(out_seq)
    return np.array(X1), np.array(X2), np.array(y)

In [None]:
# data generator, intended to be passed to model.fit() (fit_generator() no longer exists in Keras 3)
def data_generator(descriptions, photos, tokenizer, max_length, vocab_size):
    """Yield one image's worth of training samples at a time, forever.

    For every ``(image id, captions)`` pair the image feature is looked up in
    ``photos`` and expanded with ``create_sequences``; one batch is yielded
    per image, and the pass over ``descriptions`` restarts when it ends.

    Args:
        descriptions: dict mapping image id to a list of caption strings.
        photos: Mapping of image id to stored features; element ``[0]`` of
            each entry is used (presumably the single row of a
            ``(1, n_features)`` array; confirm against the feature file).
        tokenizer: Fitted tokenizer handed on to ``create_sequences``.
        max_length: Padding length handed on to ``create_sequences``.
        vocab_size: Number of target classes handed on to ``create_sequences``.

    Yields:
        ``((image_features, input_sequences), target_words)`` for one image.
    """
    # With nothing to iterate, the endless loop below would spin forever
    # without ever yielding; end the generator instead.
    if not descriptions:
        return
    # loop for ever over images
    while True:
        for key, desc_list in descriptions.items():
            # retrieve the photo feature
            photo = photos[key][0]
            in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo, vocab_size)
            # Yield the two model inputs as a tuple rather than a list:
            # generator-fed Keras training expects (inputs, targets) tuples.
            yield (in_img, in_seq), out_word

In [None]:
def get_max_length(descriptions):
    """Return the word count of the longest caption in ``descriptions``.

    Captions are flattened with ``to_lines`` and split on whitespace.
    Raises ValueError (from ``max``) when there are no captions at all.
    """
    word_counts = (len(caption.split()) for caption in to_lines(descriptions))
    return max(word_counts)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint to save the best model during training.
# The file name template is filled with the epoch number, loss and val_loss.
# NOTE(review): both the template and `monitor` refer to val_loss, which
# presumably only exists when validation data is passed to fit(); confirm.
filepath = 'model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.keras'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# If you are training the model, use this checkpoint in the `callbacks` list:
# model.fit(..., callbacks=[checkpoint], ...)


In [None]:
# NOTE(review): identical to the checkpoint definition in the previous cell;
# re-running it only rebinds `filepath` and `checkpoint` to equal values.
filepath = 'model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.keras'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout
from tensorflow.keras.layers import Add  # Fix for `add`
from tensorflow.keras.callbacks import ModelCheckpoint


In [None]:
import numpy as np
from pickle import load
from tensorflow.keras.preprocessing.text import Tokenizer

# Padding length for caption input sequences (adjust based on your dataset).
# NOTE(review): hard-coded; get_max_length(), defined earlier in this notebook,
# derives the longest caption length from the descriptions themselves.
MAX_LENGTH = 34  # Typical for Flickr8k

# Read a whole text file into memory
def load_doc(filename):
    """Return everything in ``filename`` as one UTF-8 decoded string."""
    reader = open(filename, 'r', encoding="utf-8")
    with reader:
        return reader.read()

# Build the set of photo identifiers listed in a file
def load_set(filename):
    """Return the ids (text before the first ``.``) of all non-empty lines of ``filename``.

    The file is read with ``load_doc`` and split on newlines.
    """
    identifiers = set()
    for row in load_doc(filename).split("\n"):
        if row:
            identifiers.add(row.split('.')[0])
    return identifiers

# Read the cleaned captions of the selected images into a dictionary
def load_clean_descriptions(filename, dataset):
    """Load captions for the ids in ``dataset`` and wrap them in sequence markers.

    The file is read with ``load_doc``. Lines with fewer than two
    whitespace-separated tokens are ignored. For the rest, the first token is
    the image id and the remaining tokens form the caption, stored as
    ``'startseq <caption> endseq'`` when the id is in ``dataset``.

    Returns:
        dict mapping image id to the list of its wrapped captions.
    """
    descriptions = {}
    for row in load_doc(filename).split('\n'):
        parts = row.split()
        if len(parts) >= 2 and parts[0] in dataset:
            caption = 'startseq ' + ' '.join(parts[1:]) + ' endseq'
            if parts[0] not in descriptions:
                descriptions[parts[0]] = []
            descriptions[parts[0]].append(caption)
    return descriptions

# Load photo features from pickle
def load_photo_features(filename, dataset):
    """Load pickled photo features and keep only the entries for ``dataset``.

    Args:
        filename: Path of a pickle file holding a mapping of image id to features.
        dataset: Iterable of image ids to keep.

    Returns:
        dict mapping each id of ``dataset`` that is present in the pickle to
        its stored features; ids without stored features are left out.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # feature files you created yourself.
    # The context manager closes the handle that the original bare
    # open(...) call left open.
    with open(filename, 'rb') as handle:
        all_features = load(handle)
    features = {k: all_features[k] for k in dataset if k in all_features}
    return features

# Load training dataset: the ids listed in the train split file
# (path is relative to the kernel's working directory)
train_images_path = 'Flickr8k_text/Flickr_8k.trainImages.txt'
train = load_set(train_images_path)
print(f"Loaded {len(train)} training images.")

# Captions of the training ids, wrapped in startseq/endseq markers
train_descriptions = load_clean_descriptions('descriptions.txt', train)
print(f"Loaded {len(train_descriptions)} training descriptions.")

# Stored photo features of the training ids
train_features = load_photo_features('features.pkl', train)
print(f"Loaded {len(train_features)} training features.")

# Tokenizer setup: fit on every training caption
tokenizer = Tokenizer()
tokenizer.fit_on_texts([desc for desc_list in train_descriptions.values() for desc in desc_list])
vocab_size = len(tokenizer.word_index) + 1  # Ensure vocab size includes padding

# Build the training arrays in memory.
# NOTE(review): create_sequences as defined in this notebook takes ONE image's
# caption list (desc_list) and ONE feature value (photo), but here it receives
# the whole train_descriptions and train_features dicts. The MemoryError
# recorded below this cell (306404 x 4096 float32) also suggests the full
# training set does not fit in memory as a single array; data_generator, defined
# earlier, produces the samples one image at a time instead.
X1train, X2train, ytrain = create_sequences(tokenizer, MAX_LENGTH, train_descriptions, train_features, vocab_size)


Loaded 6000 training images.
Loaded 6000 training descriptions.
Loaded 6000 training features.


MemoryError: Unable to allocate 4.68 GiB for an array with shape (306404, 4096) and data type float32