In [None]:
# Not trained to completion: training took too long and was interrupted.

import os

# Runtime tuning via environment variables. They are written here, in the first
# cell, so they are already in place when TensorFlow is imported further down.
os.environ.update({
    'TF_ENABLE_ONEDNN_OPTS': '1',  # enable the CPU-optimized oneDNN ops
    'OMP_NUM_THREADS': '8',        # same as the number of cores
    'KMP_BLOCKTIME': '0',
})



In [2]:
# Echo each tuning variable back so the cell output confirms what is set.
for var_name in ('TF_ENABLE_ONEDNN_OPTS', 'OMP_NUM_THREADS', 'KMP_BLOCKTIME'):
    print(f"{var_name}:", os.environ.get(var_name))


TF_ENABLE_ONEDNN_OPTS: 1
OMP_NUM_THREADS: 8
KMP_BLOCKTIME: 0


In [3]:
import tensorflow as tf

# CPU parallelism for an 8-core machine: 8 intra-op threads, 2 inter-op threads.
cpu_threading = tf.config.threading
cpu_threading.set_intra_op_parallelism_threads(8)
cpu_threading.set_inter_op_parallelism_threads(2)

print("Threading set: Intra=8, Inter=2")

Threading set: Intra=8, Inter=2


In [4]:
import random
import numpy as np
import pandas as pd
# import tensorflow as tf
import tensorflow_hub as hub
from collections import defaultdict
from nltk import word_tokenize, pos_tag
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.utils import load_img, img_to_array
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm


In [5]:
# 2. Load captions and extract noun keywords
def load_captions(path="flickr8k/captions.txt"):
    """Read the captions CSV and group the captions by image.

    Args:
        path: CSV location (anything ``pd.read_csv`` accepts). The file must
            provide an ``image`` column and a ``caption`` column.

    Returns:
        dict mapping each ``image`` value to the list of its ``caption`` values.
    """
    captions_df = pd.read_csv(path)
    captions_per_image = captions_df.groupby("image")["caption"].agg(list)
    return captions_per_image.to_dict()


In [6]:
def extract_keywords(image_captions):
    """Collect the noun tokens that appear in each image's captions.

    Every caption is lower-cased, tokenized and POS-tagged; tokens whose tag
    starts with ``"NN"`` are kept.

    Args:
        image_captions: dict mapping an image name to a list of caption strings.

    Returns:
        ``defaultdict(set)`` mapping image name -> set of noun tokens. An image
        whose captions yield no noun gets no entry.
    """
    nouns_by_image = defaultdict(set)
    for image_name, captions in image_captions.items():
        for caption in captions:
            tagged_tokens = pos_tag(word_tokenize(caption.lower()))
            nouns = {token for token, pos in tagged_tokens if pos.startswith("NN")}
            # Only touch the defaultdict when there is something to add, so
            # noun-less images do not get an empty entry.
            if nouns:
                nouns_by_image[image_name] |= nouns
    return nouns_by_image


In [7]:
# Load the captions, mine the noun keywords per image, then flatten every
# keyword set into one space-separated string (alphabetical order).
image_captions = load_captions()
image_keywords = extract_keywords(image_captions)
keyword_sentences = dict(
    (img, " ".join(sorted(words)))
    for img, words in image_keywords.items()
)


In [8]:
# 3. Generate triplets based on keyword overlap
def generate_triplets(image_keywords):
    """Build at most one (anchor, positive, negative) triplet per image.

    For each anchor image, a *positive* is any other image sharing at least one
    keyword with it and a *negative* is any other image sharing none. One
    positive and one negative are drawn with ``random.choice``; anchors that
    lack either kind of candidate are skipped.

    Args:
        image_keywords: mapping of image name -> set of keywords.

    Returns:
        list of ``(anchor, positive, negative)`` tuples of image names.
    """
    images = list(image_keywords.keys())
    triplets = []
    for anchor in images:
        anchor_kws = image_keywords[anchor]
        pos, neg = [], []
        # Classify every other image in a single pass. `isdisjoint` answers
        # "any keyword in common?" without building an intersection set.
        for other in images:
            if other == anchor:
                continue
            if anchor_kws.isdisjoint(image_keywords[other]):
                neg.append(other)
            else:
                pos.append(other)
        if pos and neg:
            # Positive is drawn before negative, so a seeded `random` yields
            # the same picks as drawing them in two separate statements.
            triplets.append((anchor, random.choice(pos), random.choice(neg)))
    return triplets

# Seed Python's RNG so the randomly drawn positives/negatives are the same on
# every Restart & Run All.
random.seed(42)
triplets = generate_triplets(image_keywords)



In [9]:
# 4. Build TF dataset yielding images and keyword-based text
def build_triplet_dataset(triplets, keyword_sentences, img_size=(224,224), batch_size=32,
                          image_dir="flickr8k/images", shuffle_buffer=1000):
    """Create a batched ``tf.data`` pipeline of (image, text) triplets.

    Args:
        triplets: re-iterable of ``(anchor, positive, negative)`` image names.
        keyword_sentences: mapping of image name -> keyword string.
        img_size: ``(height, width)`` every image is resized to.
        batch_size: number of triplets per batch.
        image_dir: directory containing the image files.
        shuffle_buffer: size of the shuffle buffer (counted in triplets).

    Returns:
        A ``tf.data.Dataset`` whose elements are 6-tuples
        ``(a_img, a_txt, p_img, p_txt, n_img, n_txt)``. Images are float
        tensors run through ``resnet50.preprocess_input``; texts are strings.
    """
    def gen():
        for a, p, n in triplets:
            yield (
                f"{image_dir}/{a}", keyword_sentences[a],
                f"{image_dir}/{p}", keyword_sentences[p],
                f"{image_dir}/{n}", keyword_sentences[n],
            )

    string_spec = tf.TensorSpec((), tf.string)
    ds = tf.data.Dataset.from_generator(gen, output_signature=(string_spec,) * 6)

    def load_image(path):
        raw = tf.io.read_file(path)
        img = tf.image.decode_jpeg(raw, channels=3)
        img = tf.image.resize(img, img_size)
        # The frozen ImageNet ResNet50 backbone expects its own preprocessing
        # (the same call get_embedding uses at inference), not a /255 rescale.
        return tf.keras.applications.resnet50.preprocess_input(img)

    def parse(a_img, a_txt, p_img, p_txt, n_img, n_txt):
        return (
            load_image(a_img), a_txt,
            load_image(p_img), p_txt,
            load_image(n_img), n_txt,
        )

    # Shuffle while elements are still short path/keyword strings; shuffling
    # after `map` would keep `shuffle_buffer` decoded image triples in memory.
    return (
        ds.shuffle(shuffle_buffer)
          .map(parse, num_parallel_calls=tf.data.AUTOTUNE)
          .batch(batch_size)
          .prefetch(tf.data.AUTOTUNE)
    )

# Training pipeline built with the function's default image size and batch size.
ds = build_triplet_dataset(triplets, keyword_sentences)



In [10]:
# 5. Build model branches
def build_image_branch():
    """Assemble the image encoder: frozen ResNet50 followed by a Dense head.

    Returns:
        Keras ``Model`` named ``"ImageBranch"`` that takes a 224x224x3 image
        batch and returns a 256-unit ReLU projection of the average-pooled
        ResNet50 (ImageNet weights) features.
    """
    image_in = Input(shape=(224, 224, 3), name="image_input")
    backbone = tf.keras.applications.ResNet50(
        weights="imagenet", include_top=False, pooling="avg"
    )
    # Freeze the backbone: only the Dense projection below is trainable.
    backbone.trainable = False
    pooled_features = backbone(image_in)
    projected = layers.Dense(256, activation="relu")(pooled_features)
    return Model(image_in, projected, name="ImageBranch")

# Module-level Universal Sentence Encoder (v4) loaded from TF Hub; get_embedding
# calls it directly to embed query/catalog keyword sentences.
use = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")


In [11]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers, Model, Input


@tf.keras.utils.register_keras_serializable()
class UniversalEmbeddingLayer(layers.Layer):
    """Keras layer wrapping the TF Hub Universal Sentence Encoder (v4).

    The encoder is loaded from TF Hub when the layer is constructed. ``call``
    accepts a string tensor and returns the encoder's output for it.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.use = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

    def call(self, x):
        # Flatten to a 1-D batch of sentences. `tf.squeeze` without an axis
        # would also drop the batch dimension when the batch holds a single
        # sentence (e.g. a final partial batch), handing the encoder a scalar.
        sentences = tf.reshape(tf.cast(x, tf.string), [-1])
        return self.use(sentences)

def build_text_branch():
    """Assemble the text encoder: sentence embedding followed by a Dense head.

    Returns:
        Keras ``Model`` named ``"TextBranch"`` that takes a string input of
        shape ``(1,)`` per sample, embeds it with ``UniversalEmbeddingLayer``
        and projects the result to 256 units with ReLU.
    """
    text_in = Input(shape=(1,), dtype=tf.string, name="text_input")
    sentence_vec = UniversalEmbeddingLayer()(text_in)
    projection = layers.Dense(256, activation="relu", name="text_proj")
    return Model(inputs=text_in, outputs=projection(sentence_vec), name="TextBranch")


In [12]:
def build_fusion_model(embed_dim=128):
    """Fuse image and text features into one L2-normalized embedding.

    Args:
        embed_dim: size of the joint embedding produced by the Dense projection.

    Returns:
        Keras ``Model`` named ``"FusionModel"`` taking
        ``[image_features (256,), text_features (256,)]`` and returning a
        unit-length vector of size ``embed_dim``.
    """
    img_in = Input(shape=(256,), name="image_features")
    txt_in = Input(shape=(256,), name="text_features")
    merged = layers.Concatenate()([img_in, txt_in])
    projected = layers.Dense(embed_dim)(merged)
    # Normalize the projected vector. Feeding `merged` here instead would leave
    # the Dense layer out of the graph and return a 512-d output.
    embedding = layers.Lambda(
        lambda y: tf.math.l2_normalize(y, axis=1),
        output_shape=lambda s: s,
    )(projected)

    return Model([img_in, txt_in], embedding, name="FusionModel")


In [13]:
# Instantiate the three sub-models used by the training loop: the image
# encoder, the text encoder, and the fusion head that combines their outputs.
image_model = build_image_branch()
text_model = build_text_branch()
fusion_model = build_fusion_model()


In [14]:
# 6. Triplet loss and optimizer
def triplet_loss(a,p,n, margin=0.3):
    """Mean hinge triplet loss on squared Euclidean distances.

    Args:
        a: anchor embeddings; distances are summed over axis 1.
        p: positive embeddings, same shape as ``a``.
        n: negative embeddings, same shape as ``a``.
        margin: amount by which the negative distance should exceed the
            positive distance before a sample stops contributing loss.

    Returns:
        Scalar tensor: mean over the batch of ``max(d(a,p) - d(a,n) + margin, 0)``.
    """
    pos_dist = tf.reduce_sum(tf.square(a - p), axis=1)
    neg_dist = tf.reduce_sum(tf.square(a - n), axis=1)
    hinge = tf.maximum(pos_dist - neg_dist + margin, 0.0)
    return tf.reduce_mean(hinge)

# Adam optimizer shared by all trainable weights in the training loop.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)


In [15]:
# 7. Training loop with early stopping + accuracy
EPOCHS, PATIENCE = 5,2

batch_size   = 32  # must match the batch size `ds` was built with
steps = (len(triplets) + batch_size - 1) // batch_size
best_loss, no_improve = np.inf, 0

# Checkpoints are written into this directory below; make sure it exists
# before the first save rather than relying on a later cell to create it.
os.makedirs("trained_mod", exist_ok=True)

# Gather the variables to optimize once instead of rebuilding the list twice
# on every step.
trainable_vars = (
    image_model.trainable_weights
    + text_model.trainable_weights
    + fusion_model.trainable_weights
)

for epoch in range(EPOCHS):
    total_loss, total_acc, count = 0.0, 0.0, 0
    print(f"\n🔁 Epoch {epoch+1}/{EPOCHS}")

    for i, (ai, at, pi, pt, ni, nt) in tqdm(
            enumerate(ds), total=steps, desc="Training", leave=False):
        if i >= steps: break

        with tf.GradientTape() as tape:
            a_emb = fusion_model([image_model(ai), text_model(at)], training=True)
            p_emb = fusion_model([image_model(pi), text_model(pt)], training=True)
            n_emb = fusion_model([image_model(ni), text_model(nt)], training=True)
            loss = triplet_loss(a_emb, p_emb, n_emb)

        grads = tape.gradient(loss, trainable_vars)
        optimizer.apply_gradients(zip(grads, trainable_vars))

        # accumulate loss
        total_loss += float(loss)

        # compute "accuracy": fraction of the batch where d(a,p) < d(a,n)
        dp = tf.reduce_sum(tf.square(a_emb - p_emb), axis=1)
        dn = tf.reduce_sum(tf.square(a_emb - n_emb), axis=1)
        total_acc += float(tf.reduce_mean(tf.cast(dp < dn, tf.float32)))
        count += 1

    # An empty dataset would otherwise divide by zero in the averages below.
    if count == 0:
        print("⚠️ Dataset produced no batches; nothing to train on.")
        break

    avg_loss = total_loss / count
    avg_acc  = total_acc / count
    print(f"✅ Epoch {epoch+1}: Loss={avg_loss:.4f}, Acc={avg_acc:.4f}")

    # early stopping & saving: checkpoint only when the loss improves by more
    # than 1e-4, otherwise count the epoch towards PATIENCE.
    if avg_loss < best_loss - 1e-4:
        best_loss, no_improve = avg_loss, 0
        fusion_model.save("trained_mod/fusion_model.keras")
        image_model.save("trained_mod/image_model.keras")
        text_model.save("trained_mod/text_model.keras")
        print("💾 Models saved.")
    else:
        no_improve += 1
        print(f"⚠️ No improvement for {no_improve} epoch(s).")
        if no_improve >= PATIENCE:
            print(f"🛑 Early stopping at epoch {epoch+1}.")
            break



🔁 Epoch 1/5


                                                                

KeyboardInterrupt: 

In [None]:
import os, datetime, tensorflow as tf

# create an output directory dated for easy tracking
stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
out_dir = f"trained_models_{stamp}"
os.makedirs(out_dir, exist_ok=True)

print(f"\n💾  Saving models to \"{out_dir}\" …")

# File stem -> model, in the order they are written.
models_to_save = {
    "image_model": image_model,
    "text_model": text_model,
    "fusion_model": fusion_model,
}
# Remember whether any step failed so the closing message is accurate.
save_failed = False

# 1️⃣ – save COMPLETE models  (preferred for re-loading later)
try:
    for model_name, model in models_to_save.items():
        model.save(f"{out_dir}/{model_name}.keras")
    print("✅  Full .keras model files saved.")
except Exception as e:
    save_failed = True
    print(f"⚠️  Saving full models failed: {e}")

# 2️⃣ – save WEIGHTS ONLY  (fallback / lighter checkpoint)
try:
    for model_name, model in models_to_save.items():
        model.save_weights(f"{out_dir}/{model_name}.weights.h5")
    print("✅  Weights (.weights.h5) saved.")
except Exception as e:
    save_failed = True
    print(f"⚠️  Saving weights failed: {e}")

if save_failed:
    print("⚠️  Finished, but at least one save step failed (see messages above).")
else:
    print("🎉  All save operations complete!")


In [None]:
# 8. Recommendation pipeline using keyword-based embeddings
def get_embedding(path, sentence):
    """Return the concatenated image + text feature vector for one item.

    The image is loaded at 224x224 RGB, passed through ResNet50's
    ``preprocess_input`` and encoded by ``image_model``; the sentence is
    encoded by the module-level ``use`` encoder.

    Args:
        path: path of the image file.
        sentence: keyword sentence describing the image.

    Returns:
        1-D NumPy array: image features followed by the sentence embedding.
    """
    # NOTE(review): this uses the raw `use` output and never calls `text_model`
    # or `fusion_model`, so the triplet-trained fusion embedding is not what
    # gets compared at recommendation time. Confirm this is intended.
    pil_image = load_img(path, target_size=(224, 224), color_mode='rgb')
    pixels = tf.keras.applications.resnet50.preprocess_input(img_to_array(pil_image))
    image_batch = np.expand_dims(pixels, 0)
    image_vec = image_model.predict(image_batch, verbose=0)[0]
    text_vec = use([sentence])[0].numpy()
    return np.concatenate([image_vec, text_vec])

def recommend(query_img, query_caption, catalog, top_k=5):
    """Rank catalog items by cosine similarity to the query.

    Args:
        query_img: path of the query image.
        query_caption: keyword sentence for the query image.
        catalog: mapping of image path -> keyword sentence to rank.
        top_k: maximum number of results to return.

    Returns:
        list of ``(image_path, similarity)`` pairs, highest similarity first,
        truncated to ``top_k`` entries.
    """
    query_vec = get_embedding(query_img, query_caption)
    scored = []
    for item_path, item_caption in catalog.items():
        item_vec = get_embedding(item_path, item_caption)
        similarity = cosine_similarity([query_vec], [item_vec])[0][0]
        scored.append((item_path, similarity))
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:top_k]


In [None]:
if __name__=="__main__":
    os.makedirs("trained_mod", exist_ok=True)
    # Build a catalog of the first 50 images that have keyword sentences.
    # Images whose captions produced no keywords have no entry in
    # `keyword_sentences`, so they are filtered out instead of raising KeyError.
    catalog_images = [img for img in image_captions if img in keyword_sentences][:50]
    catalog = {
        f"flickr8k/images/{img}": keyword_sentences[img]
        for img in catalog_images
    }

    if catalog:
        # Use the first catalog entry as the query.
        query_img = next(iter(catalog))
        query_kw  = catalog[query_img]
        top6 = recommend(query_img, query_kw, catalog, top_k=6)

        print("\n🎯 Top Recommendations:")
        for p,s in top6:
            print(f"{p}: {s:.3f}")
    else:
        print("⚠️ Catalog is empty; nothing to recommend.")
