<a href="https://colab.research.google.com/github/chandini2595/comprehensive-transfer-learning-experiments/blob/main/Colab/Transfer_learning_on_various_modalities.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Images

In [1]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import cifar10

# Load and preprocess CIFAR-10 (binary: cat vs dog)
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Use only classes 3 (cat) and 5 (dog)
class_filter = lambda x: (x == 3) | (x == 5)
mask_train = class_filter(y_train)
mask_test = class_filter(y_test)

x_train, y_train = x_train[mask_train.flatten()], y_train[mask_train.flatten()]
x_test, y_test = x_test[mask_test.flatten()], y_test[mask_test.flatten()]

# Normalize and resize to 160x160
x_train = tf.image.resize(x_train, (160, 160)) / 255.0
x_test = tf.image.resize(x_test, (160, 160)) / 255.0

# Binary labels: cat=0, dog=1
y_train = (y_train == 5).astype(int)
y_test = (y_test == 5).astype(int)

# Feature extractor
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(160, 160, 3))
base_model.trainable = False

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train on smaller batch for safety
model.fit(x_train[:500], y_train[:500], epochs=2, validation_data=(x_test[:100], y_test[:100]))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/2
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 622ms/step - accuracy: 0.6439 - loss: 0.6313 - val_accuracy: 0.7100 - val_loss: 0.5579
Epoch 2/2
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.7218 - loss: 0.5594 - val_accuracy: 0.7500 - val_loss: 0.4963


<keras.src.callbacks.history.History at 0x7bffcdabc590>

In [2]:
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=2, validation_data=(x_test, y_test))


Epoch 1/2
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 61ms/step - accuracy: 0.6868 - loss: 0.5802 - val_accuracy: 0.7770 - val_loss: 0.4728
Epoch 2/2
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 25ms/step - accuracy: 0.8328 - loss: 0.3706 - val_accuracy: 0.8070 - val_loss: 0.4099


<keras.src.callbacks.history.History at 0x7bffa99a3e10>

# Videos

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import TimeDistributed, LSTM, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import cifar10

# Reduce size for memory safety
NUM_VIDEOS = 50
FRAMES_PER_VIDEO = 5
IMG_SIZE = 128  # Smaller image

# Load CIFAR-10 and preprocess
(x, y), _ = cifar10.load_data()
x = tf.image.resize(x[:NUM_VIDEOS * FRAMES_PER_VIDEO], (IMG_SIZE, IMG_SIZE)) / 255.0
y = y[:NUM_VIDEOS * FRAMES_PER_VIDEO]

# Create fake videos
videos = tf.reshape(x, (-1, FRAMES_PER_VIDEO, IMG_SIZE, IMG_SIZE, 3))
labels = tf.reshape(y, (-1, FRAMES_PER_VIDEO))[:, 0]
labels = tf.cast(labels < 5, dtype=tf.int32)  # Binary labels

# MobileNetV2 (smaller input)
cnn = MobileNetV2(include_top=False, weights='imagenet', input_shape=(IMG_SIZE, IMG_SIZE, 3))
cnn.trainable = False

# Build model
model = Sequential([
    TimeDistributed(cnn),
    TimeDistributed(GlobalAveragePooling2D()),
    LSTM(32),  # Smaller LSTM
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train safely
model.fit(videos, labels, epochs=2, batch_size=4)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/2
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 50ms/step - accuracy: 0.4605 - loss: 0.8001
Epoch 2/2
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - accuracy: 0.5551 - loss: 0.6887


<keras.src.callbacks.history.History at 0x78151cc5ce50>

# Audios

In [3]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [5]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
import numpy as np

# Load YAMNet
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet = hub.load(yamnet_model_handle)

# Load Speech Commands dataset
ds, ds_info = tfds.load('speech_commands', split='train[:1%]', with_info=True)
ds = ds.map(lambda x: x['audio'])

# Extract embeddings using YAMNet
def extract_embedding(waveform):
    # Cast waveform to float32
    waveform = tf.cast(waveform, tf.float32)
    scores, embeddings, _ = yamnet(waveform)
    return tf.reduce_mean(embeddings, axis=0)

# Prepare dataset
X, y = [], []
for i, waveform in enumerate(ds.take(100)):
    emb = extract_embedding(waveform)
    X.append(emb)
    y.append(i % 2)  # Dummy binary label

X, y = np.array(X), np.array(y)

# Build and train simple classifier
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1024,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=3)

Epoch 1/3
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 155ms/step - accuracy: 0.4733 - loss: 10.0833
Epoch 2/3
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.4936 - loss: 3.4876
Epoch 3/3
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5106 - loss: 3.0228 


<keras.src.callbacks.history.History at 0x7814a05b96d0>