# Model: Transfer Learning with CNNs

## Preamble

In [10]:
from tensorflow import keras
import tensorflow_addons as tfa

In [12]:
# Show the installed TensorFlow version as the cell output, for reproducibility.
import tensorflow
tensorflow.__version__

'2.4.1'

## Data Preparation

In [8]:
# Directory prefix that a later cell prepends to every `poster_path` value.
# NOTE(review): the prefix is concatenated without a separator, so the stored
# poster paths presumably start with "/" — confirm.
poster_dir = "../data/01_raw/posters_5"

In [3]:
# Load the "movies" dataset through the kedro data catalog.
# NOTE(review): `catalog` is not defined in this notebook — presumably injected
# by the kedro Jupyter session; confirm before running on a plain kernel.
movies = catalog.load("movies")

2021-03-19 08:34:07,989 - kedro.io.data_catalog - INFO - Loading data from `movies` (ParquetDataSet)...


In [4]:
# Load the held-out "movies_test" dataset through the kedro data catalog.
# NOTE(review): `catalog` is not defined in this notebook — presumably injected
# by the kedro Jupyter session; confirm before running on a plain kernel.
movies_test = catalog.load("movies_test")

2021-03-19 08:34:09,259 - kedro.io.data_catalog - INFO - Loading data from `movies_test` (CSVDataSet)...


In [5]:
# Flag every row of `movies` whose index label also appears in `movies_test`.
# NOTE(review): this assumes both frames share the same index labels — confirm.
movies["test_set"] = movies.index.isin(movies_test.index)

In [6]:
# Count how many rows are flagged as held-out versus not.
movies["test_set"].value_counts()

False    11085
True      1000
Name: test_set, dtype: int64

In [7]:
# Row/column count of the loaded frame, before any filtering.
movies.shape

(12085, 30)

**exclude movies with missing poster paths**

In [None]:
# Keep only the rows that have a poster path. The explicit copy makes `movies`
# an independent frame, so the column assignments in later cells do not raise
# pandas' SettingWithCopyWarning on a slice of the original frame.
movies = movies[movies["poster_path"].notna()].copy()

**convert genre_names from array to list**

In [None]:
# Turn every `genre_names` entry into a plain Python list.
# NOTE(review): the same conversion for `movies_test` was left disabled in the
# original cell; it stays disabled here.
movies["genre_names"] = movies["genre_names"].map(list)

**build the full path to each image file**

In [None]:
def _prefix_poster_dir(path):
    """Return `path` with `poster_dir` prepended exactly once.

    Re-running this cell must not stack the prefix a second time, so paths
    that already start with `poster_dir` are returned unchanged.
    """
    # str() mirrors the f-string formatting used before (e.g. for missing values).
    path = str(path)
    return path if path.startswith(poster_dir) else poster_dir + path


movies["poster_path"] = movies["poster_path"].apply(_prefix_poster_dir)
movies_test["poster_path"] = movies_test["poster_path"].apply(_prefix_poster_dir)

In [None]:
# Row/column count after dropping rows without a poster path.
movies.shape

**test poster paths**

In [None]:
from PIL import Image

In [None]:
def test_poster(poster_path):
    #img_path = f"{poster_dir}{poster_path}"
    try:
        with Image.open(poster_path) as img:
            return True
    except Exception as ex:
        print(ex)
        return False

In [None]:
# Record, per row, whether the poster file can actually be opened.
movies["has_poster"] = movies["poster_path"].map(test_poster)

In [None]:
# Count readable versus unreadable posters.
movies["has_poster"].value_counts()

In [None]:
# Keep only the rows whose poster file could be opened.
movies = movies.loc[movies["has_poster"]]

**exclude test set**

In [None]:
# Row/column count of the held-out frame.
movies_test.shape

In [None]:
# The held-out rows stay in `movies`. They are already marked by the
# `test_set` column, and the generator cells pick the training rows with
# `~movies["test_set"]` and the held-out rows with `movies["test_set"]`.
# Dropping them here would leave that held-out selection empty, so this cell
# only reports how many rows fall on each side of the split.
movies["test_set"].value_counts()

In [None]:
# Row/column count of the frame that feeds the generators.
movies.shape

In [None]:
# Number of genre labels; sizes the final Dense layer and the F1 metrics.
# NOTE(review): hard-coded — presumably has to equal the number of distinct
# genres found in the training data; confirm.
n_genres = 19

**sample for training time control**

In [None]:
# Sample size for the optional down-sampling step; the sampling call itself is
# currently commented out, so this value is unused.
n_samples = 500

In [None]:
# Optional: uncomment to train on a random subset of `n_samples` rows.
#movies = movies.sample(n_samples)

## Model

### Pretrained Deep CNNs

In [None]:
# Input shape given to the pretrained networks; its first two entries are also
# used as the generators' `target_size`.
input_shape = (299, 299, 3)

**InceptionResNet**

In [None]:
# InceptionResNetV2 with ImageNet weights and without its classification head.
inception_resnet = keras.applications.InceptionResNetV2(
    weights="imagenet", include_top=False, input_shape=input_shape
)

**Xception**

In [None]:
# Xception with ImageNet weights and without its classification head.
xception = keras.applications.Xception(
    weights="imagenet", include_top=False, input_shape=input_shape
)

In [None]:
# Pick which pretrained network serves as the feature extractor.
base = inception_resnet

In [None]:
#keras.utils.plot_model(base)

### Metrics

In [None]:
# Precision and recall, plus F1 with micro and weighted averaging over the
# `n_genres` labels (decision threshold 0.5 for both F1 variants).
metrics = [
    keras.metrics.Precision(),
    keras.metrics.Recall(),
    *(
        tfa.metrics.F1Score(
            num_classes=n_genres,
            average=average,
            threshold=0.5,
            name=name,
        )
        for name, average in (("f1_micro", "micro"), ("f1_weighted", "weighted"))
    ),
]



### Transfer Learning Architecture

In [None]:
# Mark the pretrained base as non-trainable so only the new top layers learn.
base.trainable = False

In [None]:
# Inspect the output shape of the pretrained base.
base.output_shape

In [None]:
# Inspect the output tensor of the pretrained base.
base.output

In [None]:
# Inspect the input shape the pretrained base expects.
base.input_shape

In [None]:
# Classification head placed on top of the pretrained base.
top = [
    # Collapse the spatial feature map into one feature vector per image.
    keras.layers.GlobalAveragePooling2D(),
    # Hidden layer. Without an activation, Dense is purely linear and would
    # merge with the output layer into a single linear map, so use ReLU.
    keras.layers.Dense(
        units=64,
        activation="relu",
    ),
    keras.layers.Dropout(0.2),
    # One independent sigmoid per genre.
    keras.layers.Dense(
        units=n_genres,
        activation="sigmoid",
    ),
]

In [None]:
# Stack the pretrained base and the new head into one sequential model.
net = keras.models.Sequential([base, *top])

In [None]:
# Print the layer-by-layer summary of the assembled network.
net.summary()

In [None]:
# Re-check the output shape of the base after it became part of `net`.
base.output_shape

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and the
# metrics list defined earlier.
net.compile(optimizer="adam", loss="binary_crossentropy", metrics=metrics)

### Training Setup

In [None]:
# The ImageNet weights of InceptionResNetV2 were trained on pixels scaled to
# [-1, 1]; `rescale=1/255` would feed [0, 1] instead. Use the preprocessing
# function that ships with the architecture so inputs match the weights.
# NOTE(review): Xception's `preprocess_input` applies the same scaling —
# confirm before switching `base`.
generator = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=keras.applications.inception_resnet_v2.preprocess_input
)

In [None]:
# First two entries of the input shape, as passed to `target_size` below.
input_shape[:2]

In [None]:
# Peek at one label entry to check the format used as `y_col`.
movies["genre_names"].iloc[0]

In [None]:
# Batches for training: every row not flagged as held-out, with image paths
# read from `poster_path` and labels from the `genre_names` lists.
generator_training = generator.flow_from_dataframe(
    dataframe=movies.loc[~movies["test_set"]],
    x_col="poster_path",
    y_col="genre_names",
    target_size=input_shape[:2],
    class_mode="categorical",
    subset="training",
    shuffle=True,
    # Poster files were already checked with `test_poster`.
    validate_filenames=False,
)

In [None]:
# Display the held-out frame for a visual check.
movies_test

In [None]:
# Batches for evaluation: the rows flagged as held-out.
# `subset="validation"` is deliberately not passed: `generator` is created
# without a `validation_split`, so that subset would be an empty slice.
generator_validation = generator.flow_from_dataframe(
    dataframe=movies.loc[movies["test_set"]],
    x_col="poster_path",
    y_col="genre_names",
    class_mode="categorical",
    # Reuse the training label order so every output column means the same
    # genre, even if some genre is missing from the held-out rows.
    classes=list(generator_training.class_indices),
    # Evaluation does not benefit from shuffling; keep the row order stable.
    shuffle=False,
    target_size=input_shape[:2],
    validate_filenames=False,
)

In [None]:
# Output shape of the full network.
net.output_shape

In [None]:
# Early stopping for training. The default monitor is "val_loss", but the
# `net.fit` call in this notebook passes no validation data, so that quantity
# never exists. Watch the training loss instead, tolerate two epochs without
# improvement, and restore the best weights seen.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="loss",
        patience=2,
        restore_best_weights=True,
    )
]

In [None]:
%%time
net.fit(
    x=generator_training,
    #validation_data=generator_validation,
    verbose=True,
    epochs=10
)

In [None]:
# Compute the loss and the compiled metrics on the held-out batches.
net.evaluate(generator_validation)

In [None]:
# Keras data generators keep yielding batches forever when iterated directly,
# so walk the batches by index for exactly one pass. Print only the shapes;
# printing the batches themselves would dump whole image tensors.
for batch_index in range(len(generator_validation)):
    images, labels = generator_validation[batch_index]
    print(batch_index, images.shape, labels.shape)

In [None]:
# Display the model object.
net