# Compare Model Sizes
These experiments examine two main architectures, which are summarized below. All experiments are conducted with a vocab size of 10,000, which makes for a final, extended vocab size of 10,004 when including the four special tokens: pad, oov, bos, eos.

### Imports

In [24]:
import os, sys, io
# Put the parent of the current working directory on the import path;
# presumably this is what lets `from src import model` resolve when the
# notebook is run from a subdirectory of the project -- confirm layout.
sys.path.append(os.path.dirname(os.getcwd()))

import numpy as np
from tensorflow.keras.backend import count_params
from src import model

### Small Network

In [25]:
# Hyperparameters for the small network.
extended_vocab_size = 10004  # 10,000-word vocab plus the 4 special tokens
embedding_dim = 100
rnn_units = 256

In [26]:
# Build the small network from the hyperparameters set in the previous cell.
# NOTE(review): 'uniform' presumably selects a uniformly-initialised
# embedding matrix -- confirm against src.model.build_model.
keras_model = model.build_model(
    extended_vocab_size=extended_vocab_size,
    embedding_dim=embedding_dim,
    embedding_matrix='uniform',
    rnn_units=rnn_units,
)

In [27]:
# Total number of trainable parameters in the small network: add up the
# parameter count of every trainable weight tensor and store it as a plain int.
small_count = int(
    np.sum([count_params(weight) for weight in keras_model.trainable_weights])
)

In [28]:
# Print the layer-by-layer summary (output shapes and parameter counts)
# of the small network.
keras_model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model1_input (InputLayer)    [(None, None)]            0         
_________________________________________________________________
model1_embedding (Embedding) (None, None, 100)         1000400   
_________________________________________________________________
model1_lstm (LSTM)           (None, None, 256)         365568    
_________________________________________________________________
dense_4 (Dense)              (None, None, 100)         25700     
_________________________________________________________________
dense_5 (Dense)              (None, None, 10004)       1010404   
Total params: 2,402,072
Trainable params: 2,402,072
Non-trainable params: 0
_________________________________________________________________


### Large Network

In [29]:
# Hyperparameters for the large network; only the embedding and RNN widths
# differ from the small network, the vocabulary size is the same.
extended_vocab_size = 10004  # 10,000-word vocab plus the 4 special tokens
embedding_dim = 300
rnn_units = 512

In [30]:
# Build the large network from the hyperparameters set in the previous cell.
# This rebinds `keras_model`, replacing the small network built earlier.
keras_model = model.build_model(
    extended_vocab_size=extended_vocab_size,
    embedding_dim=embedding_dim,
    embedding_matrix='uniform',
    rnn_units=rnn_units,
)

In [31]:
# Total number of trainable parameters in the large network: add up the
# parameter count of every trainable weight tensor and store it as a plain int.
large_count = int(
    np.sum([count_params(weight) for weight in keras_model.trainable_weights])
)

In [32]:
# Print the layer-by-layer summary (output shapes and parameter counts)
# of the large network.
keras_model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model1_input (InputLayer)    [(None, None)]            0         
_________________________________________________________________
model1_embedding (Embedding) (None, None, 300)         3001200   
_________________________________________________________________
model1_lstm (LSTM)           (None, None, 512)         1665024   
_________________________________________________________________
dense_6 (Dense)              (None, None, 300)         153900    
_________________________________________________________________
dense_7 (Dense)              (None, None, 10004)       3011204   
Total params: 7,831,328
Trainable params: 7,831,328
Non-trainable params: 0
_________________________________________________________________


### Examine Difference

In [33]:
# Ratio of trainable-parameter counts: how many times bigger the large
# network is than the small one.
large_count / small_count

3.2602386606229956