Fixed encoder model
adnaniazi committed Jul 8, 2024
1 parent a896567 commit fc35d4c
Showing 2 changed files with 50 additions and 22 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
@@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased]
### Fixed
- Fixed encoder model hogging all available GPU memory and crashing

## [0.1.6] - 2024-07-07
### Fixed
- Fixed bug with not writing dataset version to the file

## [0.1.5] - 2024-07-07
@@ -34,7 +38,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Added
- Basic skeleton of the package and tested it

-[Unreleased]: https://github.com/adnaniazi/capfinder/compare/0.1.5...master
+[Unreleased]: https://github.com/adnaniazi/capfinder/compare/0.1.6...master
+[0.1.6]: https://github.com/adnaniazi/capfinder/compare/0.1.5...0.1.6
[0.1.5]: https://github.com/adnaniazi/capfinder/compare/0.1.4...0.1.5
[0.1.4]: https://github.com/adnaniazi/capfinder/compare/0.1.3...0.1.4
[0.1.3]: https://github.com/adnaniazi/capfinder/compare/0.1.2...0.1.3
65 changes: 44 additions & 21 deletions src/capfinder/encoder_model.py
@@ -22,16 +22,12 @@ def transformer_encoder(
----------
inputs : keras.layers.Layer
The input layer or tensor for the encoder block.
head_size : int
The size of the attention heads.
num_heads : int
The number of attention heads.
ff_dim : int
The dimensionality of the feed-forward network.
dropout : float, optional
The dropout rate applied after the attention layer and within the feed-forward network. Default is 0.0.
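For readers skimming the diff, a transformer encoder block built from these parameters usually follows the standard Keras pattern sketched below. This is a generic illustration only; the function body is not shown in this commit, so it is not necessarily capfinder's exact implementation.

```python
# Generic sketch only: a typical Keras transformer encoder block using the
# documented parameters; not taken from capfinder's source.
from keras import layers

def transformer_encoder_sketch(inputs, head_size, num_heads, ff_dim, dropout=0.0):
    # Multi-head self-attention with residual connection and layer normalization
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs

    # Position-wise feed-forward network, also with a residual connection
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x + res
```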
@@ -81,28 +77,20 @@ def build_model(
Parameters:
input_shape : Tuple[int, int]
The shape of the input data.
head_size : int
The size of the attention heads in the transformer encoder.
num_heads : int
The number of attention heads in the transformer encoder.
ff_dim : int
The dimensionality of the feed-forward network in the transformer encoder.
num_transformer_blocks : int
The number of transformer encoder blocks in the model.
mlp_units : List[int]
A list containing the number of units for each layer in the MLP.
n_classes : int
The number of output classes (for classification tasks).
dropout : float, optional
The dropout rate applied in the transformer encoder.
mlp_dropout : float, optional
The dropout rate applied in the MLP.
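Taken together, these parameters describe a call like the following sketch. The input shape, class count, and hyperparameter values are illustrative assumptions, not values taken from this commit.

```python
# Illustrative only: example invocation of build_model with the documented
# parameters; shapes and values are assumptions, not from this commit.
from capfinder.encoder_model import build_model

model, encoder_model = build_model(
    input_shape=(500, 1),        # assumed: 500 timesteps, 1 channel
    head_size=64,
    num_heads=4,
    ff_dim=128,
    num_transformer_blocks=2,
    mlp_units=[128, 64],
    n_classes=4,
    dropout=0.1,
    mlp_dropout=0.2,
)
model.summary()
```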
@@ -122,8 +110,6 @@ def build_model(
x = layers.GlobalAveragePooling1D(data_format="channels_last")(x)

# Save the encoder output
-# The vectors fro mthe GlobalAveragePolling1D are of fixed size
-# Before this layer, they are not of fixed length
encoder_output = x

# Add multi-layer perceptron (MLP) layers
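The comments removed here explained why GlobalAveragePooling1D marks the encoder output: it averages over the time axis, so sequences of any length map to fixed-size vectors. A minimal standalone illustration (not part of the commit):

```python
# Standalone illustration: GlobalAveragePooling1D maps (batch, timesteps,
# channels) inputs of any length to fixed-size (batch, channels) vectors.
import numpy as np
from keras import layers

pool = layers.GlobalAveragePooling1D(data_format="channels_last")
short = np.random.rand(2, 100, 8).astype("float32")   # 100 timesteps
long = np.random.rand(2, 250, 8).astype("float32")    # 250 timesteps
print(pool(short).shape, pool(long).shape)             # (2, 8) (2, 8)
```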
@@ -144,24 +130,61 @@ def build_model(
return model, encoder_model


# Custom HyperModel class to wrap the model building function
class CapfinderHyperModel(HyperModel):
"""
Custom HyperModel class to wrap the model building function for Capfinder.
This class defines the hyperparameter search space and builds the model
based on the selected hyperparameters, including a variable number of MLP layers.
Attributes:
----------
input_shape : Tuple[int, int]
The shape of the input data.
n_classes : int
The number of output classes for the classification task.
encoder_model : Optional[keras.Model]
Stores the encoder part of the model, initialized during the build process.
"""

def __init__(self, input_shape: Tuple[int, int], n_classes: int):
self.input_shape = input_shape
self.n_classes = n_classes
-self.encoder_model = None  # Initialize an attribute to store the encoder model
+self.encoder_model: Optional[keras.Model] = None

def build(self, hp: HyperParameters) -> Model:
"""
Build and compile the model based on the hyperparameters.
Parameters:
----------
hp : HyperParameters
The hyperparameters to use for building the model.
Returns:
-------
Model
The compiled Keras model.
"""
# Hyperparameter for number of MLP layers
num_mlp_layers = hp.Int("num_mlp_layers", min_value=1, max_value=3, step=1)

# Create a list of MLP units for each layer
mlp_units = [
hp.Int(f"mlp_units_{i}", min_value=32, max_value=256, step=32)
for i in range(num_mlp_layers)
]

# Call the model builder function and obtain the full model and encoder model
model, encoder_model = build_model(
input_shape=self.input_shape,
head_size=hp.Int("head_size", min_value=32, max_value=512, step=32),
num_heads=hp.Int("num_heads", min_value=4, max_value=64, step=1),
ff_dim=hp.Int("ff_dim", min_value=4, max_value=16, step=4),
head_size=hp.Int("head_size", min_value=32, max_value=256, step=32),
num_heads=hp.Int("num_heads", min_value=1, max_value=8, step=1),
ff_dim=hp.Int("ff_dim", min_value=32, max_value=512, step=32),
num_transformer_blocks=hp.Int(
"num_transformer_blocks", min_value=2, max_value=32, step=1
"num_transformer_blocks", min_value=1, max_value=8, step=1
),
-mlp_units=[hp.Int("mlp_units_1", min_value=64, max_value=256, step=32)],
+mlp_units=mlp_units,  # Now passing the list of MLP units
n_classes=self.n_classes,
mlp_dropout=hp.Float("mlp_dropout", min_value=0.1, max_value=0.5, step=0.1),
dropout=hp.Float("dropout", min_value=0.1, max_value=0.5, step=0.1),
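For context on how this hypermodel is typically consumed, a Keras Tuner search over CapfinderHyperModel might look like the sketch below; the tuner type, objective, dataset names, and shapes are assumptions, not part of this commit. Note that the narrowed ranges above (head_size up to 256, at most 8 heads and 8 transformer blocks) cap the size of the largest model the search can instantiate, which is consistent with the changelog entry about the encoder model exhausting GPU memory.

```python
# Illustrative only: plugging CapfinderHyperModel into a Keras Tuner search;
# tuner choice, objective, and dataset names are assumptions.
import keras_tuner as kt
from capfinder.encoder_model import CapfinderHyperModel

hypermodel = CapfinderHyperModel(input_shape=(500, 1), n_classes=4)
tuner = kt.RandomSearch(
    hypermodel,
    objective="val_accuracy",
    max_trials=10,
    directory="tuner_dir",
    project_name="capfinder_encoder",
)
# tuner.search(x_train, y_train, validation_data=(x_val, y_val), epochs=5)
# best_model = tuner.get_best_models(num_models=1)[0]
```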
