Fixed encoder model
adnaniazi committed Jul 8, 2024
1 parent a896567 commit fc35d4c
Showing 2 changed files with 50 additions and 22 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
@@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased]
### Fixed
- Fixed encoder model hogging all available GPU memory and crashing

## [0.1.6] - 2024-07-07
### Fixed
- Fixed bug with not writing dataset version to the file

## [0.1.5] - 2024-07-07
@@ -34,7 +38,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Added
- Basic skeleton of the package and tested it

-[Unreleased]: https://github.com/adnaniazi/capfinder/compare/0.1.5...master
+[Unreleased]: https://github.com/adnaniazi/capfinder/compare/0.1.6...master
+[0.1.6]: https://github.com/adnaniazi/capfinder/compare/0.1.5...0.1.6
[0.1.5]: https://github.com/adnaniazi/capfinder/compare/0.1.4...0.1.5
[0.1.4]: https://github.com/adnaniazi/capfinder/compare/0.1.3...0.1.4
[0.1.3]: https://github.com/adnaniazi/capfinder/compare/0.1.2...0.1.3
65 changes: 44 additions & 21 deletions src/capfinder/encoder_model.py
@@ -22,16 +22,12 @@ def transformer_encoder(
----------
inputs : keras.layers.Layer
The input layer or tensor for the encoder block.
head_size : int
The size of the attention heads.
num_heads : int
The number of attention heads.
ff_dim : int
The dimensionality of the feed-forward network.
dropout : float, optional
The dropout rate applied after the attention layer and within the feed-forward network. Default is 0.0.
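For readers skimming the diff, a transformer encoder block built from these parameters usually follows the standard Keras pattern sketched below. This is a generic illustration only; the function body is not shown in this commit, so it is not necessarily capfinder's exact implementation.

```python
# Generic sketch only: a typical Keras transformer encoder block using the
# documented parameters; not taken from capfinder's source.
from keras import layers

def transformer_encoder_sketch(inputs, head_size, num_heads, ff_dim, dropout=0.0):
    # Multi-head self-attention with residual connection and layer normalization
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs

    # Position-wise feed-forward network, also with a residual connection
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x + res
```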
@@ -81,28 +77,20 @@ def build_model(
Parameters:
input_shape : Tuple[int, int]
The shape of the input data.
head_size : int
The size of the attention heads in the transformer encoder.
num_heads : int
The number of attention heads in the transformer encoder.
ff_dim : int
The dimensionality of the feed-forward network in the transformer encoder.
num_transformer_blocks : int
The number of transformer encoder blocks in the model.
mlp_units : List[int]
A list containing the number of units for each layer in the MLP.
n_classes : int
The number of output classes (for classification tasks).
dropout : float, optional
The dropout rate applied in the transformer encoder.
mlp_dropout : float, optional
The dropout rate applied in the MLP.
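Taken together, these parameters describe a call like the following sketch. The input shape, class count, and hyperparameter values are illustrative assumptions, not values taken from this commit.

```python
# Illustrative only: example invocation of build_model with the documented
# parameters; shapes and values are assumptions, not from this commit.
from capfinder.encoder_model import build_model

model, encoder_model = build_model(
    input_shape=(500, 1),        # assumed: 500 timesteps, 1 channel
    head_size=64,
    num_heads=4,
    ff_dim=128,
    num_transformer_blocks=2,
    mlp_units=[128, 64],
    n_classes=4,
    dropout=0.1,
    mlp_dropout=0.2,
)
model.summary()
```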
@@ -122,8 +110,6 @@ def build_model(
x = layers.GlobalAveragePooling1D(data_format="channels_last")(x)

# Save the encoder output
-# The vectors fro mthe GlobalAveragePolling1D are of fixed size
-# Before this layer, they are not of fixed length
encoder_output = x

# Add multi-layer perceptron (MLP) layers
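The comments removed here explained why GlobalAveragePooling1D marks the encoder output: it averages over the time axis, so sequences of any length map to fixed-size vectors. A minimal standalone illustration (not part of the commit):

```python
# Standalone illustration: GlobalAveragePooling1D maps (batch, timesteps,
# channels) inputs of any length to fixed-size (batch, channels) vectors.
import numpy as np
from keras import layers

pool = layers.GlobalAveragePooling1D(data_format="channels_last")
short = np.random.rand(2, 100, 8).astype("float32")   # 100 timesteps
long = np.random.rand(2, 250, 8).astype("float32")    # 250 timesteps
print(pool(short).shape, pool(long).shape)             # (2, 8) (2, 8)
```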
@@ -144,24 +130,61 @@ def build_model(
return model, encoder_model


# Custom HyperModel class to wrap the model building function
class CapfinderHyperModel(HyperModel):
"""
Custom HyperModel class to wrap the model building function for Capfinder.
This class defines the hyperparameter search space and builds the model
based on the selected hyperparameters, including a variable number of MLP layers.
Attributes:
----------
input_shape : Tuple[int, int]
The shape of the input data.
n_classes : int
The number of output classes for the classification task.
encoder_model : Optional[keras.Model]
Stores the encoder part of the model, initialized during the build process.
"""

def __init__(self, input_shape: Tuple[int, int], n_classes: int):
self.input_shape = input_shape
self.n_classes = n_classes
-self.encoder_model = None  # Initialize an attribute to store the encoder model
+self.encoder_model: Optional[keras.Model] = None

def build(self, hp: HyperParameters) -> Model:
"""
Build and compile the model based on the hyperparameters.
Parameters:
----------
hp : HyperParameters
The hyperparameters to use for building the model.
Returns:
-------
Model
The compiled Keras model.
"""
# Hyperparameter for number of MLP layers
num_mlp_layers = hp.Int("num_mlp_layers", min_value=1, max_value=3, step=1)

# Create a list of MLP units for each layer
mlp_units = [
hp.Int(f"mlp_units_{i}", min_value=32, max_value=256, step=32)
for i in range(num_mlp_layers)
]

# Call the model builder function and obtain the full model and encoder model
model, encoder_model = build_model(
input_shape=self.input_shape,
head_size=hp.Int("head_size", min_value=32, max_value=512, step=32),
num_heads=hp.Int("num_heads", min_value=4, max_value=64, step=1),
ff_dim=hp.Int("ff_dim", min_value=4, max_value=16, step=4),
head_size=hp.Int("head_size", min_value=32, max_value=256, step=32),
num_heads=hp.Int("num_heads", min_value=1, max_value=8, step=1),
ff_dim=hp.Int("ff_dim", min_value=32, max_value=512, step=32),
num_transformer_blocks=hp.Int(
"num_transformer_blocks", min_value=2, max_value=32, step=1
"num_transformer_blocks", min_value=1, max_value=8, step=1
),
-mlp_units=[hp.Int("mlp_units_1", min_value=64, max_value=256, step=32)],
+mlp_units=mlp_units,  # Now passing the list of MLP units
n_classes=self.n_classes,
mlp_dropout=hp.Float("mlp_dropout", min_value=0.1, max_value=0.5, step=0.1),
dropout=hp.Float("dropout", min_value=0.1, max_value=0.5, step=0.1),
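For context on how this hypermodel is typically consumed, a Keras Tuner search over CapfinderHyperModel might look like the sketch below; the tuner type, objective, dataset names, and shapes are assumptions, not part of this commit. Note that the narrowed ranges above (head_size up to 256, at most 8 heads and 8 transformer blocks) cap the size of the largest model the search can instantiate, which is consistent with the changelog entry about the encoder model exhausting GPU memory.

```python
# Illustrative only: plugging CapfinderHyperModel into a Keras Tuner search;
# tuner choice, objective, and dataset names are assumptions.
import keras_tuner as kt
from capfinder.encoder_model import CapfinderHyperModel

hypermodel = CapfinderHyperModel(input_shape=(500, 1), n_classes=4)
tuner = kt.RandomSearch(
    hypermodel,
    objective="val_accuracy",
    max_trials=10,
    directory="tuner_dir",
    project_name="capfinder_encoder",
)
# tuner.search(x_train, y_train, validation_data=(x_val, y_val), epochs=5)
# best_model = tuner.get_best_models(num_models=1)[0]
```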
