Commit

[ModelMetrics] Updated
YanSte committed Sep 11, 2023
1 parent 54385c9 commit 2998aac
Showing 1 changed file with 100 additions and 144 deletions.
244 changes: 100 additions & 144 deletions src/skit/ModelMetrics.py
@@ -32,81 +32,68 @@
from skit.show import show_text, show_history

class Metric(Enum):
ACCURACY = "accuracy"
AUC = "auc"
VAL_AUC = "val_auc"
VAL_ACCURACY = "val_accuracy"
VAL_ACCURACY = "val_acc"

class TrainMetric(Enum):
ACCURACY = "acc"
AUC = "auc"

@property
def train_metric_key(self):
"""
Get the training metric key corresponding to the Metric enum value.
"""
def title(self):
if self == Metric.VAL_ACCURACY:
return "Accuracy"
elif self == Metric.VAL_AUC:
return "AUC"

@property
def key(self):
return self.value

@property
def val_metric_key(self):
"""
Get the validation metric key corresponding to the Metric enum value.
"""
if self == Metric.ACCURACY:
return "val_accuracy"
elif self == Metric.AUC:
return "auc"
def validation_metric(self):
return self.value

@property
def train_metric(self):
if self == Metric.VAL_ACCURACY:
return Metric.TrainMetric.ACCURACY.value
elif self == Metric.VAL_AUC:
return "val_auc"
elif self == Metric.VAL_ACCURACY:
return "val_accuracy"
return Metric.TrainMetric.AUC.value

@property
def plot_labels(self):
"""
Get the curve labels corresponding to the given Metric enum.
"""
if self == Metric.ACCURACY or self == Metric.VAL_ACCURACY:
if self == Metric.VAL_ACCURACY:
return {
'Accuracy': {
'Training Accuracy': 'accuracy',
'Validation Accuracy': 'val_accuracy'
},
'Loss': {
'Training Loss': 'loss',
'Validation Loss': 'val_loss'
}
}
elif self == Metric.AUC or self == Metric.VAL_AUC:
elif self == Metric.VAL_AUC:
return {
'AUC': {
'Training AUC': 'auc',
'Validation AUC': 'val_auc'
},
'Loss': {
'Training Loss': 'loss',
'Validation Loss': 'val_loss'
}
}
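
(Side note, not part of the commit: a minimal sketch of how the reworked enum is meant to be read, assuming the module is importable as skit.ModelMetrics.)

# Hedged usage sketch — the import path and the returned values are read off the diff above.
from skit.ModelMetrics import Metric

m = Metric.VAL_ACCURACY
m.key                # "val_acc"  -> key used to index the fit history dict
m.title              # "Accuracy" -> label used in reports
m.validation_metric  # "val_acc"  -> same as key for a VAL_* member
m.train_metric       # "acc"      -> resolved via the nested TrainMetric enum
m.plot_labels        # the Accuracy/Loss curve mapping shown above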

class ModelMetrics:
def __init__(self, versions, metric_to_monitor=Metric.ACCURACY):
def __init__(self):
"""
Initialize an empty ModelMetrics container.

Model versions are registered later with `new_version`, and the metrics to
monitor are supplied per call (e.g. `metric_to_monitors=[Metric.VAL_ACCURACY]`).
"""
self.output = {}
self.metric_to_monitor = metric_to_monitor
for version in versions:
self.output[version] = {
"history": None,
"duration": None,
"best_model_path": None,
"board_path": None
}

def reset(self, version=None):
"""
@@ -117,21 +104,12 @@ def reset(self, version=None):
version : str, optional
The specific version to reset. If None, reset all versions.
"""
default_dict = {
"history": None,
"duration": None,
"best_model_path": None,
"board_path": None
}

if version is not None:
self.output[version] = default_dict
if version is None:
self.output = {}
else:
# Reset all versions
for version in self.output.keys():
self.output[version] = default_dict.copy()
self.output[version] = {}

def get_best_metric(self, version):
def get_best_metrics(self, version, metric_to_monitors=[Metric.VAL_ACCURACY]):
"""
Get the best training and validation metrics for a specific model version.
@@ -143,21 +121,26 @@ def get_best_metric(self, version):
Returns:
dict: Dictionary containing best training and validation metrics.
"""
history = self.output[version]['history'].history
if version not in self.output:
return None

train_metric_key = self.metric_to_monitor.train_metric_key
val_metric_key = self.metric_to_monitor.val_metric_key
history = self.output[version]['History'].history

best_val_index = np.argmax(history[train_metric_key])
best_train_metric = history[train_metric_key][best_val_index]
best_val_metric = history[val_metric_key][best_val_index]
result_dic = { }

return {
f'best_train_{self.metric_to_monitor.name.lower()}': best_train_metric,
f'best_val_{self.metric_to_monitor.name.lower()}': best_val_metric,
}
for metric in metric_to_monitors:
best_val_index = np.argmax(history[metric.validation_metric])
best_val_metric = history[metric.validation_metric][best_val_index]
best_train_metric = history[metric.train_metric][best_val_index]

result_dic[metric.key] = {
f'Train {metric.title}': best_train_metric,
f'Best Validation {metric.title}': best_val_metric,
}

def get_best_report(self, version):
return result_dic
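
For orientation (not part of the diff): get_best_metrics keys its result by metric.key, so for Metric.VAL_ACCURACY the returned dict has the shape below (the numbers are made up).

# Hypothetical return value of get_best_metrics("v1", [Metric.VAL_ACCURACY])
# {
#     "val_acc": {
#         "Train Accuracy": 0.93,
#         "Best Validation Accuracy": 0.91,
#     }
# }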

def get_best_reports(self, version, metric_to_monitors=[Metric.VAL_ACCURACY]):
"""
Get the best model report for a specific model version.
@@ -173,78 +156,50 @@ def get_best_report(self, version):
if version not in self.output:
return None

metrics = self.get_best_metric(version)
metrics_dict = self.get_best_metrics(version, metric_to_monitors)

return {
'version': version,
f'best_train_{self.metric_to_monitor.name.lower()}': metrics[f'best_train_{self.metric_to_monitor.name.lower()}'],
f'best_val_{self.metric_to_monitor.name.lower()}': metrics[f'best_val_{self.metric_to_monitor.name.lower()}'],
'duration': self.output[version]['duration'],
'best_model_path': self.output[version]['best_model_path'],
'board_path': self.output[version]['board_path'],
result_dict = {
'Info': {
'Version': version,
'Duration': self.output[version]['Duration'],
'Best Model Path': self.output[version]['Best Model Path']
}
}

def show_report(self):
result_dict.update(metrics_dict)

return result_dict

def show_all_version_report(self, metric_to_monitors=[Metric.VAL_ACCURACY]):
"""
Display a tabular report of the best model performance.
"""
# Initialize the report DataFrame
columns = ['version', f'best_train_{self.metric_to_monitor.name.lower()}', f'best_val_{self.metric_to_monitor.name.lower()}', 'duration', 'best_model_path', 'board_path']

df = pd.DataFrame(columns=columns)

show_info = True
for version in self.output.keys():
# Get the best training and validation metric for this version
report = self.get_best_report(version)
self.show_report(version, metric_to_monitors, show_info=show_info)  # pass by keyword so it does not land in highlight_max_color
show_info = False

# Add the data to the DataFrame
df = pd.concat([df, pd.DataFrame([report])], ignore_index=True)

# Set 'version' as the index of the DataFrame
df.set_index('version', inplace=True)

# Apply formatting to the duration and metric columns
df['duration'] = df['duration'].apply(lambda x: "{:.2f}".format(x))

metric_columns = [f'best_train_{self.metric_to_monitor.name.lower()}', f'best_val_{self.metric_to_monitor.name.lower()}']
df[metric_columns] = df[metric_columns].applymap(lambda x: "{:.2f}".format(x*100) if self.metric_to_monitor != Metric.VAL_ACCURACY else "{:.2f}%".format(x))

# Highlight the maximum in the metric column
styled_df = df.style.highlight_max(subset=[f'best_val_{self.metric_to_monitor.name.lower()}'], color='lightgreen')

# Display the report
display(styled_df)


def show_best_result(self, version):
def show_report(self, version, metric_to_monitors=[Metric.VAL_ACCURACY], highlight_max_color='lightgreen', show_info=True):
"""
Display the result (best train metric, best validation metric, and duration) for a specific model version.
Parameters
----------
version : str
The model version for which the result will be displayed.
Display a tabular report of the best model performance.
"""
if version not in self.output:
show_text("b", f"No result available for {version}")
return  # nothing to report for an unknown version

result = self.get_best_report(version)
# Get the best training and validation metric for this version
report = self.get_best_reports(version, metric_to_monitors)

if result is not None:
best_train_metric = result.get(f'best_train_{self.metric_to_monitor.name.lower()}', None)
best_val_metric = result.get(f'best_val_{self.metric_to_monitor.name.lower()}', None)
duration = result.get('duration', None)
if show_info:
info_df = pd.DataFrame([report['Info']])  # wrap in a list: the Info values are scalars, so a bare dict would raise
info_df['Duration'] = info_df['Duration'].apply(lambda x: "{:.2f}".format(x))
display(info_df)

metric_name = self.metric_to_monitor.name.lower()
metric_suffix = '%' if self.metric_to_monitor != Metric.VAL_ACCURACY else ''
for metric in metric_to_monitors:
metric_best_report_df = pd.DataFrame([report[metric.key]])  # one-row frame of the scalar best metrics
styled_df = metric_best_report_df.style.highlight_max(subset=[f'Best Validation {metric.title}'], color=highlight_max_color)

if best_train_metric is not None and best_val_metric is not None and duration is not None:
show_text("b", f"Train {metric_name.capitalize()} = {best_train_metric * 100:.2f}{metric_suffix} - Validation {metric_name.capitalize()} = {best_val_metric * 100:.2f}{metric_suffix} - Duration = {duration:.2f}")
else:
show_text("b", f"Result not available for version {version}")
else:
show_text("b", f"Version {version} not found in the output")
display(styled_df)

def new_version(self, version):
self.output[version] = {}

def start_timer(self, version):
"""
@@ -255,7 +210,7 @@ def start_timer(self, version):
version : str
The name of the model version for which to start the timer.
"""
self.output[version]['duration'] = time.time()
self.output[version]['start_time'] = time.time()

def stop_timer(self, version):
"""
@@ -266,9 +221,10 @@ def stop_timer(self, version):
version : str
The name of the model version for which to stop the timer.
"""
if self.output[version]['duration'] is not None:
duration = time.time() - self.output[version]['duration']
self.output[version]['duration'] = duration
start_time = self.output[version].pop('start_time', None)
if start_time is not None:
end_time = time.time()
self.output[version]['Duration'] = end_time - start_time

def add_best_model_path(self, version, path):
"""
@@ -281,20 +237,8 @@ def add_best_model_path(self, version, path):
path : str
The path to the best model.
"""
self.output[version]['best_model_path'] = path
self.output[version]['Best Model Path'] = path

def add_board_path(self, version, path):
"""
Add the link of the tensor board for the specified model version.
Parameters
----------
version : str
The name of the model version for which to add the tensor board link.
link : str
The link or path to the tensor board.
"""
self.output[version]['board_path'] = path

def add_history(self, version, history):
"""
@@ -307,11 +251,12 @@ def add_history(self, version, history):
history : History
The training History object returned by fit(); its .history attribute holds the per-epoch metric lists.
"""
self.output[version]['history'] = history
self.output[version]['History'] = history

def show_history(
self,
version,
metric_to_monitors=[Metric.VAL_ACCURACY],
figsize=(8,6)
):
"""
@@ -325,7 +270,7 @@
-----------
history : dict
The history object typically returned from the .fit() method of a Keras model. It should
have a 'history' attribute containing the training and validation metrics.
have a 'History' attribute containing the training and validation metrics.
figsize : tuple, optional
The width and height in inches for the figure. Defaults to (8,6).
@@ -334,7 +279,7 @@
A nested dictionary defining the metrics to be plotted.
- The top-level key corresponds to the main category (e.g., 'Accuracy' or 'Loss').
- The associated nested dictionary's keys are the curve labels (e.g., 'Training Accuracy')
and the values are the corresponding metric names in the 'history' object (e.g., 'accuracy').
and the values are the corresponding metric names in the 'History' object (e.g., 'accuracy').
Defaults to plotting both training and validation accuracy and loss.
Example:
@@ -355,9 +300,20 @@
In this version the `plot` mapping is built internally from `metric_to_monitors`; it controls which metric curves are plotted and how they are labeled in the generated visualization.
"""
history = self.output[version]['history']
plot = self.metric_to_monitor.plot_labels
display(show_history(history, figsize=figsize, plot=plot))
history = self.output[version]['History']
plot_result_dict = {
'Loss': {
'Training Loss': 'loss',
'Validation Loss': 'val_loss'
}
}

for metric in metric_to_monitors:
plot = metric.plot_labels
plot_result_dict.update(plot)

display(show_history(history, figsize=figsize, plot=plot_result_dict))


def get_best_model_path(self, version):
"""
@@ -374,8 +330,8 @@
The path of the best model based on the highest accuracy score.
Returns None if no model has been added or no best model path is available.
"""
report = self.get_best_report(version)
best_model_path = report.get('best_model_path')
report = self.get_best_reports(version)
best_model_path = report['Info'].get('Best Model Path')

if best_model_path is not None:
return best_model_path
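
Taken together, the reworked class is presumably driven along these lines. This is a hedged end-to-end sketch, not code from the repository: the import path, the Keras model and data, and the metrics=['acc'] compile setting are assumptions — 'acc'/'val_acc' must appear in the fit history for Metric.VAL_ACCURACY to resolve.

from skit.ModelMetrics import Metric, ModelMetrics  # assumed import path

metrics = ModelMetrics()
metrics.new_version("v1")

metrics.start_timer("v1")
history = model.fit(                      # assumed Keras model compiled with metrics=['acc']
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=10,
)
metrics.stop_timer("v1")

metrics.add_history("v1", history)
metrics.add_best_model_path("v1", "models/v1/best.h5")  # hypothetical checkpoint path

metrics.show_report("v1", metric_to_monitors=[Metric.VAL_ACCURACY])
metrics.show_history("v1", metric_to_monitors=[Metric.VAL_ACCURACY])
metrics.show_all_version_report(metric_to_monitors=[Metric.VAL_ACCURACY])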
