In [1]:
# # Create a directory to save model performance data if it doesn't exist
# PERFORMANCE_SAVE_DIR = '/content/drive/MyDrive/Colab Notebooks/performance_data'
# os.makedirs(PERFORMANCE_SAVE_DIR, exist_ok=True)
# print(f"Performance data will be saved in: {PERFORMANCE_SAVE_DIR}")

In [2]:
# --- Step 1: Mount your Google Drive ---
# Mounts Google Drive at /content/drive; the data directories read later in this
# notebook are paths underneath that mount point.
# This will prompt you for authorization. Follow the link to get your code.
from google.colab import drive
drive.mount('/content/drive')
# Only reached once drive.mount() has returned without raising.
print("✅ Google Drive mounted successfully.")

Mounted at /content/drive
✅ Google Drive mounted successfully.


In [3]:
import os
import json
import pandas as pd

# Load every per-model performance JSON file into one DataFrame (one row per file).

# Define the directory where the performance data is saved
PERFORMANCE_SAVE_DIR = '/content/drive/MyDrive/Colab Notebooks/performance_data'

# One parsed JSON payload per successfully read file.
all_performance_data = []

if os.path.isdir(PERFORMANCE_SAVE_DIR):
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the summary table stable from one run to the next.
    for filename in sorted(os.listdir(PERFORMANCE_SAVE_DIR)):
        if not filename.endswith(".json"):
            continue
        filepath = os.path.join(PERFORMANCE_SAVE_DIR, filename)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                all_performance_data.append(json.load(f))
        except (OSError, ValueError) as e:
            # OSError: unreadable file; ValueError: malformed JSON or bad encoding
            # (json.JSONDecodeError and UnicodeDecodeError are both ValueErrors).
            # A bad file is reported and skipped so the others still load.
            print(f"Error reading file {filename}: {e}")
else:
    # Same message style as the resource-usage loader, instead of an
    # unhandled FileNotFoundError from os.listdir().
    print(f"Directory not found: {PERFORMANCE_SAVE_DIR}")

if not all_performance_data:
    print("No performance JSON files were loaded.")

# Convert the list of dictionaries into a pandas DataFrame
performance_df = pd.DataFrame(all_performance_data)

# Display the DataFrame
print("\nModel Performance Summary:")
display(performance_df)


Model Performance Summary:


Unnamed: 0,model_name,accuracy,f1_score,training_time,roc_auc_score,confusion_matrix,false_positive_rate
0,Random Forest (Raw Data),1.0,1.0,203.417911,1.0,"[[9000, 0], [0, 80000]]",
1,SVM (SMOTE Subset),0.9986,0.9986,2.518111,,"[[1796, 4], [3, 3197]]",0.002222
2,Random Forest (With PCA),0.99927,0.999271,24.390434,1.0,"[[9000, 0], [65, 79935]]",0.0
3,Random Forest (With SMOTE),1.0,1.0,314.686605,,"[[9000, 0], [0, 80000]]",


In [7]:
# Load every resource-usage CSV log and stack them into one DataFrame.

# Define the directory where the resource usage data is saved
RESOURCE_USAGE_SAVE_DIR = '/content/drive/MyDrive/Colab Notebooks/model_resource_usage_logs'

# One DataFrame per successfully read CSV file.
all_resource_usage_data = []

# Always (re)bind the result. Without this, a missing directory or an empty
# folder would leave `resource_usage_df` undefined on a fresh kernel, or worse,
# silently pointing at data from an earlier run of this cell.
resource_usage_df = pd.DataFrame()

if os.path.isdir(RESOURCE_USAGE_SAVE_DIR):
    # Sorted so the concatenated row order does not depend on the arbitrary
    # order in which os.listdir() returns entries.
    for filename in sorted(os.listdir(RESOURCE_USAGE_SAVE_DIR)):
        if not filename.endswith(".csv"):
            continue
        filepath = os.path.join(RESOURCE_USAGE_SAVE_DIR, filename)
        try:
            all_resource_usage_data.append(pd.read_csv(filepath))
        except (OSError, ValueError) as e:
            # pandas' ParserError / EmptyDataError and UnicodeDecodeError are
            # ValueErrors; unreadable files raise OSError. Report and skip.
            print(f"Error reading file {filename}: {e}")

    if all_resource_usage_data:
        # ignore_index=True renumbers the rows 0..n-1 across all files.
        resource_usage_df = pd.concat(all_resource_usage_data, ignore_index=True)

        print("\nModel Resource Usage Summary:")
        display(resource_usage_df)
    else:
        print("No CSV files found in the directory.")
else:
    print(f"Directory not found: {RESOURCE_USAGE_SAVE_DIR}")


Model Resource Usage Summary:


Unnamed: 0,timestamp,cpu_percent_before,memory_before,cpu_percent_after,memory_after,memory_consumed
0,20250730_205905,91.0,5239107584,10.6,5264928768,25821184
1,20250730_210651,5.5,9323851776,4.5,9328209920,4358144
2,20250801_221615,58.9,5626425344,3.5,5652049920,25624576
3,20250801_222102,3.5,5621174272,3.5,5584646144,-36528128
4,20250802_082609,4.0,5554716672,3.5,5574647808,19931136
5,20250802_084009,57.8,5598089216,86.5,5690544128,92454912
6,20250802_084328,5.0,5671788544,3.0,5626056704,-45731840
7,20250802_190333,59.8,5245718528,2.5,5177602048,-68116480
8,20250802_191346,5.1,3936006144,5.5,3919831040,-16175104
9,20250803_190953,38.7,3932786688,3.5,3957776384,24989696


# Task
Analyze the model performance data (from the "performance_data" folder) and the resource usage logs (from the "model_resource_usage_logs" folder), focusing on the SVM model's performance and resource consumption. Based on this analysis, write a report outlining next steps and potential improvements for the models, particularly the SVM model.

## Analyze performance data

### Subtask:
Examine the `performance_df` to understand the accuracy, F1 score, training time, and other metrics for each model, paying close attention to the SVM model's performance.


**Reasoning**:
Display the performance_df to inspect the SVM model's performance metrics and compare them to other models as requested in the instructions.



In [8]:
# Show the full metrics table first for context.
display(performance_df)

# Keep only the row(s) whose model_name is exactly the SVM entry; the result
# is reused by later cells as `svm_performance`.
svm_performance = performance_df.loc[
    performance_df['model_name'].eq('SVM (SMOTE Subset)')
]

print("\nSVM Model Performance:")
display(svm_performance)

# The full table is shown again so the SVM row can be read next to the others.
print("\nComparison with other models:")
display(performance_df)

Unnamed: 0,model_name,accuracy,f1_score,training_time,roc_auc_score,confusion_matrix,false_positive_rate
0,Random Forest (Raw Data),1.0,1.0,203.417911,1.0,"[[9000, 0], [0, 80000]]",
1,SVM (SMOTE Subset),0.9986,0.9986,2.518111,,"[[1796, 4], [3, 3197]]",0.002222
2,Random Forest (With PCA),0.99927,0.999271,24.390434,1.0,"[[9000, 0], [65, 79935]]",0.0
3,Random Forest (With SMOTE),1.0,1.0,314.686605,,"[[9000, 0], [0, 80000]]",



SVM Model Performance:


Unnamed: 0,model_name,accuracy,f1_score,training_time,roc_auc_score,confusion_matrix,false_positive_rate
1,SVM (SMOTE Subset),0.9986,0.9986,2.518111,,"[[1796, 4], [3, 3197]]",0.002222



Comparison with other models:


Unnamed: 0,model_name,accuracy,f1_score,training_time,roc_auc_score,confusion_matrix,false_positive_rate
0,Random Forest (Raw Data),1.0,1.0,203.417911,1.0,"[[9000, 0], [0, 80000]]",
1,SVM (SMOTE Subset),0.9986,0.9986,2.518111,,"[[1796, 4], [3, 3197]]",0.002222
2,Random Forest (With PCA),0.99927,0.999271,24.390434,1.0,"[[9000, 0], [65, 79935]]",0.0
3,Random Forest (With SMOTE),1.0,1.0,314.686605,,"[[9000, 0], [0, 80000]]",


## Analyze resource usage data

### Subtask:
Examine the `resource_usage_df` to understand the CPU and memory usage for the models, if the data can be linked to specific models.


**Reasoning**:
Examine the structure and content of the `resource_usage_df` to understand the available information and identify potential links to specific models.



In [9]:
# Structure of the combined resource-usage log: columns, dtypes, non-null counts.
print("Resource Usage DataFrame Info:")
resource_usage_df.info()

# First five rows as a sample of the logged values.
print("\nResource Usage DataFrame Head:")
display(resource_usage_df.iloc[:5])

Resource Usage DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   timestamp           14 non-null     object 
 1   cpu_percent_before  14 non-null     float64
 2   memory_before       14 non-null     int64  
 3   cpu_percent_after   14 non-null     float64
 4   memory_after        14 non-null     int64  
 5   memory_consumed     14 non-null     int64  
dtypes: float64(2), int64(3), object(1)
memory usage: 804.0+ bytes

Resource Usage DataFrame Head:


Unnamed: 0,timestamp,cpu_percent_before,memory_before,cpu_percent_after,memory_after,memory_consumed
0,20250730_205905,91.0,5239107584,10.6,5264928768,25821184
1,20250730_210651,5.5,9323851776,4.5,9328209920,4358144
2,20250801_221615,58.9,5626425344,3.5,5652049920,25624576
3,20250801_222102,3.5,5621174272,3.5,5584646144,-36528128
4,20250802_082609,4.0,5554716672,3.5,5574647808,19931136


**Reasoning**:
Based on the examination of `resource_usage_df`, the 'timestamp' column appears to be the only potential link to specific models. However, without a direct mapping between these timestamps and the timestamps or training times in the `performance_df`, it's not possible to directly link resource usage to specific models. Therefore, the next step is to note this limitation and analyze the overall trends in resource usage across all logged events.



In [10]:
# Try to tie each resource-usage row to a model, then fall back to overall stats.
print("Checking for a direct link between resource usage timestamps and model training times...")

# Nothing is computed for this check; the conclusion comes from inspecting the
# two frames:
#   * resource_usage_df timestamps are formatted YYYYMMDD_HHMMSS, whereas
#     performance_df only records training_time in seconds and no timestamp;
#   * resource_usage_df has no column that names the model being run.
print("No direct link found between resource usage logs and specific models based on available data.")

# Aggregate statistics over every logged event (label, column) pairs are
# printed in this fixed order.
print("\nAnalyzing overall trends in resource usage:")
for stat_label, stat_column in (
    ("Average CPU Percent Before:", 'cpu_percent_before'),
    ("Average CPU Percent After:", 'cpu_percent_after'),
    ("Average Memory Before (bytes):", 'memory_before'),
    ("Average Memory After (bytes):", 'memory_after'),
    ("Average Memory Consumed (bytes):", 'memory_consumed'),
):
    print(stat_label, resource_usage_df[stat_column].mean())

# Extremes of the per-event memory delta; negative values appear in the data.
print("\nMaximum Memory Consumed (bytes):", resource_usage_df['memory_consumed'].max())
print("Minimum Memory Consumed (bytes):", resource_usage_df['memory_consumed'].min())

# Plain-language recap, one line per finding.
print("\nSummary of Resource Usage Analysis:")
print(
    "The 'resource_usage_df' contains timestamps, CPU usage before and after, and memory usage before, after, and consumed for various events.",
    "There is no explicit column or clear way to directly link these resource usage logs to specific models (e.g., SVM, Random Forest).",
    "The analysis of resource usage is limited to overall trends across all logged events.",
    "Overall, the data shows average CPU usage before and after events, and memory consumption. Some events show significant memory consumption, while others show negative memory consumption (indicating memory release).",
    sep="\n",
)


Checking for a direct link between resource usage timestamps and model training times...
No direct link found between resource usage logs and specific models based on available data.

Analyzing overall trends in resource usage:
Average CPU Percent Before: 24.62857142857143
Average CPU Percent After: 10.435714285714285
Average Memory Before (bytes): 5272951661.714286
Average Memory After (bytes): 5272464822.857142
Average Memory Consumed (bytes): -486838.85714285716

Maximum Memory Consumed (bytes): 92454912
Minimum Memory Consumed (bytes): -68116480

Summary of Resource Usage Analysis:
The 'resource_usage_df' contains timestamps, CPU usage before and after, and memory usage before, after, and consumed for various events.
There is no explicit column or clear way to directly link these resource usage logs to specific models (e.g., SVM, Random Forest).
The analysis of resource usage is limited to overall trends across all logged events.
Overall, the data shows average CPU usage before and

## Identify areas for improvement

### Subtask:
Based on the analysis of both `performance_df` and the overall trends observed in `resource_usage_df`, identify potential areas for improvement for the models, particularly the SVM model. This could include improving performance metrics, reducing training time, or optimizing resource usage.


**Reasoning**:
Review the performance of the SVM model and compare it to other models, considering the resource usage trends for potential improvements.



In [11]:
# Review the SVM metrics against the other models and list improvement areas.
# Relies on `performance_df`, `svm_performance` and `resource_usage_df` from the
# earlier cells.
print("Reviewing SVM Model Performance and Potential Improvements:")

# Fail with a clear message instead of an opaque IndexError further down when
# the SVM row was not found in the performance data.
if svm_performance.empty:
    raise ValueError(
        "No 'SVM (SMOTE Subset)' row found in performance_df; cannot review the SVM model."
    )

# Looked up once and reused in both training-time messages below.
svm_training_time = svm_performance['training_time'].iloc[0]

# 1. Review SVM performance metrics
print("\nSVM Model Performance (from svm_performance):")
display(svm_performance)

# 2. Compare SVM to other models (from performance_df)
print("\nComparison of SVM with Other Models:")
display(performance_df)

# 3. Consider training time
print("\nSVM Training Time vs. Other Models:")
print(f"SVM Training Time: {svm_training_time:.2f} seconds")
print("Other models training times:")
display(performance_df[['model_name', 'training_time']])

# 4. Review overall resource usage trends (from resource_usage_df)
print("\nOverall Resource Usage Trends (from resource_usage_df):")
print("Average CPU Percent Before:", resource_usage_df['cpu_percent_before'].mean())
print("Average CPU Percent After:", resource_usage_df['cpu_percent_after'].mean())
print("Average Memory Before (bytes):", resource_usage_df['memory_before'].mean())
print("Average Memory After (bytes):", resource_usage_df['memory_after'].mean())
print("Average Memory Consumed (bytes):", resource_usage_df['memory_consumed'].mean())
print("Maximum Memory Consumed (bytes):", resource_usage_df['memory_consumed'].max())

# 5. Identify potential areas for improvement for the SVM model
print("\nPotential Areas for Improvement for SVM Model:")

# Based on performance_df:
print("- Performance Metrics: SVM has slightly lower accuracy/F1 than Random Forest models. Could explore hyperparameter tuning or different kernels to potentially improve accuracy/F1 without significantly increasing training time.")
# The Random Forest timings quoted in this message are hard-coded from the
# current performance_df (203s, 315s, 24s); update them if the data changes.
# The wording no longer calls 2.5s "comparable" to 24s, which contradicted both
# the numbers and the report generated later in the notebook.
print(f"- Training Time: SVM's training time ({svm_training_time:.2f}s) is significantly faster than Random Forest models trained on raw data or SMOTE data (over 200s and 300s respectively). It is also considerably faster than Random Forest with PCA (24s). The current training time seems very good.")

# Based on resource_usage_df (overall trends):
print("- Resource Usage: While resource usage data is not directly linked to SVM, the overall trends show memory consumption varies significantly between logged events. If SVM training or prediction were found to be resource-intensive (which is less likely given its fast training time), optimizing data handling or model complexity could be considered. However, given the fast training time, resource usage for SVM training is likely low.")

# General improvement areas for other models based on performance_df:
print("\nGeneral Improvement Areas for Other Models (Based on performance_df):")
print("- Random Forest (Raw Data) and Random Forest (With SMOTE): These models have very high accuracy/F1 but significantly long training times. Investigating ways to reduce training time (e.g., feature selection, dimensionality reduction like PCA, or optimizing hyperparameters related to tree complexity/ensemble size) would be beneficial.")
print("- Random Forest (With PCA): Achieves high accuracy/F1 and a much faster training time than RF (Raw/SMOTE). This suggests dimensionality reduction was effective. Further tuning of PCA components or RF hyperparameters could potentially find a better balance.")

Reviewing SVM Model Performance and Potential Improvements:

SVM Model Performance (from svm_performance):


Unnamed: 0,model_name,accuracy,f1_score,training_time,roc_auc_score,confusion_matrix,false_positive_rate
1,SVM (SMOTE Subset),0.9986,0.9986,2.518111,,"[[1796, 4], [3, 3197]]",0.002222



Comparison of SVM with Other Models:


Unnamed: 0,model_name,accuracy,f1_score,training_time,roc_auc_score,confusion_matrix,false_positive_rate
0,Random Forest (Raw Data),1.0,1.0,203.417911,1.0,"[[9000, 0], [0, 80000]]",
1,SVM (SMOTE Subset),0.9986,0.9986,2.518111,,"[[1796, 4], [3, 3197]]",0.002222
2,Random Forest (With PCA),0.99927,0.999271,24.390434,1.0,"[[9000, 0], [65, 79935]]",0.0
3,Random Forest (With SMOTE),1.0,1.0,314.686605,,"[[9000, 0], [0, 80000]]",



SVM Training Time vs. Other Models:
SVM Training Time: 2.52 seconds
Other models training times:


Unnamed: 0,model_name,training_time
0,Random Forest (Raw Data),203.417911
1,SVM (SMOTE Subset),2.518111
2,Random Forest (With PCA),24.390434
3,Random Forest (With SMOTE),314.686605



Overall Resource Usage Trends (from resource_usage_df):
Average CPU Percent Before: 24.62857142857143
Average CPU Percent After: 10.435714285714285
Average Memory Before (bytes): 5272951661.714286
Average Memory After (bytes): 5272464822.857142
Average Memory Consumed (bytes): -486838.85714285716
Maximum Memory Consumed (bytes): 92454912

Potential Areas for Improvement for SVM Model:
- Performance Metrics: SVM has slightly lower accuracy/F1 than Random Forest models. Could explore hyperparameter tuning or different kernels to potentially improve accuracy/F1 without significantly increasing training time.
- Training Time: SVM's training time (2.52s) is significantly faster than Random Forest models trained on raw data or SMOTE data (over 200s and 300s respectively). It is comparable to Random Forest with PCA (24s). The current training time seems very good.
- Resource Usage: While resource usage data is not directly linked to SVM, the overall trends show memory consumption varies sign

## Synthesize findings and recommendations

### Subtask:
Combine the insights from both datasets to write a report summarizing the performance and resource usage of the models, highlighting the SVM model, and recommending next steps and potential improvements.


**Reasoning**:
Combine the insights from both datasets to write a report summarizing the performance and resource usage of the models, highlighting the SVM model, and recommending next steps and potential improvements based on the previous analysis.



In [12]:
# Generate the report
# Builds one Markdown-formatted string in `report` from fixed narrative text, the
# performance table (performance_df) and, when available, the aggregate
# resource-usage statistics (resource_usage_df), then prints it.
# The metric values quoted in the narrative are hard-coded from the current
# data; only the table and the resource-usage averages are computed at run time.
report = """
## Model Performance and Resource Usage Report

This report summarizes the performance and resource usage of various models trained for the task, with a focus on the SVM model and recommendations for future improvements.

### 1. Model Performance Summary

The following table presents the key performance metrics for all evaluated models:

"""
# DataFrame.to_markdown() needs the optional `tabulate` package.
report += performance_df.to_markdown(index=False)
report += """

As observed, the Random Forest models trained on Raw Data and With SMOTE achieved perfect accuracy and F1 scores (1.0). The Random Forest with PCA model also performed very well, with high accuracy (0.99927) and F1 score (0.999271).

The **SVM model (SMOTE Subset)** achieved high accuracy (0.99860) and F1 score (0.998600), which are slightly lower than the top-performing Random Forest models. However, it demonstrated a remarkably fast training time of **2.52 seconds**, significantly faster than the Random Forest models on Raw Data (203.42s) and With SMOTE (314.69s). The Random Forest with PCA had a training time of 24.39s, which is faster than the other RF models but still considerably slower than the SVM.

The SVM model's confusion matrix was [[1796, 4], [3, 3197]], indicating a small number of misclassifications. Its false positive rate was 0.002222 (4 false positives out of 1,800 negatives), slightly higher than the Random Forest with PCA model (0.000000). No false positive rate was recorded for the Random Forest models on Raw Data and With SMOTE (shown as NaN), but their confusion matrices contain no false positives, which corresponds to a rate of 0.

Note that the SVM's confusion matrix covers 5,000 samples, whereas the Random Forest matrices cover 89,000, so the models were not scored on the same evaluation set and their metrics are not a like-for-like comparison.

### 2. Resource Usage Analysis

The resource usage data collected includes timestamps, CPU percentage before and after events, and memory usage before, after, and consumed.

"""
# The aggregate block is only added when the resource-usage frame exists and has
# rows. `resource_usage_df` is a notebook-level name, hence the globals() lookup.
if 'resource_usage_df' in globals() and not resource_usage_df.empty:
    report += """
Overall Resource Usage Trends:
- Average CPU Percent Before: {:.2f}
- Average CPU Percent After: {:.2f}
- Average Memory Before: {:.2f} bytes
- Average Memory After: {:.2f} bytes
- Average Memory Consumed: {:.2f} bytes
- Maximum Memory Consumed: {:.2f} bytes

""".format(
        resource_usage_df['cpu_percent_before'].mean(),
        resource_usage_df['cpu_percent_after'].mean(),
        resource_usage_df['memory_before'].mean(),
        resource_usage_df['memory_after'].mean(),
        resource_usage_df['memory_consumed'].mean(),
        resource_usage_df['memory_consumed'].max(),
    )

report += """
A key limitation is that **the resource usage logs could not be directly linked to specific models** or their training/prediction phases. The timestamps in the resource usage data do not align with the training times recorded for each model, and there is no model identifier in the resource usage logs. Therefore, the resource usage analysis provides general trends across all logged events rather than model-specific consumption.

The data indicates varying levels of CPU and memory usage during different logged events. Some events show significant memory consumption, while others show negative memory consumption (memory release).

### 3. Performance and Resource Usage Trade-offs

Analyzing the performance and resource usage together highlights trade-offs:

- **Random Forest (Raw Data) and (With SMOTE):** Achieve perfect performance but at the cost of very long training times and potentially high, but unquantified, resource usage during training.
- **Random Forest (With PCA):** Represents a better trade-off, maintaining high performance while significantly reducing training time compared to the other RF models. Resource usage is not directly linked but is likely lower during training due to the reduced dimensionality.
- **SVM (SMOTE Subset):** Offers a different trade-off – slightly lower performance than the top RF models but exceptionally fast training time. This suggests efficient resource utilization during training, though specific usage is not available.

The SVM model stands out for its speed, making it potentially suitable for scenarios where rapid model training or frequent retraining is required, even if it means a marginal reduction in peak performance compared to the slowest models.

### 4. Recommendations and Potential Improvements

Based on the analysis, the following next steps and potential improvements are recommended:

#### For the SVM Model:
- **Hyperparameter Tuning:** Explore tuning SVM hyperparameters (e.g., C, gamma, kernel type) to potentially improve accuracy and F1 score. The goal would be to try and match the performance of the top Random Forest models while maintaining a reasonably fast training time.
- **Kernel Exploration:** Experiment with different kernel functions beyond the default (likely RBF) to see if a different decision boundary can improve performance on this specific dataset.
- **Resource Profiling:** Although the training time is fast, if resource usage becomes a concern for larger datasets or in deployment, implement specific profiling tools to monitor SVM's CPU and memory usage during training and prediction.
- **Evaluate on Different Data Subsets/Preprocessing:** While SMOTE helped balance the dataset, evaluating the SVM's performance and resource usage on other balanced subsets or with alternative preprocessing techniques could be beneficial.

#### For Other Models:
- **Reduce Training Time (Random Forest - Raw and SMOTE):** Prioritize methods to significantly reduce the training time of the Random Forest models on Raw Data and With SMOTE. This could involve:
    - **Feature Selection:** Identifying and using only the most relevant features.
    - **Further Dimensionality Reduction:** Exploring techniques like t-SNE or UMAP if PCA is not sufficient or suitable.
    - **Hyperparameter Optimization:** Tuning Random Forest specific hyperparameters like `n_estimators`, `max_depth`, `min_samples_split`, etc., to find a balance between performance and speed.
    - **Algorithm Variations:** Investigating faster ensemble methods or implementations.
- **Optimize Random Forest (With PCA):** Fine-tune the number of PCA components and Random Forest hyperparameters to potentially find an even better balance between the high performance achieved and the reduced training time.
- **Consistent Resource Logging:** Implement a more robust system for logging resource usage that explicitly links the logs to the specific model being trained or used for prediction. This would allow for a more detailed analysis of resource consumption per model and help identify bottlenecks.

### 5. Conclusion

The SVM model demonstrates excellent speed with high performance, making it a strong contender for certain applications. However, there is potential to further improve its performance metrics through tuning. The Random Forest models achieve slightly higher performance but at a significant cost in training time (except for the PCA version). Future work should focus on optimizing the performance of the SVM model and drastically reducing the training time of the slower Random Forest models, while also implementing better resource usage tracking for more insightful analysis.
"""

print(report)


## Model Performance and Resource Usage Report

This report summarizes the performance and resource usage of various models trained for the task, with a focus on the SVM model and recommendations for future improvements.

### 1. Model Performance Summary

The following table presents the key performance metrics for all evaluated models:

| model_name                 |   accuracy |   f1_score |   training_time |   roc_auc_score | confusion_matrix         |   false_positive_rate |
|:---------------------------|-----------:|-----------:|----------------:|----------------:|:-------------------------|----------------------:|
| Random Forest (Raw Data)   |    1       |   1        |       203.418   |               1 | [[9000, 0], [0, 80000]]  |          nan          |
| SVM (SMOTE Subset)         |    0.9986  |   0.9986   |         2.51811 |             nan | [[1796, 4], [3, 3197]]   |            0.00222222 |
| Random Forest (With PCA)   |    0.99927 |   0.999271 |        24.3904  |         

## Present the report

### Subtask:
Present the generated report to the user.


**Reasoning**:
Display the generated report contained in the `report` variable.



In [13]:
# Print the `report` string assembled earlier in the notebook as plain text
# (the Markdown markup in it is shown verbatim, not rendered).
print(report)


## Model Performance and Resource Usage Report

This report summarizes the performance and resource usage of various models trained for the task, with a focus on the SVM model and recommendations for future improvements.

### 1. Model Performance Summary

The following table presents the key performance metrics for all evaluated models:

| model_name                 |   accuracy |   f1_score |   training_time |   roc_auc_score | confusion_matrix         |   false_positive_rate |
|:---------------------------|-----------:|-----------:|----------------:|----------------:|:-------------------------|----------------------:|
| Random Forest (Raw Data)   |    1       |   1        |       203.418   |               1 | [[9000, 0], [0, 80000]]  |          nan          |
| SVM (SMOTE Subset)         |    0.9986  |   0.9986   |         2.51811 |             nan | [[1796, 4], [3, 3197]]   |            0.00222222 |
| Random Forest (With PCA)   |    0.99927 |   0.999271 |        24.3904  |         

## Summary:

### Q&A
**What is the SVM model's performance and resource consumption?**

The SVM model achieved an accuracy and F1 score of 0.9986, which is slightly lower than the top-performing Random Forest models. However, it had a remarkably fast training time of 2.52 seconds, far shorter than the Random Forest models on raw and SMOTE data, which took over 200 and 300 seconds, respectively. The specific resource consumption of the SVM model could not be determined because the resource usage logs could not be linked to specific models. Its fast training time suggests that resource usage during training is low, but this has not been measured.

### Data Analysis Key Findings
* The SVM model demonstrated excellent training speed (2.52 seconds) but slightly lower performance (0.9986 accuracy/F1 score) compared to Random Forest models.
* Random Forest models on raw and SMOTE data achieved perfect performance scores but had extremely long training times (over 200s and 300s).
* The Random Forest model with PCA offered a good balance, with high performance and a much-reduced training time (24.39 seconds).
* The provided resource usage data could not be directly linked to specific models, limiting the analysis to overall trends rather than model-specific consumption.

### Insights or Next Steps
* **SVM Optimization:** Focus on hyperparameter tuning (e.g., C, gamma, kernel) for the SVM model to potentially match the performance of the Random Forest models while maintaining its speed advantage.
* **Improved Logging:** Implement a more robust logging system that explicitly links resource usage to the specific model being trained or used for prediction to enable more detailed resource consumption analysis.
