# Trying different techniques to find the best method

This notebook also tries ensemble methods to see whether they can improve the accuracy.

In [None]:
import pandas as pd
from flaml import AutoML
from sklearn.model_selection import train_test_split
from flaml.data import get_output_from_log
import seaborn as sns
# Load the preprocessed dataset; it must contain a 'posture' label column.
df = pd.read_csv("your_preprocessed_data.csv")


# Split data into features (X) and target (y)
X = df.drop(columns='posture')
y = df['posture']

# Split data into train and test sets (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Shared by the AutoML settings and the log reader below so they cannot drift apart.
LOG_FILE_NAME = 'flaml_multi_algo_posture.log'
TIME_BUDGET_S = 300

# FLAML Setup for Multiple Algorithms
automl = AutoML()
settings = {
    "time_budget": TIME_BUDGET_S,  # Time limit in seconds
    "metric": 'accuracy',          # Classification metric
    "task": 'classification',
    "estimator_list": ['lgbm', 'xgboost', 'rf', 'xgb_limitdepth'],   # Include various algorithms
    "log_file_name": LOG_FILE_NAME,
    "eval_method": "holdout",      # Use holdout validation
    "early_stop": True,
    # NOTE(review): presumably only consulted for cross-validation, so it may
    # have no effect with eval_method="holdout" - confirm against FLAML docs.
    "n_splits": 5,
    # NOTE(review): FLAML appears to document mem_thres in bytes, which would
    # make this 8 KiB - confirm the intended unit.
    "mem_thres": 1024 * 8,
    "ensemble": True,              # Enable model ensembling (optional)
    "n_jobs": -1,
}

# Run FLAML's AutoML process
automl.fit(X_train=X_train, y_train=y_train, **settings)

# Analyze Results
print(f"Best estimator: {automl.best_estimator}")
print(f"Best hyperparameters: {automl.best_config}")
# FLAML minimises a loss; for the 'accuracy' metric that loss is 1 - accuracy,
# so the accuracy is recovered with 1 - best_loss (negating it is wrong).
print(f"Best validation accuracy: {1 - automl.best_loss:.4f}")

# Compare Performance of Different Models
# get_output_from_log takes the log path as `filename` and returns a tuple of
# per-trial histories (not a DataFrame). `df_results` is kept as the name of
# the raw tuple for backward compatibility and unpacked for convenience.
df_results = get_output_from_log(filename=LOG_FILE_NAME, time_budget=TIME_BUDGET_S)
(
    time_history,
    best_valid_loss_history,
    valid_loss_history,
    config_history,
    metric_history,
) = df_results


# Plots to get a visual representation of the training

## Learning Curves

In [None]:
import matplotlib.pyplot as plt
from flaml.data import get_output_from_log
import seaborn as sns

# Load the log written by the AutoML run in this notebook. The name must match
# the "log_file_name" setting passed to automl.fit.
search_log = 'flaml_multi_algo_posture.log'

# get_output_from_log takes the path as `filename` and returns a tuple of
# parallel per-trial lists rather than a DataFrame. A dedicated set of names is
# used here so the dataset held in `df` is not overwritten.
(
    time_history,
    best_valid_loss_history,
    valid_loss_history,
    config_history,
    metric_history,
) = get_output_from_log(filename=search_log, time_budget=300)

# 1. Learning Curves:
# Group the logged trials by the learner that produced them.
trials_by_learner = {}
for elapsed, loss, config in zip(time_history, valid_loss_history, config_history):
    learner = config.get('Current Learner', 'unknown')
    times, losses = trials_by_learner.setdefault(learner, ([], []))
    times.append(elapsed)
    losses.append(loss)

plt.figure(figsize=(12, 6))
for learner, (times, losses) in trials_by_learner.items():
    plt.plot(times, losses, marker='o', linewidth=1, label=f'{learner} trials')
# Best loss found so far across all learners, drawn as a step function.
plt.step(time_history, best_valid_loss_history, where='post', color='black', label='Best so far')
plt.xlabel('Time (s)')
# With metric='accuracy' the logged loss is 1 - accuracy (lower is better).
plt.ylabel('Validation Loss (1 - accuracy)')
plt.title('Learning Curves for Different Configurations')
plt.legend()
plt.show()




## Feature Importance

In [None]:
# 2. Feature Importance:
# This plots the fitted model's feature importances (not hyperparameter importance).
# The final model may be a stacked ensemble (ensembling is enabled in the AutoML
# settings) that exposes neither `.estimator` nor `feature_importances_`, so both
# lookups are guarded instead of raising AttributeError.
fitted_estimator = getattr(automl.model, 'estimator', None)
importances = getattr(fitted_estimator, 'feature_importances_', None)
features = X.columns

if importances is None:
    print("The final model does not expose feature_importances_ "
          "(for example a stacked ensemble); skipping the feature importance plot.")
elif len(importances) != len(features):
    # The bars would be mislabelled if the model saw a different number of
    # columns than the raw feature frame has.
    print(f"Got {len(importances)} importances for {len(features)} features; "
          "skipping the feature importance plot.")
else:
    plt.figure(figsize=(10, 6))
    sns.barplot(x=features, y=importances)
    plt.xticks(rotation=45)
    plt.xlabel('Features')
    plt.ylabel('Importance')
    plt.title('Feature Importance')
    plt.show()


## Confusion Matrix

In [None]:
# 3. Confusion Matrix:
from sklearn.metrics import confusion_matrix

# Predictions over the full dataset, kept so `y_pred` remains available to
# other cells. They include the training rows, so they are not used for the
# matrix below.
y_pred = automl.predict(X)

# Evaluate on the held-out split only; scoring rows the model was trained on
# would overstate its quality.
y_test_pred = automl.predict(X_test)
class_labels = automl.classes_

# Pin the row/column order to class_labels so the tick labels always line up
# with the matrix, even if a class is missing from the test split.
cm = confusion_matrix(y_test, y_test_pred, labels=class_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix (held-out test set)')
plt.show()

## Precision-Recall Curve (for each posture class)

In [None]:
# 4. Precision-Recall Curve (for each posture class):
from sklearn.metrics import precision_recall_curve
# label_binarize is no longer used in this cell; the import is retained in case
# other cells rely on it.
from sklearn.preprocessing import label_binarize

class_labels = automl.classes_
n_classes = len(class_labels)

# Score the held-out split only, and compute the probabilities once instead of
# once per class inside the loop.
test_proba = automl.predict_proba(X_test)

plt.figure(figsize=(10, 8))
for i, class_label in enumerate(class_labels):
    # One-vs-rest indicator built directly from the labels. label_binarize
    # returns a single column when there are only two classes, which would not
    # line up with the per-class probability columns.
    is_class = (y_test == class_label).astype(int)
    precision, recall, _ = precision_recall_curve(is_class, test_proba[:, i])
    plt.plot(recall, precision, label=f'Class {class_label}')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()

## Classification Report (summary of metrics)

In [None]:

# 5. Classification Report (summary of metrics):
from sklearn.metrics import classification_report

class_labels = automl.classes_

# Report on the held-out split, predicting here so this cell does not depend on
# variables created by other cells. `labels` fixes the row order, and the
# display names are converted to strings because target_names must be strings.
report = classification_report(
    y_test,
    automl.predict(X_test),
    labels=class_labels,
    target_names=[str(label) for label in class_labels],
)
print(report)