In [1]:
import pandas as pd

filtered_data = pd.read_csv('processed.csv', index_col=False)

from sklearn.model_selection import train_test_split

# Split the data into features (X) and targets (y)
X = filtered_data.drop(columns=['StartHesitation', 'Turn', 'Walking'])
y = filtered_data[['StartHesitation', 
                   'Turn', 
                   'Walking',
                  ]]

# Split the data into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Combine the features and targets back into DataFrames for AutoGluon
train_data = pd.concat([X_train, y_train], axis=1)
test_data = pd.concat([X_test, y_test], axis=1)

In [8]:
from autogluon.tabular import TabularDataset, TabularPredictor
import gc

# Assuming 'train_data' and 'test_data' are DataFrames created from the previous response
# Convert the data to AutoGluon's TabularDataset format
train_data = TabularDataset(train_data)
test_data = TabularDataset(test_data)

# Define the target columns
labels = ['StartHesitation', 'Turn', 'Walking']

# Initialize an empty dictionary to store the predictors
predictors = {}

# Train a separate regression model for each target event type
for label in labels:
    print(f"Training model for {label}...")
    predictor = TabularPredictor(label=label, 
                                 problem_type='regression', 
                                 eval_metric='mean_absolute_error',
                                 ) # regression with R^2 as the evaluation metric
    predictor.fit(train_data, 
                  num_gpus=1, 
                  excluded_model_types=['LightGBMLarge'], 
                  presets=['best_quality']
#                   time_limit=600, 
#                   num_bag_sets = 2, 
#                   hyperparameters = 'light'
)
    predictors[label] = predictor

# Make predictions on the test data for each target event type
predictions = []
for label in labels:
    print(f"Predicting probabilities for {label}...")
    predictor = predictors[label]
    predictions.append(predictor.predict(test_data.drop(columns=label)))
    

# Combine the predictions into a single DataFrame
predictions_df = pd.DataFrame(predictions)
print(predictions_df)


No path specified. Models will be saved in: "AutogluonModels/ag-20230504_211354/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230504_211354/"
AutoGluon Version:  0.7.0
Python Version:     3.10.10
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #42~20.04.1-Ubuntu SMP Wed Mar 1 19:17:41 UTC 2023
Train Data Rows:    7807241
Train Data Columns: 5
Label Column: StartHesitation
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    40667.99 MB
	Train Data (Original)  Memory Usage: 312.29 MB (0.8% of av

Training model for StartHesitation...
