In [1]:
import pandas as pd
from autogluon.tabular import TabularDataset, TabularPredictor

# Loading the data
def _load_indexed_csv(csv_path):
    """Read one CSV and return it indexed by its parsed '# Date and time' column."""
    frame = pd.read_csv(csv_path)
    # convert to datetime
    frame['# Date and time'] = pd.to_datetime(frame['# Date and time'])
    # Setting the index (the timestamp column is moved out of the columns)
    return frame.set_index('# Date and time')


X_train = _load_indexed_csv('data/single_turbine_data/X_train.csv')
X_test = _load_indexed_csv('data/single_turbine_data/X_test.csv')
y_train = _load_indexed_csv('data/single_turbine_data/y_train.csv')
y_test = _load_indexed_csv('data/single_turbine_data/y_test.csv')

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def create_time_features(df):
    """
    Function to create time features from the DatetimeIndex of a dataframe.

    The input dataframe is not modified; a new dataframe carrying the extra
    columns is returned, so re-running a cell never alters its inputs.

    Parameters:
    df (pandas.DataFrame): Dataframe whose index is a pandas.DatetimeIndex

    Returns:
    pandas.DataFrame: New dataframe with added time features
        ('hour', 'day_of_week', 'month') appended after the existing columns

    Raises:
    TypeError: If the index of df is not a pandas.DatetimeIndex
    """
    # Fail early with a clear message instead of an AttributeError on `.hour`
    # when the index was never converted to datetimes.
    if not isinstance(df.index, pd.DatetimeIndex):
        raise TypeError(
            "create_time_features expects a DataFrame indexed by a "
            f"DatetimeIndex, got {type(df.index).__name__}"
        )

    # `assign` works on a copy, leaving the caller's frame untouched.
    return df.assign(
        hour=df.index.hour,
        day_of_week=df.index.dayofweek,  # Monday == 0 ... Sunday == 6
        month=df.index.month,
    )

In [8]:
# Put features and targets side by side; pd.concat aligns the rows on the shared index.
train_data = pd.concat((X_train, y_train), axis='columns')
test_data = pd.concat((X_test, y_test), axis='columns')

In [9]:
# Add the hour / day_of_week / month columns to both splits.
train_data, test_data = (
    create_time_features(split) for split in (train_data, test_data)
)

In [11]:
# Name of the single target column to predict
label = '1_Gear oil temperature (°C)'

# Empty dictionary for storing predictors (nothing in this cell fills or reads it)
predictors = {}

# Convert the combined train/test frames to AutoGluon's TabularDataset format
train_data, test_data = TabularDataset(train_data), TabularDataset(test_data)

# One regression predictor for `label`, scored by root mean squared error
predictor = TabularPredictor(
    label=label,
    problem_type='regression',
    eval_metric='root_mean_squared_error',
)

# Fit with the library defaults: no time_limit, excluded_model_types,
# num_bag_sets or hyperparameters are passed.
predictor.fit(train_data)

No path specified. Models will be saved in: "AutogluonModels/ag-20230724_154150/"
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230724_154150/"
AutoGluon Version:  0.8.2
Python Version:     3.10.12
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 22.5.0: Thu Jun  8 22:22:23 PDT 2023; root:xnu-8796.121.3~7/RELEASE_ARM64_T6020
Disk Space Avail:   401.00 GB / 1995.22 GB (20.1%)
Train Data Rows:    158731
Train Data Columns: 35
Label Column: 1_Gear oil temperature (°C)
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    83743.01 MB
	Train Data (Original)  Memory Usage: 44.44 MB (0.1% of available memory

[1000]	valid_set's rmse: 0.506699
[2000]	valid_set's rmse: 0.444599
[3000]	valid_set's rmse: 0.41545
[4000]	valid_set's rmse: 0.396962
[5000]	valid_set's rmse: 0.382878
[6000]	valid_set's rmse: 0.372811
[7000]	valid_set's rmse: 0.365483
[8000]	valid_set's rmse: 0.358871
[9000]	valid_set's rmse: 0.353302
[10000]	valid_set's rmse: 0.348996


	-0.349	 = Validation score   (-root_mean_squared_error)
	83.04s	 = Training   runtime
	0.25s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 0.436521
[2000]	valid_set's rmse: 0.391375
[3000]	valid_set's rmse: 0.371212
[4000]	valid_set's rmse: 0.35918
[5000]	valid_set's rmse: 0.349856
[6000]	valid_set's rmse: 0.343833
[7000]	valid_set's rmse: 0.338951
[8000]	valid_set's rmse: 0.335746
[9000]	valid_set's rmse: 0.332751
[10000]	valid_set's rmse: 0.330605


	-0.3306	 = Validation score   (-root_mean_squared_error)
	78.51s	 = Training   runtime
	0.22s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.5243	 = Validation score   (-root_mean_squared_error)
	135.01s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: CatBoost ...
	-0.3316	 = Validation score   (-root_mean_squared_error)
	70.45s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.5132	 = Validation score   (-root_mean_squared_error)
	15.55s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.4335	 = Validation score   (-root_mean_squared_error)
	217.49s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	-0.3436	 = Validation score   (-root_mean_squared_error)
	55.61s	 = Training   runtime
	0.13s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.7183	 = Validation score   (-root_mean_squared_error)
	260.59s	 = Training   runtime
	0.04s	 = Valid

[1000]	valid_set's rmse: 0.383054
[2000]	valid_set's rmse: 0.358965
[3000]	valid_set's rmse: 0.348731
[4000]	valid_set's rmse: 0.34352
[5000]	valid_set's rmse: 0.340326
[6000]	valid_set's rmse: 0.338388
[7000]	valid_set's rmse: 0.336728
[8000]	valid_set's rmse: 0.335769
[9000]	valid_set's rmse: 0.335047
[10000]	valid_set's rmse: 0.334158


	-0.3342	 = Validation score   (-root_mean_squared_error)
	247.78s	 = Training   runtime
	0.34s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	-0.307	 = Validation score   (-root_mean_squared_error)
	0.1s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 1168.52s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20230724_154150/")


<autogluon.tabular.predictor.predictor.TabularPredictor at 0x16ac578e0>

In [12]:
# Score the fitted predictor on the held-out test set. Error metrics in the
# returned dict are negated (e.g. RMSE shows up as a negative number).
predictor.evaluate(data=test_data, silent=True)

{'root_mean_squared_error': -0.5252737585954804,
 'mean_squared_error': -0.27591252146902306,
 'mean_absolute_error': -0.3441628673795754,
 'r2': 0.9901219452414469,
 'pearsonr': 0.9952454171820333,
 'median_absolute_error': -0.2401134490966932}