In [None]:
%matplotlib inline
import composeml as cp
import featuretools as ft
from demo.predict_rul import load_sample
from evalml import AutoMLSearch
from evalml.preprocessing import split_data

In [None]:
# Load the sample data via demo.predict_rul.load_sample; the result is used
# as a pandas DataFrame throughout the rest of the notebook.
df = load_sample()

# Preview the first rows to check the available columns before labeling.
df.head()

In [None]:
def remaining_useful_life(df):
    """Labeling function: remaining useful life for a slice of recordings.

    Args:
        df: The slice of data to label. Only its length is used, so any
            sized container works.

    Returns:
        int: The number of rows in ``df`` minus one (presumably so the
        current recording itself is not counted -- confirm intent).
    """
    row_count = len(df)
    return row_count - 1

In [None]:
# Build the label maker: `engine_no` is the column naming the entity being
# labeled, `time` is the column used as the time index, and every data slice
# is labeled by remaining_useful_life.
lm = cp.LabelMaker(
    labeling_function=remaining_useful_life,
    time_index='time',
    target_entity='engine_no',
)

In [None]:
# Search the recordings, sorted by `time`, for label times.
# The numeric settings are passed straight through to composeml; see
# LabelMaker.search for their exact meaning/units in the installed version.
lt = lm.search(
    df.sort_values(by='time'),
    verbose=True,
    gap=20,
    minimum_data=5,
    num_examples_per_instance=20,
)

# Preview the first generated label times.
lt.head()

In [None]:
# Show a summary of the label times produced by the search.
lt.describe()

In [None]:
# Plot the distribution of the raw labels; the trailing ';' suppresses the
# text repr of the returned object so only the figure is shown.
lt.plot.distribution();

In [None]:
# Discretize the numeric labels into 4 bins. quantiles=True presumably makes
# the bins quantile-based and precision=0 controls the rounding of the bin
# edges -- confirm against the composeml LabelTimes.bin documentation.
# NOTE: this rebinds `lt` from itself, so re-running this cell on its own
# would try to bin labels that are already binned; re-run the search cell
# first to restore the raw label times.
lt = lt.bin(4, quantiles=True, precision=0)

In [None]:
# Summarize the label times again, now that the labels have been binned.
lt.describe()

In [None]:
# Plot the distribution of the binned labels; ';' hides the returned
# object's text repr.
lt.plot.distribution();

In [None]:
# Plot the label counts over time; ';' hides the returned object's text repr.
lt.plot.count_by_time();

In [None]:
# Entity set that holds the tables used for feature engineering.
es = ft.EntitySet(id='observations')

# Base entity built from the recordings. reset_index() moves the frame's
# existing index into a regular column; `id` is used as the entity index
# (presumably the column produced by that reset -- confirm against the data).
es.entity_from_dataframe(
    entity_id='recordings',
    dataframe=df.reset_index(),
    time_index='time',
    index='id',
)

# Derive an `engines` entity from `recordings`, keyed by `engine_no`.
es.normalize_entity(
    new_entity_id='engines',
    base_entity_id='recordings',
    index='engine_no',
)

# Derive a `cycles` entity from `recordings`, keyed by `time_in_cycles`.
es.normalize_entity(
    new_entity_id='cycles',
    base_entity_id='recordings',
    index='time_in_cycles',
)

# Last expression in the cell: render the entity set diagram.
es.plot()

In [None]:
# Run deep feature synthesis for the `engines` entity, using the label times
# as cutoff times. The label column travels with the cutoff times into X
# (it is separated from the features in the next cell).
X, features = ft.dfs(
    target_entity='engines',
    entityset=es,
    # Cutoff handling: label times drive the cutoffs, the cutoff time is kept
    # in the index, and data at the cutoff itself is excluded.
    cutoff_time=lt,
    cutoff_time_in_index=True,
    include_cutoff_time=False,
    # Primitive selection: only `sum` aggregations, no transform primitives.
    agg_primitives=['sum'],
    trans_primitives=[],
    verbose=True,
)

# Preview the resulting feature matrix.
X.head()

In [None]:
# Separate the label from the features WITHOUT mutating X, so this cell can
# be re-run safely. The previous X.pop(...) removed the column in place and
# raised KeyError on a second run because the column was already gone.
# NOTE: X itself still contains the label column after this cell; only the
# frame handed to split_data has it removed. Never pass X directly to a
# model, or the label will leak into the features.
label_column = 'remaining_useful_life'

# The label is categorical; .cat.codes yields its integer category codes.
y = X[label_column].cat.codes

# Hold out 20% of the examples for evaluation, using a fixed seed.
datasets = split_data(X.drop(columns=label_column), y, test_size=0.2, random_state=0)
X_train, X_holdout, y_train, y_holdout = datasets

In [None]:
# Configure the AutoML search as a multiclass problem (the label was binned
# into 4 categories), optimizing the `f1_macro` objective.
automl = AutoMLSearch(
    objective='f1_macro',
    problem_type='multiclass',
)

# Run the search on the training split; data checks are explicitly disabled.
automl.search(X_train, y_train, data_checks=None)

In [None]:
# Describe the best pipeline found by the search. This is not the cell's last
# expression, so anything shown comes from describe() itself (presumably
# printed/logged output -- confirm for the installed evalml version).
automl.best_pipeline.describe()
# Last expression in the cell: the object returned by graph() is rendered as
# the cell output.
automl.best_pipeline.graph()

In [None]:
# Fit the best pipeline on the training split. fit() is assumed to return the
# fitted pipeline (its result is what gets scored below) -- confirm for the
# installed evalml version.
best_pipeline = automl.best_pipeline.fit(X_train, y_train)
# Evaluate on the holdout split with the same objective used for the search.
score = best_pipeline.score(X_holdout, y_holdout, objectives=['f1_macro'])
# Convert to a plain dict so the scores are displayed as the cell output.
dict(score)