In [1]:
# TimeSeries regressions with tslearn - https://tslearn.readthedocs.io/en/stable/gen_modules/svm/tslearn.svm.TimeSeriesSVR.html

In [2]:
%%bash
ls

01_TurboFan_RUL_Preprocessing-Copy1.ipynb
01_TurboFan_RUL_Preprocessing-Copy2.ipynb
01_TurboFan_RUL_Preprocessing.ipynb
01_TurboFan_tslearn.ipynb
02_TurboFan_RUL_Model_Pred.ipynb
CMAPSSData
Damage Propagation Modeling.pdf
df_test_mvavg_norm_ts001.pkl
df_test_norm_ts001.pkl
df_train_mvavg_norm_ts001.pkl
df_train_norm_ts001.pkl
readme.txt
TurboFan.docx
TurboFan_RUL_2.ipynb
TurboFan_RUL2.ipynb
TurboFan_RUL_2_jm.ipynb
TurboFan_RUL.ipynb
TurboFan_RUL_Moving_Avg.ipynb
TurboFan_RUL_Moving_Avg_v2.ipynb
TurboFan_RUL_Moving_Avg_v3.ipynb


In [3]:
from tqdm import tqdm
import pandas as pd
from tslearn.svm import TimeSeriesSVR

In [6]:
# Load training and test data files generated by "01_TurboFan_RUL_Preprocessing.ipynb"

#normalized features data. ( R^2 - 0.3933)
train_file = "df_train_norm_ts001.pkl" 
test_file = "df_test_norm_ts001.pkl"

#normalized moving average features data (R^2 - 0.2153)
train_file = "df_train_mvavg_norm_ts001.pkl" 
test_file = "df_test_mvavg_norm_ts001.pkl"


df_train = pd.read_pickle(train_file)
df_test = pd.read_pickle(test_file)

features = df_train.columns
label_col = "RUL"
unit_col = "unit"

In [7]:
#Prepare Time Series data
def prepare_ts_data(df_train, window_size=20, hop_size=10):
    """
    Prepare time series data by adding features for past "window_size" timestamps for each data point.
    For each unit, add the features for timestamp after every "hop_size" interval t
    """
    train_data, train_labels = [], []
    
    units = list(set(df_train[unit_col]))
    for unit in tqdm(units):
        df_subset = df_train[(df_train[unit_col]==unit)]
        rows = [row[features] for idx,row in df_subset.iterrows()]
        for end_idx in range(window_size, len(rows), hop_size):
            row_data=[]
            for rowb in rows[end_idx-window_size:end_idx]:
                row_data.append([rowb[feature] for feature in features if feature not in [label_col, unit_col]])
            train_data.append(row_data)
            train_labels.append(rows[end_idx][label_col])
    return train_data, train_labels

window_size = 50
hop_size = 50
train_data, train_labels = prepare_ts_data(df_train, window_size=window_size, hop_size=hop_size)
test_data, test_labels = prepare_ts_data(df_test, window_size=window_size, hop_size=hop_size)

100%|██████████| 100/100 [00:22<00:00,  4.41it/s]
100%|██████████| 100/100 [00:41<00:00,  2.38it/s]


In [None]:
# Fit TimeSeriesSVR model
clf = TimeSeriesSVR(C=1.0, kernel="gak")
clf.fit(train_data, train_labels)

In [None]:
predications=clf.predict(test_data)

In [None]:
#Calculate Score
score=clf.score(test_data, test_labels)
score