In [6]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nasa-cmaps/CMaps/RUL_FD002.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD003.txt
/kaggle/input/nasa-cmaps/CMaps/Damage Propagation Modeling.pdf
/kaggle/input/nasa-cmaps/CMaps/readme.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD003.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD004.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD004.txt
/kaggle/input/nasa-cmaps/CMaps/x.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD002.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD001.txt
/kaggle/input/nasa-cmaps/CMaps/train_FD002.txt
/kaggle/input/nasa-cmaps/CMaps/RUL_FD001.txt
/kaggle/input/nasa-cmaps/CMaps/RUL_FD004.txt
/kaggle/input/nasa-cmaps/CMaps/RUL_FD003.txt
/kaggle/input/nasa-cmaps/CMaps/test_FD001.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/RUL_FD002.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/test_FD003.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/Damage Propagation Modeling.pdf
/kaggle/input/nasa-cmaps/cmaps/CMaps/readme.txt
/kaggle/input/nasa-cmaps/cmaps/CMaps/train_FD003.txt
/kaggle/input/nasa-cmaps/cmaps/CM

In [7]:
from sklearn.model_selection import train_test_split


from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso, HuberRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score


In [8]:
index_names = ['unit_number', 'time_cycles']
setting_names = ['setting_1', 'setting_2', 'setting_3']
sensor_names = ['s_{}'.format(i+1) for i in range(0,21)]
col_names = index_names + setting_names + sensor_names

dftrain = pd.read_csv('../input/nasa-cmaps/CMaps/train_FD001.txt',sep='\s+',header=None,index_col=False,names=col_names)
dfvalid = pd.read_csv('../input/nasa-cmaps/CMaps/test_FD001.txt',sep='\s+',header=None,index_col=False,names=col_names)
y_valid = pd.read_csv('../input/nasa-cmaps/CMaps/RUL_FD001.txt',sep='\s+',header=None,index_col=False,names=['RUL'])

In [9]:
predictive_columns = ['s_8', 's_13', 's_4', 's_7', 's_15', 's_21', 's_20', 's_2', 's_17', 's_3', 's_14', 'setting_1', 'setting_2', 'time_cycles', 'unit_number']

In [10]:
def RUL_map(input_df):
    req = input_df[predictive_columns]
    total_life = req[['unit_number', 'time_cycles']].groupby(by = ['unit_number']).agg(total_cycles= ("time_cycles", lambda x: max(x))).reset_index()
    # req['RUL'] 
    req_rul = pd.merge(req, total_life, on = 'unit_number', how= 'left')
    req_rul['RUL'] = req_rul['total_cycles'] - req_rul['time_cycles']
    X = req_rul[predictive_columns].drop(columns = ['unit_number', 'time_cycles'])
    y = req_rul[['RUL']]
    return (X, y) 
# X_train, y_train = RUL_map(dftrain.copy())
# y_train

In [11]:
def fltr_last(input_df):
    return input_df.groupby('unit_number').last().reset_index()

In [12]:
X, y = RUL_map(dftrain.copy())
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)

X_OOB_test, _ = RUL_map(fltr_last(dfvalid.copy()))
y_OOB_test = y_valid.copy()

print(X_OOB_test.shape, y_OOB_test.shape)

(100, 13) (100, 1)


In [13]:

models_and_params = {
    'Ridge Regression': {
        'model': Ridge(),
        'params': {
            'ridge__alpha': [0.01, 0.1, 1.0, 10.0]
        }
    },
    'Lasso Regression': {
        'model': Lasso(max_iter=10000),
        'params': {
            'lasso__alpha': [0.01, 0.1, 1.0, 10.0]
        }
    },
    'Huber Regressor': {
        'model': HuberRegressor(),
        'params': {
            'huberregressor__alpha': [0.0001, 0.001, 0.01],
            'huberregressor__epsilon': [1.1, 1.35, 1.5]
        }
    },
    'Random Forest Regressor': {
        'model': RandomForestRegressor(random_state=42),
        'params': {
            'randomforestregressor__n_estimators': [100, 200],
            'randomforestregressor__max_depth': [None, 5, 10]
        }
    }
}


for name, mp in models_and_params.items():
    pipeline = make_pipeline(StandardScaler(), mp['model'])
    grid = GridSearchCV(pipeline, mp['params'], cv=5, scoring='r2', n_jobs=-1)
    grid.fit(X_train, y_train)
    
    best_model = grid.best_estimator_
    y_pred = best_model.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    y_OOB_pred = best_model.predict(X_OOB_test)
    OOB_r2 = r2_score(y_OOB_test, y_OOB_pred)

    print(f"**{name}**")
    print(f"Best Params: {grid.best_params_}")
    print(f"Test R² Score: {r2:.4f}")
    print(f"OOB-Test R² Score: {OOB_r2:.4f}")
    print("-" * 40)

**Ridge Regression**
Best Params: {'ridge__alpha': 10.0}
Test R² Score: 0.5621
OOB-Test R² Score: 0.4347
----------------------------------------
**Lasso Regression**
Best Params: {'lasso__alpha': 0.01}
Test R² Score: 0.5621
OOB-Test R² Score: 0.4347
----------------------------------------


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

**Huber Regressor**
Best Params: {'huberregressor__alpha': 0.0001, 'huberregressor__epsilon': 1.5}
Test R² Score: 0.5574
OOB-Test R² Score: 0.5487
----------------------------------------


  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_step)
  self._final_estimator.fit(Xt, y, **fit_params_last_ste

**Random Forest Regressor**
Best Params: {'randomforestregressor__max_depth': 10, 'randomforestregressor__n_estimators': 200}
Test R² Score: 0.6139
OOB-Test R² Score: 0.4243
----------------------------------------
