### --------------------------------------------------------
### Overview 
### --------------------------------------------------------
### Objective is to predict the outputs for pm, stator tooth, stator winding
### and stator yoke.  The key measure is RMSE.

In [3]:
import pandas as pd
from pycaret.regression import *
import sklearn
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn import model_selection
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [4]:
# Read the three CSV inputs and join test_solutions onto the test frame
# column-wise (presumably test_solutions.csv holds the test targets — confirm).
train = pd.read_csv("train.csv")
test_solutions = pd.read_csv("test_solutions.csv")
test = pd.concat([pd.read_csv("test.csv"), test_solutions], axis=1)

Unnamed: 0,ambient,coolant,u_d,u_q,motor_speed,torque,i_d,i_q,pm,stator_yoke,stator_tooth,stator_winding,profile_id
0,-0.752143,-1.118446,0.327935,-1.297858,-1.222428,-0.250182,1.029572,-0.24586,-2.522071,-1.831422,-2.066143,-2.018033,4
1,-0.771263,-1.117021,0.329665,-1.297686,-1.222429,-0.249133,1.029509,-0.245832,-2.522418,-1.830969,-2.064859,-2.017631,4
2,-0.782892,-1.116681,0.332771,-1.301822,-1.222428,-0.249431,1.029448,-0.245818,-2.522673,-1.8304,-2.064073,-2.017343,4
3,-0.780935,-1.116764,0.3337,-1.301852,-1.22243,-0.248636,1.032845,-0.246955,-2.521639,-1.830333,-2.063137,-2.017632,4
4,-0.774043,-1.116775,0.335206,-1.303118,-1.222429,-0.248701,1.031807,-0.24661,-2.5219,-1.830498,-2.062795,-2.018145,4


In [5]:
# Feature columns that get rescaled and used as model inputs
main_columns = [
    'ambient', 'coolant',
    'u_d', 'u_q',
    'motor_speed', 'torque',
    'i_d', 'i_q',
]

In [7]:
# Min-max scale the training feature columns; the target columns are not touched.
# The scaled values are wrapped in a new DataFrame with a fresh 0..n-1 index;
# its columns carry positional integer labels rather than the feature names.
scaler = preprocessing.MinMaxScaler()
df_scale_train = pd.DataFrame(
    scaler.fit_transform(train[main_columns])
).reset_index(drop=True)

In [8]:
# Rescale the test feature columns with the scaler that was fitted on the
# training features, so both sets go through exactly the same min/max mapping.
# Fitting a separate MinMaxScaler on the test set would put train and test on
# different scales and leak test-set statistics into the evaluation.
# NOTE: relies on `scaler` already being fitted on train[main_columns].
# The result has a fresh 0..n-1 index and positional integer column labels,
# matching the layout of the scaled training frame.
df_scale_test = pd.DataFrame(
    scaler.transform(test[main_columns])
).reset_index(drop=True)

In [None]:
## Target columns: pm, stator_yoke, stator_tooth, stator_winding

In [9]:
# Build one training frame per target: the scaled features plus that target.
# reset_index(drop=True) is applied to the Series that is handed to concat
# (instead of in place on a `train[col]` selection, which may act on a
# temporary object) so the target rows line up positionally with the
# 0..n-1 index of df_scale_train.

# pm (permanent magnet) frame
train_pm = pd.concat(
    [df_scale_train, train['pm'].reset_index(drop=True)], axis=1)
# stator tooth frame
train_stator_tooth = pd.concat(
    [df_scale_train, train['stator_tooth'].reset_index(drop=True)], axis=1)
# stator yoke frame
train_stator_yoke = pd.concat(
    [df_scale_train, train['stator_yoke'].reset_index(drop=True)], axis=1)
# stator winding frame
train_stator_winding = pd.concat(
    [df_scale_train, train['stator_winding'].reset_index(drop=True)], axis=1)

In [10]:
# Build one test frame per target: the scaled test features plus that target.
# reset_index(drop=True) is applied to the Series that is handed to concat
# (instead of in place on a `test[col]` selection, which may act on a
# temporary object) so the target rows line up positionally with the
# 0..n-1 index of df_scale_test.

# pm (permanent magnet) frame
test_pm = pd.concat(
    [df_scale_test, test['pm'].reset_index(drop=True)], axis=1)
# stator tooth frame
test_stator_tooth = pd.concat(
    [df_scale_test, test['stator_tooth'].reset_index(drop=True)], axis=1)
# stator yoke frame
test_stator_yoke = pd.concat(
    [df_scale_test, test['stator_yoke'].reset_index(drop=True)], axis=1)
# stator winding frame
test_stator_winding = pd.concat(
    [df_scale_test, test['stator_winding'].reset_index(drop=True)], axis=1)

In [None]:
# Initialise the PyCaret regression experiment on the pm training frame.
# `setup` is the functional API brought in by `from pycaret.regression import *`;
# the functional compare_models()/create_model()/predict_model() calls in the
# following cells operate on the experiment it configures.
# session_id pins the random seed so repeated runs are comparable.
# NOTE(review): only the 'pm' target is modelled here; repeat with
# train_stator_yoke / train_stator_tooth / train_stator_winding for the others.
# NOTE(review): the feature columns of train_pm carry integer labels (0-7) —
# confirm the installed PyCaret version accepts non-string column names.
exp = setup(data=train_pm, target='pm', session_id=123)

In [None]:
# Train the baseline models with cross-validation and keep the best one.
# Rank by RMSE (the key measure for this notebook) instead of the
# regression default, R2.
best = compare_models(sort='RMSE')

In [None]:
# Score the best model on the held-out pm test frame.
# test_pm holds the scaled test features together with the true 'pm' column.
predictions = predict_model(best, data=test_pm)
predictions.head()

In [None]:
# Residuals plot for the selected model
plot_model(best, plot='residuals')

In [None]:
# Prediction-error plot for the selected model
plot_model(best, plot='error')

In [None]:
# Feature-importance plot for the selected model
plot_model(best, plot='feature')

In [None]:
# Persist the selected model under the name 'my_first_pipeline'
save_model(
    best,
    'my_first_pipeline',
)

In [None]:
# Train a single LightGBM model through PyCaret's create_model
# (the experiment must have been initialised with setup() first).
lightgbm = create_model('lightgbm')
# The classifier names previously listed here do not apply to a regression
# experiment; regression estimator IDs to try instead include 'gbr'
# (gradient boosting), 'rf' (random forest) and 'et' (extra trees).

In [None]:
# Fit each candidate model and report its RMSE on the hold-out data.
import warnings

import numpy as np  # needed for np.sqrt below; not imported elsewhere in view

warnings.simplefilter(action='ignore', category=FutureWarning)

# NOTE(review): model1..model5, X_train, y_train, X_test and y_test are not
# defined in the visible part of this notebook; they must exist before this runs.

# Start from empty result lists so re-running the cell does not append
# duplicate entries.
rmse = []   # RMSE per model, in the order the models are evaluated
Name = []   # estimator class name per model, parallel to `rmse`

for model, label in zip([model1, model2, model3, model4, model5],
                        ['Random Forest', 'Gradient Boosting', 'Linear Regression', 'KNN', 'Ensemble']):
    regressor = model.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    # scikit-learn's signature is mean_squared_error(y_true, y_pred)
    score = np.sqrt(mean_squared_error(y_test, y_pred))
    rmse.append(score)
    Name.append(model.__class__.__name__)
    print(f"RMSE: {score:f} for model {label}")