<a href="https://colab.research.google.com/github/aist2000/ML-public/blob/master/Bus_Differential.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Electric BUS
[Diagram](https://www.webgreenstation.com/bus-differential-element-ge-ur-multilin-settings-gu8012/)

[Compare models](https://ruslanmv.com/blog/The-best-binary-Machine-Learning-Model)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import array as arr
from sklearn import datasets, linear_model, metrics


In [None]:
# Read the dataset CSV from the public GitHub repository.
# For an offline run, point `url` at a local copy instead, e.g. "dataset.csv".
url = "https://raw.githubusercontent.com/aist2000/ML-public/master/dataset.csv"
df = pd.read_csv(url, skiprows=0)

# Quick look at the data: first rows, then per-column summary statistics.
display(df.head())
display(df.describe())

In [None]:
# Pairwise scatter plots of the four features, with a kernel-density estimate
# on the diagonal, to see how the data points are distributed.
import seaborn as sns

sns.pairplot(
    df,
    vars=['X1', 'X2', 'X3', 'X4'],
    diag_kind='kde',
)
plt.show()

In [None]:
# Features are the first four columns of the frame; the target is column 'Y'.
# The double brackets keep `y` a one-column DataFrame rather than a Series.
feature_columns = df.columns[0:4]
X = df[feature_columns]
y = df[['Y']]

display(X.head())
display(y.head())


In [None]:
# Standardise the feature columns with StandardScaler.
# The original author marked this step as "not needed": the cells visible in
# this notebook train on the unscaled `X`, so `X_scaled` is shown for
# inspection only.
from sklearn.preprocessing import RobustScaler, StandardScaler

numeric = ['X1', 'X2', 'X3', 'X4']
sc = StandardScaler()
X_scaled = sc.fit(X).transform(X)
display("Scaled", X_scaled)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state keeps the split
# identical from run to run.
# (To train on the whole dataset instead, assign X_train = X and y_train = y.)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# Summary statistics of each split, to check that they look alike.
display("Train", X_train.describe())
display("Test", X_test.describe())

In [None]:
# Compare several off-the-shelf classifiers on the same train/test split and
# collect accuracy, precision and recall for each of them.
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

# Model name -> estimator. Estimators with a random component get a fixed seed
# (the same value the train/test split uses) so that the comparison gives the
# same numbers after a kernel restart.
models = {
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machines': LinearSVC(random_state=0),
    'Decision Trees': DecisionTreeClassifier(random_state=0),
    'Random Forest': RandomForestClassifier(random_state=0),
    'Naive Bayes': GaussianNB(),
    'K-Nearest Neighbor': KNeighborsClassifier(),
}

# Metric name -> {model name: score}; filled in by the loop below.
accuracy, precision, recall = {}, {}, {}

# The labels live in one-column DataFrames; flatten them once to 1-D arrays.
train_labels = y_train.values.ravel()
test_labels = y_test.values.ravel()

for key, classifier in models.items():
    # Fit on the training split, then predict the held-out rows.
    classifier.fit(X_train, train_labels)
    predictions = classifier.predict(X_test)

    # scikit-learn metrics expect (y_true, y_pred). Accuracy is symmetric, but
    # passing the arguments the other way round swaps precision and recall.
    accuracy[key] = accuracy_score(test_labels, predictions)
    precision[key] = precision_score(test_labels, predictions)
    recall[key] = recall_score(test_labels, predictions)

In [None]:
# Collect the per-model metrics into one table: one row per model, one column
# per metric. The three dicts were filled in the same loop over `models`, so
# their values are in the same order as the index.
metric_scores = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
}
df_model = pd.DataFrame(index=models.keys(), columns=list(metric_scores))
for metric_name, scores_by_model in metric_scores.items():
    df_model[metric_name] = scores_by_model.values()

df_model

**Decision Trees**

In [None]:
# Spot-check the decision tree fitted in the model-comparison cell, then report
# its accuracy on the training and test splits.
model = models['Decision Trees']

# Hand-written sample points, one row per sample. The values are positional,
# i.e. they are interpreted in the column order of X_train.
x_t = [
    [-0.620128, 0.0, 0.199953, 0.130023],
    [1.870952, 0.0, 0.000548, -0.730424],
    [2.0, 0, 2.00, -165],
    [4.5, 0, 0.89, -245],
]
# The tree was fitted on a DataFrame, so predict on a frame carrying the same
# column names; a bare list makes scikit-learn warn about missing feature names.
pred1 = model.predict(pd.DataFrame(x_t, columns=X_train.columns))
print("For parameters = {}, \n we predict {} ".format(x_t, pred1))

# Train accuracy: the estimator's own score next to a hand-computed hit rate
# (the two numbers are expected to agree).
model_accuracy = model.score(X_train, y_train.Y)
y_pred = model.predict(X_train)
calc_accuracy = sum(y_pred == y_train.Y) / y_train.Y.count()
print("Train Accuracy Count={}; Model Accuracy= {} ; Calculated Accuracy={}".format(y_train.Y.count(), model_accuracy, calc_accuracy))

# Predictions for the first few test rows, printed with their true labels.
x_t = X_test.head()
y_t = y_test.head()
pred1 = model.predict(x_t)
# Features and labels are separated by a newline so the two tables do not run
# into each other in the output.
print("For parameters = {} \n{}, \n we predict {} ".format(x_t, y_t, pred1))

# Test accuracy, computed the same two ways as for the training split.
model_accuracy = model.score(X_test, y_test.Y)
y_pred = model.predict(X_test)
calc_accuracy = sum(y_pred == y_test.Y) / y_test.Y.count()
print("Test Accuracy Count={}; Model Accuracy= {} ; Calculated Accuracy={}".format(y_test.Y.count(), model_accuracy, calc_accuracy))


**Scikit-learn Logistic Regression**

In [None]:
# Logistic regression
from sklearn.linear_model import LogisticRegression

# Instantiate the model with the liblinear solver and regularisation C=1.
# (Alternative tried by the author: LogisticRegression(max_iter=400, solver='lbfgs').)
model = LogisticRegression(solver="liblinear", C=1)

# scikit-learn wants a 1-D label array, so flatten the one-column label frame.
y_t = y_train.to_numpy().ravel()
model.fit(X_train, y_t)

# Show the fitted coefficients and the intercept.
display(model.coef_, model.intercept_)


In [None]:
# Evaluate the logistic-regression model held in `model` (fitted in the
# previous cell): accuracy on both splits plus a few sample predictions.

# Train accuracy: the estimator's own score next to a hand-computed hit rate
# (the two numbers are expected to agree). The 1-D label column y_train.Y is
# passed to score(), matching every other score() call in this notebook.
model_accuracy = model.score(X_train, y_train.Y)
y_pred = model.predict(X_train)
calc_accuracy = sum(y_pred == y_train.Y) / y_train.Y.count()
print("Train  Count={}; Model Accuracy= {} ; Calculated Accuracy={}".format(y_train.Y.count(), model_accuracy, calc_accuracy))

# Predictions for the first few test rows, printed with their true labels.
x_t = X_test.head()
y_t = y_test.head()
pred1 = model.predict(x_t)
# predict_proba returns one column per class; column 1 is printed below.
proba1 = model.predict_proba(x_t)
print("For parameters = {} \n{}, \n we predict {} proba={}".format(x_t, y_t, pred1, proba1[:, 1]))

# Test accuracy, computed the same two ways as for the training split.
model_accuracy = model.score(X_test, y_test.Y)
y_pred = model.predict(X_test)
calc_accuracy = sum(y_pred == y_test.Y) / y_test.Y.count()
print("Test  Count={}; Model Accuracy= {} ; Calculated Accuracy={}".format(y_test.Y.count(), model_accuracy, calc_accuracy))


**Support Vector Machine (SVM)**

In [None]:
# Support Vector Machine (SVM)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Pipeline of two steps: standardise the features, then fit an SVC with
# gamma='auto'. The scaler is fitted together with the classifier.
model = make_pipeline(
    StandardScaler(),
    SVC(gamma='auto'),
)

# scikit-learn wants a 1-D label array, so flatten the one-column label frame.
y_t = y_train.to_numpy().ravel()
model.fit(X_train, y_t)

In [None]:
# Evaluate the SVM pipeline held in `model` (fitted in the previous cell):
# sample predictions, accuracy on both splits, and the misclassified test rows.

# Hand-written sample points, one row per sample. The values are positional,
# i.e. they are interpreted in the column order of X_train.
x_t = [
    [-0.620128, 0.0, 0.199953, 0.130023],
    [1.870952, 0.0, 0.000548, -0.730424],
    [2.0, 0, 2.00, -165],
    [4.5, 0, 0.89, -245],
]
# The pipeline was fitted on a DataFrame, so predict on a frame carrying the
# same column names; a bare list makes scikit-learn warn about missing feature
# names.
pred1 = model.predict(pd.DataFrame(x_t, columns=X_train.columns))
print("For parameters = {}, \n we predict {} ".format(x_t, pred1))

# Train accuracy: the estimator's own score next to a hand-computed hit rate
# (the two numbers are expected to agree).
model_accuracy = model.score(X_train, y_train.Y)
y_pred = model.predict(X_train)
calc_accuracy = sum(y_pred == y_train.Y) / y_train.Y.count()
print("Train Accuracy Count={}; Model Accuracy= {} ; Calculated Accuracy={}".format(y_train.Y.count(), model_accuracy, calc_accuracy))

# Predictions for the first few test rows, printed with their true labels.
x_t = X_test.head()
y_t = y_test.head()
pred1 = model.predict(x_t)
# Features and labels are separated by a newline so the two tables do not run
# into each other in the output.
print("For parameters = {} \n{}, \n we predict {} ".format(x_t, y_t, pred1))

# Test accuracy, computed the same two ways as for the training split.
model_accuracy = model.score(X_test, y_test.Y)
y_pred = model.predict(X_test)
calc_accuracy = sum(y_pred == y_test.Y) / y_test.Y.count()
print("Test Accuracy Count={}; Model Accuracy= {} ; Calculated Accuracy={}".format(y_test.Y.count(), model_accuracy, calc_accuracy))

# Misclassified test rows: first the true labels (at most ten), then what the
# model predicted for all of them.
misclassified = y_pred != y_test.Y
print(y_test[misclassified].head(10))
print(y_pred[misclassified.to_numpy()])