# Cross-validation / Splitting Datasets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples as the test set.
# random_state seeds the shuffling done before the split; any fixed integer
# makes the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=20,
)

-----
# Scaling Data

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on every column except 'TARGET CLASS'.
# fit() only learns the scaling statistics and returns the scaler itself;
# call scaler.transform(...) afterwards to obtain the scaled values.
scaler = StandardScaler().fit(
    dataset.drop('TARGET CLASS', axis=1)
)

-----
# Evaluation Metrics

In [None]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix

# Arguments are (true labels, predicted labels): rows of the printed matrix
# are the true classes, columns are the predicted classes.
# `y_predict` is the prediction name used by the other metric cells.
print(confusion_matrix(y_test, y_predict))

In [None]:
# Classification Report
from sklearn.metrics import classification_report

# Print the text report comparing the true labels with the predictions.
print(
    classification_report(y_true=y_test, y_pred=y_predict)
)

In [None]:
# Accuracy Score
from sklearn.metrics import accuracy_score

# Print the estimator's class name followed by its accuracy on the test labels.
# NOTE(review): `clf` must be the estimator that produced `y_predict` - confirm.
print(type(clf).__name__, accuracy_score(y_test, y_predict))

# Models

-----
### Linear Regression

-----
### Support Vector Machines

#### Linear Classification

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Standardize the features, then fit a linear SVM classifier with hinge loss.
svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge")),
])

svm_clf.fit(X_train, y_train)
# Predict on the held-out set so that y_predict has the same length as y_test
# and can be passed to the evaluation-metric cells.
y_predict = svm_clf.predict(X_test)

#### Non-linear Classification

In [None]:
# Polynomial Kernel
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize the features, then fit an SVM classifier with a degree-2
# polynomial kernel.
svm_poly = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="poly", degree=2, coef0=1, C=5)),
])

# np.transpose([a, b]) stacks two equal-length 1-D sequences as the two
# columns of the feature matrix; `s` holds the class labels.
# NOTE(review): the same `x` is paired with y_train and with y_test, which
# only works if all three have the same length - confirm against the data.
svm_poly.fit(np.transpose([x, y_train]), s)
s_poly = svm_poly.predict(np.transpose([x, y_test]))

In [None]:
# Gaussian Kernel
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardize the features, then fit an SVM classifier with a Gaussian (RBF)
# kernel.
svm_gauss = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="rbf", gamma=0.1, C=0.1)),
])

# np.transpose([a, b]) stacks two equal-length 1-D sequences as the two
# columns of the feature matrix; `s` holds the class labels.
# NOTE(review): the same `x` is paired with y_train and with y_test, which
# only works if all three have the same length - confirm against the data.
svm_gauss.fit(np.transpose([x, y_train]), s)
s_gauss = svm_gauss.predict(np.transpose([x, y_test]))

#### Linear Regression

In [None]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR

# Standardize the feature, then fit a linear support vector regressor.
svm_reg = Pipeline([
    ("scaler", StandardScaler()),
    ("lsvr", LinearSVR(random_state=3, epsilon=1.5)),
])

# np.transpose([x]) turns a 1-D sequence into a single-column feature matrix
# (assumes x is 1-D - TODO confirm).
svm_reg.fit(np.transpose([x]), y)

# Slope and intercept of the fitted model. The regressor sits behind the
# scaler, so these values apply to the standardized feature, not to raw x.
reg_coefficient = svm_reg.named_steps['lsvr'].coef_
reg_intercept = svm_reg.named_steps['lsvr'].intercept_

#### Non-linear Regression

In [None]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Choose your kernel
# Standardize the feature, then fit a kernelized support vector regressor.
svm_reg = Pipeline([
    ("scaler", StandardScaler()),
    # SVR takes no random_state argument. C=1.0 is scikit-learn's default
    # regularization strength - tune it for your data.
    ("lsvr", SVR(kernel="poly", degree=2, C=1.0, epsilon=1.5)),
])

# np.transpose([x]) turns a 1-D sequence into a single-column feature matrix
# (assumes x is 1-D - TODO confirm).
svm_reg.fit(np.transpose([x]), y)

# coef_ is only available for kernel="linear"; with a non-linear kernel the
# fitted model exposes the support-vector (dual) coefficients instead.
reg_coefficient = svm_reg.named_steps['lsvr'].dual_coef_
reg_intercept = svm_reg.named_steps['lsvr'].intercept_

-----
### K-Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# n_neighbors is k: the number of nearest neighbors consulted per prediction.
# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
knn_predictions = knn.predict(X_test)

-----
### Decision Trees
#### Decision Tree Classification

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Train a decision tree with default hyperparameters, then label the test set.
# fit() returns the estimator itself, so construction and training are chained.
dtree = DecisionTreeClassifier().fit(X_train, y_train)
dtree_predictions = dtree.predict(X_test)

In [None]:
# Visualization of Decision Trees
# Requires pydot and Graphviz.
#   Graphviz is not in the pip library - install it separately from
#   www.graphviz.org

# sklearn.externals.six has been removed from scikit-learn; the standard
# library's io.StringIO is the drop-in replacement.
from io import StringIO

from IPython.display import Image
from sklearn.tree import export_graphviz
import pydot

# Feature names are every column of df except the first.
# NOTE(review): assumes the first column is not a feature and that the
# remaining columns match the ones `dtree` was trained on - confirm.
features = list(df.columns[1:])

# Write the tree in Graphviz DOT format to an in-memory text buffer.
dot_data = StringIO()
export_graphviz(
    dtree,
    out_file=dot_data,
    feature_names=features,
    filled=True,
    rounded=True,
)

# The result is indexed because graph_from_dot_data yields a sequence of
# graphs; render the first one as a PNG and display it inline.
graph = pydot.graph_from_dot_data(dot_data.getvalue())
Image(graph[0].create_png())

#### Decision Tree Regression

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Decision tree regression: use the regressor, not the classifier, so that
# continuous targets are predicted as numeric values rather than class labels.
tree_reg = DecisionTreeRegressor()
tree_reg.fit(X_train, y_train)

tree_predict = tree_reg.predict(X_test)

-----
### Ensemble Learning
#### Voting Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Three base classifiers, all with default hyperparameters.
log_clf, rnd_clf, svm_clf = LogisticRegression(), RandomForestClassifier(), SVC()

# voting='hard' predicts the class label chosen by the majority of the base
# classifiers. fit() trains the ensemble and returns the ensemble itself.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('svc', svm_clf),
    ],
    voting='hard',
).fit(X_train, y_train)

#### Random Forests

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 200 trees; fit() returns the estimator itself, so
# construction and training are chained.
rfc = RandomForestClassifier(n_estimators=200).fit(X_train, y_train)
rfc_predictions = rfc.predict(X_test)

#### AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with up to 200 boosting rounds; learning_rate scales the
# contribution of each successive weak learner.
ada_clf = AdaBoostClassifier(n_estimators=200, learning_rate=0.5)
ada_clf.fit(X_train, y_train)
# Stored as y_predict (was misspelled) so the evaluation-metric cells can use it.
y_predict = ada_clf.predict(X_test)

#### Gradient Boost

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient-boosted regression: 5 trees of depth 2, learning rate 0.6.
# fit() returns the estimator itself, so construction and training are chained.
gbr = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=5,
    learning_rate=0.6,
).fit(X_train, y_train)
y_predict = gbr.predict(X_test)

-----
## Unsupervised models
### K-Means

### DBSCAN