In [None]:
import numpy as np
import matplotlib.pyplot as mplt
import plotly.express as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.mixture import GaussianMixture
from sklearn.datasets import make_blobs
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.datasets import load_digits
from sklearn import metrics

In [None]:
# Linear Regression
# Synthetic data: a line with slope 2 and intercept -1, plus additive noise.
rng = np.random.RandomState(42)
x = 10 * rng.rand(100)
# NOTE(review): rng.rand draws uniform noise on [0, 1) (mean 0.5), so the fitted
# intercept lands near -0.5 rather than -1; switch to rng.randn if zero-mean
# Gaussian noise was intended (this would also change every later draw from rng).
y = 2 * x - 1 + rng.rand(100)

from sklearn.linear_model import LinearRegression

model = LinearRegression(fit_intercept=True)

# scikit-learn estimators take a 2-D feature matrix (n_samples, n_features),
# so the 1-D sample vector is turned into a single-column matrix.
X = x[:, np.newaxis]
assert X.shape == (100, 1), X.shape

model.fit(X, y)

# Evaluate the fitted line on an evenly spaced grid slightly wider than the data.
xfit = np.linspace(-1, 11)
Xfit = xfit[:, np.newaxis]
yfit = model.predict(Xfit)

In [None]:
# Overlay the raw samples (markers) with the fitted regression line.
trace1 = go.Scatter(x=x, y=y, mode='markers')
trace2 = go.Scatter(x=xfit, y=yfit, mode='lines')

data = [trace1, trace2]
fig = go.Figure(data=data)
fig.show()

In [None]:
# Fetch the iris example data set through seaborn and preview its first rows.
iris = sns.load_dataset('iris')
iris.head(n=5)

In [None]:
# Pairwise relationships between the iris columns, coloured by species.
sns.pairplot(data=iris, hue='species', height=1.5)

In [None]:
# Supervised learning: Gaussian naive Bayes on iris.
# Later cells add 'PCA1', 'PCA2' and 'cluster' columns to `iris` in place; drop
# them as well (when present) so that re-running this cell still yields only the
# original feature columns instead of leaking derived columns into the model.
X_iris = iris.drop(columns=['species', 'PCA1', 'PCA2', 'cluster'], errors='ignore')
y_iris = iris['species']

Xtrain, Xtest, ytrain, ytest = train_test_split(X_iris, y_iris, random_state=1)

model = GaussianNB()
model.fit(Xtrain, ytrain)
y_model = model.predict(Xtest)

# Fraction of held-out samples whose predicted species matches the true one.
accuracy_score(ytest, y_model)

In [None]:
# Principal Component Analysis: reduce the iris features to two components.
from sklearn.decomposition import PCA

model = PCA(n_components=2).fit(X_iris)
X_2D = model.transform(X_iris)

# Store both projected coordinates on the frame so seaborn can plot them by name.
iris['PCA1'], iris['PCA2'] = X_2D[:, 0], X_2D[:, 1]
sns.lmplot(x="PCA1", y="PCA2", hue='species', data=iris, fit_reg=False)

In [None]:
# Unsupervised learning: iris clustering with a three-component Gaussian mixture.
# random_state pins the initialisation so the cluster numbering (and therefore
# the per-cluster facets below) is the same on every run, matching the seeded
# steps used elsewhere in this notebook.
model = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
model.fit(X_iris)
y_ggm = model.predict(X_iris)

# One facet per predicted cluster, coloured by the true species.
iris['cluster'] = y_ggm
sns.lmplot(x="PCA1", y="PCA2", data=iris, hue='species', col='cluster', fit_reg=False)

In [None]:
# Synthetic classification data: 300 points scattered around 4 centres.
X, y = make_blobs(n_samples=300, centers=4, cluster_std=1.0, random_state=0)
mplt.scatter(X[:, 0], X[:, 1], s=50, c=y, cmap='rainbow')

In [None]:
# Fit a decision tree with default hyperparameters to the blob data.
tree = DecisionTreeClassifier()
tree = tree.fit(X, y)

In [None]:
def visualize_classifier(model, X, y, ax=None, cmap='rainbow'):
    """Fit ``model`` on ``(X, y)`` and draw its decision regions behind the points.

    Parameters
    ----------
    model : object with ``fit(X, y)`` and ``predict(X)``
        Fitted in place by this function before the regions are evaluated.
    X : array indexed as ``X[:, 0]`` and ``X[:, 1]``
        The two columns are used as the horizontal and vertical coordinates.
    y : array of class labels
        Colours the points. The contour levels assume the labels are the
        integers ``0 .. n_classes - 1`` -- TODO confirm for other label sets.
    ax : axes-like object, optional
        Axes to draw on; the current pyplot axes are used when ``None``.
    cmap : str
        Colormap shared by the points and the filled regions.

    Returns
    -------
    None
    """
    # Test identity, not truthiness: an axes object that was passed explicitly
    # must always be used, whatever its boolean value.
    if ax is None:
        ax = mplt.gca()

    # Plot the training points first so the axis limits follow the data.
    ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cmap,
               clim=(y.min(), y.max()), zorder=3)
    ax.axis('tight')
    ax.axis('off')
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # Fit, then predict on a 200 x 200 grid spanning the visible area.
    model.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Half-integer levels put each integer class label in its own colour band.
    n_classes = len(np.unique(y))
    ax.contourf(xx, yy, Z, alpha=0.3,
                levels=np.arange(n_classes + 1) - 0.5,
                cmap=cmap, zorder=1)

    # Restore the limits captured before the contour was drawn.
    ax.set(xlim=xlim, ylim=ylim)

In [None]:
# Decision regions of a fresh decision tree (the helper fits it internally).
visualize_classifier(model=DecisionTreeClassifier(), X=X, y=y)

In [None]:
# Bagging ensemble: 100 estimators built from `tree`, each drawn with
# max_samples=0.8 of the training-set size.
bag = BaggingClassifier(tree, max_samples=0.8, n_estimators=100, random_state=1).fit(X, y)
visualize_classifier(bag, X, y)

In [None]:
# Random forest of 100 trees; fitting happens inside visualize_classifier.
model = RandomForestClassifier(random_state=0, n_estimators=100)
visualize_classifier(model, X, y)

In [None]:
# Random Forest Regression
# 200 sample locations drawn uniformly from [0, 10) using the shared generator.
x = rng.rand(200) * 10

In [None]:
def model(x, sigma=0.3, random_state=None):
    """Evaluate a two-frequency sine signal at ``x`` and add Gaussian noise.

    Parameters
    ----------
    x : scalar or numpy array
        Location(s) at which the signal is evaluated.
    sigma : float, default 0.3
        Scale applied to the standard-normal noise; ``0`` gives the clean signal.
    random_state : object with a ``randn`` method, optional
        Noise source (e.g. ``np.random.RandomState``). When ``None`` the
        notebook-level ``rng`` is used, which is the original behaviour.

    Returns
    -------
    ``sin(0.5 * x) + sin(5 * x) + sigma * noise``, shaped like ``x``.
    """
    generator = rng if random_state is None else random_state

    fast_oscillation = np.sin(5 * x)
    slow_oscillation = np.sin(0.5 * x)
    # The noise is drawn even when sigma == 0 so the generator's stream advances
    # exactly as it always has; np.shape lets scalars and N-d inputs work too.
    noise = sigma * generator.randn(*np.shape(x))

    return slow_oscillation + fast_oscillation + noise

In [None]:
# Noisy observations of the signal, drawn with constant error bars of 0.3.
y = model(x)
mplt.errorbar(x, y, yerr=0.3, fmt='o')

In [None]:
# Fit a 200-tree random forest regressor to the noisy samples. The tree count is
# passed by keyword and the estimator is seeded so the fitted curve is the same
# on every run, consistent with the seeded steps elsewhere in this notebook.
forest = RandomForestRegressor(n_estimators=200, random_state=0)
forest.fit(x[:, None], y)

# Dense grid over the sampled range for the prediction and the reference curve.
xfit = np.linspace(0, 10, 1000)
yfit = forest.predict(xfit[:, None])
# `model` must still be the signal function defined earlier (not an estimator);
# sigma=0 yields the noise-free curve.
ytrue = model(xfit, sigma=0)

# Data with error bars, forest prediction in red, true signal in translucent black.
mplt.errorbar(x, y, 0.3, fmt='o', alpha=0.5)
mplt.plot(xfit, yfit, '-r')
mplt.plot(xfit, ytrue, 'k', alpha=0.5)

In [None]:
# Load the digits data set via scikit-learn's load_digits and list the fields
# the returned object exposes.
digits = load_digits()
digits.keys()

In [None]:
# Show the first 64 digit images on a tightly packed 8 x 8 grid, each annotated
# with its target label.
fig = mplt.figure()
fig.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0.05, hspace=0.05)

for i in range(64):
    image, label = digits.images[i], digits.target[i]
    # Subplot positions are 1-based; ticks are removed to leave only the image.
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(image, cmap=mplt.cm.binary, interpolation='nearest')
    ax.text(0, 7, str(label))

In [None]:
# Classify the digits with a random forest and report per-class metrics.
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data, digits.target, random_state=0)

# Seeded so the reported metrics are reproducible across runs.
model = RandomForestClassifier(n_estimators=1000, random_state=0)
model.fit(Xtrain, ytrain)
ypred = model.predict(Xtest)

# classification_report takes (y_true, y_pred) in that order; passing them
# swapped exchanges precision and recall and reports support for predictions.
print(metrics.classification_report(ytest, ypred))

In [None]:
# Confusion matrix of the digit predictions, transposed so that true labels run
# along the x axis and predicted labels along the y axis.
mat = confusion_matrix(ytest, ypred)
sns.heatmap(mat.T, annot=True, fmt='d', square=True, cbar=False)
mplt.xlabel('true label')
mplt.ylabel('predicted label')