# In [None]:
from sklearn.datasets import load_digits, load_breast_cancer, load_diabetes

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.decomposition import KernelPCA

# =============================================================================
# DATASETS
# =============================================================================
# Load the three scikit-learn toy datasets. `bc` is consumed again by the
# t-SNE section further down.
digits = load_digits()
bc = load_breast_cancer()
diabetes = load_diabetes()

# Pair every digit image with its target value.
images_and_labels = list(zip(digits.images, digits.target))

# Preview samples 10..19 on a 2x5 grid of axis-less panels.
plt.figure(figsize=(8, 8))
for panel, (pixels, target) in enumerate(images_and_labels[10:20], start=1):
    plt.subplot(2, 5, panel)
    plt.axis('off')
    plt.imshow(pixels, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Target: %i' % target)


# =============================================================================
# CLASSIFICATION
# =============================================================================
def f(x):
    """Return the decision boundary y = 2x - 5 evaluated at x."""
    return 2 * x - 5


# Draw 30 random integer points with both coordinates in [0, 15) and split
# them by whether they lie strictly above the boundary.
pos, neg = [], []
for _ in range(30):
    px = np.random.randint(15)
    py = np.random.randint(15)
    bucket = pos if f(px) < py else neg
    bucket.append([px, py])

# Scatter both groups and overlay the boundary between x = 0 and x = 10.
plt.figure(figsize=(8, 8))
plt.xticks([])
plt.yticks([])
for group, symbol in ((pos, 'x'), (neg, 'o')):
    plt.scatter(*zip(*group), marker=symbol)
plt.plot([0, 10], [f(0), f(10)], linestyle='--', color='m')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Classification')

# =============================================================================
# REGRESSION
# =============================================================================

# Sample 30 noisy observations of the line f(x) = 2x - 5 (f is defined in the
# classification section above).
dat = []

for _ in range(30):
    # The first positional argument of np.random.uniform is `low`, so a bare
    # `uniform(10)` means low=10 with high left at its default of 1.0.
    # Pass both bounds explicitly so x covers [0, 10), the same span over
    # which the reference line is drawn below.
    x = np.random.uniform(0, 10)
    # Additive noise drawn uniformly from [-2, 2).
    y = f(x) + np.random.uniform(-2.0, 2.0)
    dat.append([x, y])


plt.figure(figsize=(8, 8))
plt.xticks([])
plt.yticks([])
plt.scatter(*zip(*dat))
plt.plot([0, 10], [f(0), f(10)], linestyle='--', color='m')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Regression')

# =============================================================================
# CLUSTERING
# =============================================================================

# Generate 300 points scattered around three disc centres and let k-means
# recover the grouping.
km = KMeans(n_clusters=3)
dat = []

t = 0.5  # radius of each disc

for _ in range(300):
    # Pick one of the three centres (0, 0), (1, 1) or (2, 2).
    c = np.random.randint(3)
    # Angle over the full circle. np.pi replaces the literal 3.14, which
    # left a small wedge of every disc unsampled.
    a = np.random.uniform() * 2 * np.pi
    # sqrt of a uniform draw makes the points uniform over the disc's area
    # rather than concentrated near the centre.
    r = t * np.sqrt(np.random.uniform())

    x = r * np.cos(a)
    y = r * np.sin(a)

    dat.append([c + x, c + y])


# From here on `c` holds the cluster index k-means assigned to each point;
# it is used to colour the scatter plot.
c = km.fit_predict(dat)
plt.figure(figsize=(8, 8))
plt.xticks([])
plt.yticks([])
plt.scatter(*zip(*dat), c=c)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Clustering')

# =============================================================================
# TSNE
# =============================================================================

from sklearn.manifold import TSNE

# Embed the breast-cancer feature matrix with t-SNE using default settings.
tsne = TSNE()
dat = tsne.fit_transform(bc.data)

# Boolean masks selecting the samples of each target value.
reds = bc.target == 0
blues = bc.target == 1

# Plot the first two embedding columns, one scatter call per class.
plt.figure(figsize=(8, 8))
for mask, name, symbol in ((reds, 'malignant', '.'), (blues, 'benign', 'x')):
    plt.scatter(dat[mask, 0], dat[mask, 1], label=name, marker=symbol)
plt.xlabel('1st Component')
plt.ylabel('2nd Component')
plt.title('Breast Cancer Data')
plt.legend()

# =============================================================================
# ROC
# =============================================================================
import numpy as np
from sklearn import metrics

# Synthetic, reproducible example: 30 random labels drawn from {1, 2} and
# 30 random scores drawn from {0.00, 0.01, ..., 0.99}.
np.random.seed(856522)
y = np.random.choice([1, 2], 30)
scores = np.random.choice([i/100 for i in range(0, 100)], 30)
fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)

# Create the ROC figure BEFORE requesting its axes. Previously the subplot
# and margins calls ran first and therefore modified whichever figure was
# current at that point instead of this one.
plt.figure(figsize=(8, 8))
ax1 = plt.subplot()
ax1.margins(0)

# Chance diagonal from (0, 0) to (1, 1); the label array `y` is left intact.
plt.plot([0, 1], [0, 1], linestyle='-.')
plt.plot(fpr, tpr, label='ROC curve')

# The x values are false positive rates, i.e. 1 - specificity, so the axis
# must not be labelled plain "Specificity".
plt.xlabel('1 - Specificity')
plt.ylabel('Sensitivity')
plt.title('ROC')
plt.legend()

# =============================================================================
# SVM FIGURE
# =============================================================================
def f(x):
    """Return the separating line y = 2x - 5 evaluated at x."""
    return 2 * x - 5


def f_upp(x):
    """Return the upper margin line (separating line shifted up by 2)."""
    return 2 * x - 5 + 2


def f_lower(x):
    """Return the lower margin line (separating line shifted down by 2)."""
    return 2 * x - 5 - 2


pos, neg = [], []

# Fixed seed so the figure is reproducible.
np.random.seed(345234)
for _ in range(80):
    px = np.random.randint(15)
    py = np.random.randint(15)

    # Perpendicular distance from (px, py) to the line 2x - y - 5 = 0.
    dist = np.abs(2 * px - py - 5) / np.sqrt(2 ** 2 + 1)
    if dist < 1:
        # Discard points closer than 1 to the separating line.
        continue

    boundary = f(px)
    if boundary < py:
        pos.append([px, py])
    elif boundary > py:
        neg.append([px, py])

# One hand-placed point exactly on each margin line.
pos.append([4, f_upp(4)])
neg.append([8, f_lower(8)])

plt.figure(figsize=(8, 8))
plt.xticks([])
plt.yticks([])
for group, symbol in ((pos, 'x'), (neg, 'o')):
    plt.scatter(*zip(*group), marker=symbol)

# Separating line first, then the two margin lines, each from x = 0 to 10.
for line, style, colour in ((f, '-', 'm'),
                            (f_upp, '-.', 'red'),
                            (f_lower, '-.', 'red')):
    plt.plot([0, 10], [line(0), line(10)], linestyle=style, color=colour)
plt.xlabel('x')
plt.ylabel('y')
plt.title('SVM')
plt.show()