In [None]:
# Clustering K-Means
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Read only the three measurement columns used for clustering and discard
# every row in which any of them is missing.
feature_columns = ['culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm']
df = pd.read_csv('penguins_size.csv', usecols=feature_columns).dropna()

# Standardise the features so that no single measurement dominates the
# distance computation; `dfs` holds the scaled values, `df` keeps the originals.
scaler = StandardScaler()
dfs = scaler.fit_transform(df)

# Elbow method: fit K-Means for k = 1..10 on the scaled features and record
# the inertia (within-cluster sum of squared distances) of each fit.
cluster_counts = range(1, 11)
sse = []
for i in cluster_counts:
    # random_state pins the centroid initialisation so the curve is identical
    # on every run; n_init is spelled out because its default value differs
    # between scikit-learn releases.
    kmeans = KMeans(n_clusters=i, n_init=10, random_state=42)
    kmeans.fit(dfs)
    sse.append(kmeans.inertia_)

plt.plot(cluster_counts, sse, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('SSE')
plt.show()

# Final clustering with k = 3 on the scaled features.  random_state and n_init
# are fixed so the cluster labels (and therefore the point colours) are stable
# across runs and scikit-learn versions.
pred = KMeans(n_clusters=3, n_init=10, random_state=42).fit_predict(dfs)

# Plot in the original (unscaled) units.  Columns are selected by name so the
# axis labels stay correct regardless of the column order in the CSV file.
plt.scatter(df['culmen_length_mm'], df['flipper_length_mm'], c=pred, s=5)
plt.title('K-Means Clustering')
plt.xlabel('Culmen Length (mm)')
plt.ylabel('Flipper Length (mm)')
plt.show()

In [None]:
# Decision Tree
from sklearn import datasets 
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV

# Load the breast-cancer data set as a (features, labels) pair and split it
# 60 % / 40 % into training and test sets with a fixed seed.
X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.6,
    test_size=0.4,
    random_state=42,
)

# Baseline tree: entropy split criterion, at least 6 samples required to split
# a node, no depth limit.  `fit` returns the estimator, so it can be chained.
clf = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=6).fit(X_train, y_train)

# Evaluate on the held-out test set.
predC = clf.predict(X_test)
baseline_accuracy = accuracy_score(y_test, predC)
print('The accuracy of the classifier is', baseline_accuracy)

# Draw the fitted tree; the return value is assigned to `_` to suppress the
# list of annotations that would otherwise be echoed as cell output.
plt.figure(figsize=(20, 20))
_ = tree.plot_tree(clf, filled=True, fontsize=12)

# Train one tree per candidate depth and record its accuracy on the training
# and test sets, to visualise under-/over-fitting as depth grows.
trainAccuracy = []
testAccuracy = []

depthOptions = range(1, 16)
for depth in depthOptions:
    # max_depth must be passed explicitly: without it every iteration fits the
    # same unrestricted tree and the curve below does not depend on depth.
    cltree = tree.DecisionTreeClassifier(criterion = 'entropy', min_samples_split = 6, max_depth = depth)
    cltree = cltree.fit(X_train, y_train)
    y_predTrain = cltree.predict(X_train)
    y_predTest = cltree.predict(X_test)
    trainAccuracy.append(accuracy_score(y_train, y_predTrain))
    testAccuracy.append(accuracy_score(y_test, y_predTest))

plt.figure(figsize=(10, 10))
plt.plot(depthOptions, trainAccuracy, marker='o', color='blue')
plt.plot(depthOptions, testAccuracy, marker='*', color='orange')
plt.legend(['Training Accuracy','Test Accuracy'])
plt.xlabel('Tree Depth')
plt.ylabel('Classifier Accuracy')

# Cross-validated search over the same candidate depths, using GridSearchCV's
# default cross-validation and scoring settings on the training split only.
parameters = {'max_depth': depthOptions}
base_tree = tree.DecisionTreeClassifier(criterion='entropy', min_samples_split=6)
clf_gs = GridSearchCV(base_tree, parameters)
clf_gs.fit(X_train, y_train)

# best_estimator_ is the tree with the winning depth.
tree_model = clf_gs.best_estimator_
print("The maximum depth of the tree should be", clf_gs.best_params_)

# Draw the selected tree; `_` swallows the returned annotation list.
plt.figure(figsize=(15, 15))
_ = tree.plot_tree(tree_model, filled=True, fontsize=12)

In [None]:
# Regression
import pandas
from sklearn import linear_model
import matplotlib.pyplot as plt

# Basic Linear Regression
# The CSV has no header row; its two columns are named X (feature) and y (target).
data = pandas.read_csv('data.csv', header=None, names=['X', 'y'])
# scikit-learn expects a 2-D feature matrix, hence the reshape to a single column.
X = data['X'].to_numpy().reshape(-1, 1)
y = data['y']
plt.scatter(X, y)

# Fit ordinary least squares and predict on the training inputs.
reg = linear_model.LinearRegression().fit(X, y)
y_pred = reg.predict(X)

# New figure: raw points in blue, fitted line in red.
fig = plt.figure()
plt.scatter(X, y, c='b')
plt.plot(X, y_pred, 'r')
fig.canvas.draw()

# Logistic Regression
def _scatter_by_class(frame, labels, markers, colors):
    """Scatter Score1 against Score2 on the current axes, one call per class.

    Parameters
    ----------
    frame : DataFrame with 'Score1' and 'Score2' columns.
    labels : Series or array with one class label per row of ``frame``.
        Class ``k`` is drawn with ``markers[k]`` / ``colors[k]``; labels are
        assumed to be 0 .. len(markers) - 1 -- TODO confirm against the data.
    markers, colors : equally long sequences of matplotlib marker / colour specs.

    Issuing one ``plt.scatter`` call per class instead of one per sample keeps
    the number of artists independent of the data set size.
    """
    for label, (marker, color) in enumerate(zip(markers, colors)):
        selected = labels == label
        if not selected.any():
            continue  # nothing to draw for this class
        plt.scatter(frame.loc[selected, 'Score1'], frame.loc[selected, 'Score2'],
                    marker=marker, color=color)


# NOTE(review): the same 'data.csv' path is read by the linear-regression
# section with a two-column schema -- confirm which file belongs here.
data = pandas.read_csv('data.csv', header = None, names=['Score1', 'Score2', 'y'])
X = data[['Score1', 'Score2']]
y = data['y']

# Ground-truth classes.
m = ['o', 'x']
c = ['hotpink', '#88c999']
fig = plt.figure()
_scatter_by_class(data, y, m, c)
fig.canvas.draw()

regS = linear_model.LogisticRegression()
regS.fit(X, y)

# Predicted classes, drawn on top of the same figure in a different palette.
y_pred = regS.predict(X)
m = ['o', 'x']
c = ['red', 'blue']
_scatter_by_class(data, y_pred, m, c)
fig.canvas.draw()

In [None]:
# Neural networks
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import numpy as np

# Download (or load from the local Keras cache) the MNIST train/test splits.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Preview the first ten training digits.  figsize is (width, height), so a
# 5 x 2 inch figure needs a grid of 2 rows x 5 columns; the previous 5 x 2 grid
# squeezed five rows into two inches of height.
plt.figure(figsize=(5,2))
for i in range(10):
    plt.subplot(2, 5, i + 1)
    plt.imshow(x_train[i], cmap='gray')
    plt.axis('off')
plt.show()

# Scale pixel intensities by 1/255 before training.
x_train, x_test = x_train / 255, x_test / 255

# Fully connected classifier: flatten each 28x28 image, two ReLU hidden layers
# (300 and 100 units), and a 10-way softmax output.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation='relu'),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])
model.summary()

# Sparse categorical cross-entropy is used because the labels are passed to
# fit() as they come from load_data(), not one-hot encoded here.
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy')
model.fit(x_train, y_train, epochs=20)

plt.close('all')

# One row of 10 softmax outputs per test image; argmax picks the predicted digit.
y_pred = model.predict(x_test)

# Show the first ten test digits with their predicted labels in a grid of
# 2 rows x 5 columns.  The figure is enlarged and the title font reduced so
# that the per-image titles fit without overlapping neighbouring panels.
plt.figure(figsize=(10, 5))
for i in range(10):
    plt.subplot(2, 5, i+1)
    plt.title('Predicted label: ' + str(np.argmax(y_pred[i])), fontsize=9)
    plt.imshow(x_test[i], cmap='gray')
    plt.axis('off')
plt.tight_layout()
plt.show()