Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9f88013
commit d58485e
Showing
58 changed files
with
1,463 additions
and
953 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from __future__ import division, print_function | ||
import numpy as np | ||
from sklearn import datasets | ||
|
||
# Import helper functions | ||
from mlfromscratch.supervised_learning import Adaboost | ||
from mlfromscratch.utils.data_manipulation import train_test_split | ||
from mlfromscratch.utils.data_operation import accuracy_score | ||
from mlfromscratch.utils import Plot | ||
|
||
def main():
    """Train Adaboost on two digit classes and plot its test predictions.

    Loads the scikit-learn digits dataset, keeps only the samples of two
    chosen digits, relabels them to {-1, 1}, fits an Adaboost classifier
    on a ``test_size=0.5`` split and prints the accuracy on the test part.
    """
    digits = datasets.load_digits()

    digit1 = 1
    digit2 = 8

    # Row indices of the two digits of interest (all digit1 rows first).
    first_rows = np.where(digits.target == digit1)[0]
    second_rows = np.where(digits.target == digit2)[0]
    idx = np.append(first_rows, second_rows)

    # Fancy indexing returns copies, so relabelling below leaves the
    # loaded dataset itself untouched.
    X = digits.data[idx]
    y = digits.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    booster = Adaboost(n_clf=5)
    booster.fit(X_train, y_train)
    y_pred = booster.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from __future__ import division, print_function | ||
import numpy as np | ||
|
||
from mlfromscratch.unsupervised_learning import Apriori | ||
|
||
def main():
    """Run the Apriori demo: print frequent itemsets and association rules.

    Uses the small transaction set from Example 2 of
    https://en.wikipedia.org/wiki/Apriori_algorithm and prints the
    thresholds, the transactions, the frequent itemsets and the rules
    returned by ``Apriori``.
    """
    # Demo transaction set
    # Example 2: https://en.wikipedia.org/wiki/Apriori_algorithm
    # The transactions have different lengths, so the array must be an
    # explicit object array: recent NumPy versions refuse to build a
    # ragged array implicitly and raise ValueError instead.
    transactions = np.array(
        [[1, 2, 3, 4], [1, 2, 4], [1, 2], [2, 3, 4], [2, 3], [3, 4], [2, 4]],
        dtype=object,
    )
    print("+-------------+")
    print("| Apriori |")
    print("+-------------+")
    min_sup = 0.25
    min_conf = 0.8
    print("Minimum Support: %.2f" % (min_sup))
    print("Minimum Confidence: %s" % (min_conf))
    print("Transactions:")
    for transaction in transactions:
        print("\t%s" % transaction)

    apriori = Apriori(min_sup=min_sup, min_conf=min_conf)

    # Get and print the frequent itemsets
    frequent_itemsets = apriori.find_frequent_itemsets(transactions)
    print("Frequent Itemsets:\n\t%s" % frequent_itemsets)

    # Get and print the rules
    rules = apriori.generate_rules(transactions)
    print("Rules:")
    for rule in rules:
        # NOTE(review): `concequent` (sic) is the attribute name exposed by
        # the rule objects; it is kept as-is to match that API.
        print("\t%s -> %s (support: %.2f, confidence: %s)" % (rule.antecedent, rule.concequent, rule.support, rule.confidence,))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
|
||
# Import helper functions | ||
from mlfromscratch.utils.data_operation import mean_squared_error | ||
from mlfromscratch.utils.data_manipulation import train_test_split, polynomial_features | ||
from mlfromscratch.supervised_learning import BayesianRegression | ||
|
||
def main():
    """Fit a Bayesian polynomial regression to temperature data and plot it.

    Reads the tab-separated temperature file, splits it into train and
    test parts, fits ``BayesianRegression`` with weakly informative
    priors, prints the test mean squared error and plots the prediction
    together with the credible-interval bounds.
    """
    # Load temperature data
    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    # `.values` replaces `Series.as_matrix()`, which newer pandas releases
    # no longer provide; both yield the column as a NumPy array.
    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_features = np.shape(X)[1]

    # Prior parameters
    # - Weights are assumed distr. according to a Normal distribution
    # - The variance of the weights are assumed distributed according to
    #   a scaled inverse chi-squared distribution.
    # High prior uncertainty!
    # Normal
    mu0 = np.array([0] * n_features)
    omega0 = np.diag([.0001] * n_features)
    # Scaled inverse chi-squared
    nu0 = 1
    sigma_sq0 = 100

    # The credible interval
    cred_int = 10

    clf = BayesianRegression(n_draws=2000,
                             poly_degree=4,
                             mu0=mu0,
                             omega0=omega0,
                             nu0=nu0,
                             sigma_sq0=sigma_sq0,
                             cred_int=cred_int)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Get prediction line
    y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results; X is a fraction of the year, so scaling by 366
    # puts the x-axis in days.
    plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction")
    plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0}% Credible Interval".format(cred_int))
    plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
    plt.axis((0, 366, -20, 25))
    plt.suptitle("Bayesian Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend(loc='lower right')

    plt.show()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
|
||
from __future__ import print_function | ||
from sklearn import datasets | ||
import matplotlib.pyplot as plt | ||
import math | ||
import numpy as np | ||
|
||
# Import helper functions | ||
from mlfromscratch.supervised_learning import NeuralNetwork | ||
from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize | ||
from mlfromscratch.utils.data_manipulation import get_random_subsets, shuffle_data | ||
from mlfromscratch.utils.data_operation import accuracy_score | ||
from mlfromscratch.utils.optimizers import GradientDescent, Adam, RMSprop, Adagrad, Adadelta | ||
from mlfromscratch.utils.loss_functions import CrossEntropy | ||
from mlfromscratch.utils.misc import bar_widgets | ||
from mlfromscratch.utils import Plot | ||
from mlfromscratch.utils.layers import Dense, Dropout, Conv2D, Flatten, Activation, MaxPooling2D | ||
from mlfromscratch.utils.layers import AveragePooling2D, ZeroPadding2D, BatchNormalization, RNN | ||
|
||
|
||
|
||
def main():
    """Train a small convolutional network on the digits dataset.

    One-hot encodes the targets, reshapes the flat 64-feature samples to
    ``(n_samples, 1, 8, 8)``, trains the network, shows the training and
    validation error curves, prints the test accuracy and plots the test
    predictions in 2D.
    """
    #----------
    # Conv Net
    #----------

    optimizer = Adam()

    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Convert to one-hot encoding
    y = to_categorical(y.astype("int"))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # Reshape X to (n_samples, channels, height, width)
    X_train = X_train.reshape((-1, 1, 8, 8))
    X_test = X_test.reshape((-1, 1, 8, 8))

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    # Two convolutional stages followed by a dense classifier head.
    clf.add(Conv2D(n_filters=16, filter_shape=(3, 3), input_shape=(1, 8, 8), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    clf.add(Conv2D(n_filters=32, filter_shape=(3, 3), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    clf.add(Flatten())
    clf.add(Dense(256))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.5))
    clf.add(BatchNormalization())
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print()
    clf.summary(name="ConvNet")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    n = len(train_err)
    training, = plt.plot(range(n), train_err, label="Training Error")
    validation, = plt.plot(range(n), val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    # Predict labels of the test data; argmax turns the per-class scores
    # and the one-hot targets back into class indices.
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Flatten data set
    X_test = X_test.reshape(-1, 8*8)

    # Reduce dimension to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Convolutional Neural Network", accuracy=accuracy, legend_labels=range(10))


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import sys | ||
import os | ||
import math | ||
import random | ||
from sklearn import datasets | ||
import numpy as np | ||
|
||
# Import helper functions | ||
from mlfromscratch.utils import Plot | ||
from mlfromscratch.unsupervised_learning import DBSCAN | ||
|
||
def main():
    """Cluster a two-moons toy dataset with DBSCAN and plot the outcome.

    Two figures are produced with the same plotter: the DBSCAN cluster
    assignment first, then the labels generated with the dataset.
    """
    # Load the dataset
    samples, true_labels = datasets.make_moons(n_samples=300, noise=0.1, shuffle=False)

    # Cluster the data using DBSCAN
    clusterer = DBSCAN(eps=0.17, min_samples=5)
    cluster_labels = clusterer.predict(samples)

    # Project the data onto the 2 primary principal components
    plotter = Plot()
    figures = (
        (cluster_labels, "DBSCAN"),
        (true_labels, "Actual Clustering"),
    )
    for labels, heading in figures:
        plotter.plot_in_2d(samples, labels, title=heading)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from __future__ import division, print_function | ||
import numpy as np | ||
from sklearn import datasets | ||
import matplotlib.pyplot as plt | ||
import sys | ||
import os | ||
|
||
# Import helper functions | ||
from mlfromscratch.utils.data_manipulation import train_test_split, standardize | ||
from mlfromscratch.utils.data_operation import accuracy_score | ||
from mlfromscratch.utils.data_operation import mean_squared_error, calculate_variance | ||
from mlfromscratch.utils import Plot | ||
from mlfromscratch.supervised_learning import ClassificationTree | ||
|
||
def main():
    """Fit a classification tree on the iris dataset and plot its predictions.

    Splits the data with ``test_size=0.4``, trains ``ClassificationTree``
    on the training part, prints the accuracy on the test part and plots
    the test predictions in 2D with the iris class names as legend.
    """
    print("-- Classification Tree --")

    iris = datasets.load_iris()
    features, labels = iris.data, iris.target

    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.4)

    tree = ClassificationTree()
    tree.fit(X_train, y_train)
    y_pred = tree.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    plot_options = {
        "title": "Decision Tree",
        "accuracy": accuracy,
        "legend_labels": iris.target_names,
    }
    Plot().plot_in_2d(X_test, y_pred, **plot_options)


if __name__ == "__main__":
    main()
Oops, something went wrong.