Example folder

eriklindernoren · Sep 18, 2017 · d58485e · d58485e
1 parent 9f88013
commit d58485e
Show file tree

Hide file tree

Showing 58 changed files with 1,463 additions and 953 deletions.
diff --git a/README.md b/README.md
@@ -12,9 +12,9 @@ but rather to present the inner workings of them in a transparent way.
   * [Table of Contents](#table-of-contents)
   * [Installation](#installation)
   * [Example Usage](#example-usage)
-    + [Regression](#regression)
-    + [Classification](#classification)
-    + [Clustering](#clustering)
+    + [Polynomial Regression](#polynomial-regression)
+    + [Classification With CNN](#classification-with-cnn)
+    + [Density-Based Clustering](#density-based-clustering)
     + [Generating Handwritten Digits](#generating-handwritten-digits)
     + [Deep Reinforcement Learning](#deep-reinforcement-learning)
     + [Genetic Algorithm](#genetic-algorithm)
@@ -31,8 +31,8 @@ but rather to present the inner workings of them in a transparent way.
     $ python setup.py install
 
 ## Example Usage
-### Regression
-    $ python mlfromscratch/supervised_learning/regression.py
+### Polynomial Regression
+    $ python mlfromscratch/examples/polynomial_regression.py
 
 <p align="center">
     <img src="http://eriklindernoren.se/images/mlfs_prr.png" width="640"\>
@@ -42,8 +42,8 @@ but rather to present the inner workings of them in a transparent way.
     Linköping, Sweden 2016.
 </p>
 
-### Classification
-    $ python mlfromscratch/supervised_learning/neural_network.py
+### Classification With CNN
+    $ python mlfromscratch/examples/cnn.py
 
     +---------+
     | ConvNet |
@@ -80,8 +80,8 @@ but rather to present the inner workings of them in a transparent way.
     Figure: Classification of the digit dataset using CNN.
 </p>
 
-### Clustering
-    $ python mlfromscratch/unsupervised_learning/dbscan.py
+### Density-Based Clustering
+    $ python mlfromscratch/examples/dbscan.py
 
 <p align="center">
     <img src="http://eriklindernoren.se/images/mlfs_dbscan.png" width="640">
@@ -141,7 +141,7 @@ but rather to present the inner workings of them in a transparent way.
 </p>
 
 ### Deep Reinforcement Learning
-    $ python mlfromscratch/reinforcement_learning/deep_q_network.py
+    $ python mlfromscratch/examples/deep_q_network.py
 
     +----------------+
     | Deep Q-Network |
@@ -164,7 +164,7 @@ but rather to present the inner workings of them in a transparent way.
 </p>
 
 ### Genetic Algorithm
-    $ python mlfromscratch/unsupervised_learning/genetic_algorithm.py
+    $ python mlfromscratch/examples/genetic_algorithm.py
 
     +--------+
     |   GA   |
@@ -194,7 +194,7 @@ but rather to present the inner workings of them in a transparent way.
     [294 Answer: 'Genetic Algorithm']
 
 ### Association Analysis
-    $ python mlfromscratch/unsupervised_learning/apriori.py 
+    $ python mlfromscratch/examples/apriori.py 
     +-------------+
     |   Apriori   |
     +-------------+

diff --git a/mlfromscratch/examples/adaboost.py b/mlfromscratch/examples/adaboost.py
@@ -0,0 +1,40 @@
+from __future__ import division, print_function
+import numpy as np
+from sklearn import datasets
+
+# Import helper functions
+from mlfromscratch.supervised_learning import Adaboost
+from mlfromscratch.utils.data_manipulation import train_test_split
+from mlfromscratch.utils.data_operation import accuracy_score
+from mlfromscratch.utils import Plot
+
+def main():
+    """Train Adaboost to tell two digit classes apart and plot its predictions."""
+    digits = datasets.load_digits()
+    neg_digit, pos_digit = 1, 8
+
+    # Row indices of the two digits: all `neg_digit` rows first, then `pos_digit`
+    neg_rows = np.flatnonzero(digits.target == neg_digit)
+    pos_rows = np.flatnonzero(digits.target == pos_digit)
+    idx = np.concatenate((neg_rows, pos_rows))
+
+    X = digits.data[idx]
+    # Indexing with an index array yields a copy, so the relabelling below
+    # does not write into the loaded dataset
+    y = digits.target[idx]
+    # Change labels to {-1, 1}
+    y[y == neg_digit] = -1
+    y[y == pos_digit] = 1
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
+
+    # Adaboost classification with 5 weak classifiers
+    booster = Adaboost(n_clf=5)
+    booster.fit(X_train, y_train)
+    y_pred = booster.predict(X_test)
+
+    accuracy = accuracy_score(y_test, y_pred)
+    print ("Accuracy:", accuracy)
+
+    # Reduce dimensions to 2d using pca and plot the results
+    plotter = Plot()
+    plotter.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlfromscratch/examples/apriori.py b/mlfromscratch/examples/apriori.py
@@ -0,0 +1,35 @@
+from __future__ import division, print_function
+import numpy as np
+
+from mlfromscratch.unsupervised_learning import Apriori
+
+def main():
+    """Run Apriori on a small demo transaction set and print itemsets and rules.
+
+    Prints a banner, the thresholds and the transactions, then the frequent
+    itemsets and the rules returned by the Apriori instance.
+    """
+    # Demo transaction set
+    # Example 2: https://en.wikipedia.org/wiki/Apriori_algorithm
+    # The transactions have different lengths. NumPy >= 1.24 raises ValueError
+    # for such ragged input unless dtype=object is given explicitly; with it
+    # the result is a 1-d object array holding one list per transaction,
+    # which is what older NumPy versions built implicitly.
+    transactions = np.array(
+        [[1, 2, 3, 4], [1, 2, 4], [1, 2], [2, 3, 4], [2, 3], [3, 4], [2, 4]],
+        dtype=object)
+    print ("+-------------+")
+    print ("|   Apriori   |")
+    print ("+-------------+")
+    min_sup = 0.25
+    min_conf = 0.8
+    print ("Minimum Support: %.2f" % (min_sup))
+    print ("Minimum Confidence: %s" % (min_conf))
+    print ("Transactions:")
+    for transaction in transactions:
+        print ("\t%s" % transaction)
+
+    apriori = Apriori(min_sup=min_sup, min_conf=min_conf)
+
+    # Get and print the frequent itemsets
+    frequent_itemsets = apriori.find_frequent_itemsets(transactions)
+    print ("Frequent Itemsets:\n\t%s" % frequent_itemsets)
+
+    # Get and print the rules
+    rules = apriori.generate_rules(transactions)
+    print ("Rules:")
+    for rule in rules:
+        # NOTE(review): `concequent` (sic) has to match the attribute name on
+        # the rule objects, which are defined outside this file - do not
+        # "correct" the spelling here alone.
+        print ("\t%s -> %s (support: %.2f, confidence: %s)" % (rule.antecedent, rule.concequent, rule.support, rule.confidence,))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlfromscratch/examples/bayesian_regression.py b/mlfromscratch/examples/bayesian_regression.py
@@ -0,0 +1,79 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+# Import helper functions
+from mlfromscratch.utils.data_operation import mean_squared_error
+from mlfromscratch.utils.data_manipulation import train_test_split, polynomial_features
+from mlfromscratch.supervised_learning import BayesianRegression
+
+def main():
+    """Fit a Bayesian polynomial regression to the 2016 temperature data and plot it.
+
+    Reads the tab-separated temperature file, splits it with test_size=0.4,
+    prints the mean squared error on the test split and shows the prediction
+    line together with the lower and upper curves returned by the model.
+    """
+
+    # Load temperature data
+    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")
+
+    # Series.as_matrix() was removed in pandas 1.0; .values returns the same
+    # ndarray on both old and new pandas. Transposing the 2-d view turns each
+    # column of the file into a single-column array.
+    time = np.atleast_2d(data["time"].values).T
+    temp = np.atleast_2d(data["temp"].values).T
+
+    X = time # fraction of the year [0, 1]
+    y = temp
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
+
+    n_features = np.shape(X)[1]
+
+    # Prior parameters
+    # - The weights are assumed to follow a Normal distribution
+    # - The variance of the weights is assumed to follow a scaled inverse
+    #   chi-squared distribution.
+    # High prior uncertainty!
+    # Normal
+    mu0 = np.array([0] * n_features)
+    omega0 = np.diag([.0001] * n_features)
+    # Scaled inverse chi-squared
+    nu0 = 1
+    sigma_sq0 = 100
+
+    # The credible interval
+    cred_int = 10
+
+    clf = BayesianRegression(n_draws=2000, 
+        poly_degree=4, 
+        mu0=mu0, 
+        omega0=omega0, 
+        nu0=nu0, 
+        sigma_sq0=sigma_sq0,
+        cred_int=cred_int)
+    clf.fit(X_train, y_train)
+    y_pred = clf.predict(X_test)
+
+    mse = mean_squared_error(y_test, y_pred)
+
+    # Get prediction line (and the interval bounds) over the full data set
+    y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True)
+
+    # Print the mean squared error
+    print ("Mean Squared Error:", mse)
+
+    # Color map
+    cmap = plt.get_cmap('viridis')
+
+    # Plot the results; X is a fraction of the year, so scaling by 366 puts
+    # the points on the 'Day' axis.
+    plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
+    plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
+    plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction")
+    plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0}% Credible Interval".format(cred_int))
+    plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
+    plt.axis((0, 366, -20, 25))
+    plt.suptitle("Bayesian Regression")
+    plt.title("MSE: %.2f" % mse, fontsize=10)
+    plt.xlabel('Day')
+    plt.ylabel('Temperature in Celcius')
+    # Only the two labelled lines show up in the legend; a single call suffices
+    plt.legend(loc='lower right')
+
+    plt.show()
+
+if __name__ == "__main__":
+    main()
diff --git a/mlfromscratch/examples/cnn.py b/mlfromscratch/examples/cnn.py
@@ -0,0 +1,95 @@
+
+from __future__ import print_function
+from sklearn import datasets
+import matplotlib.pyplot as plt
+import math
+import numpy as np
+
+# Import helper functions
+from mlfromscratch.supervised_learning import NeuralNetwork
+from mlfromscratch.utils.data_manipulation import train_test_split, to_categorical, normalize
+from mlfromscratch.utils.data_manipulation import get_random_subsets, shuffle_data
+from mlfromscratch.utils.data_operation import accuracy_score
+from mlfromscratch.utils.optimizers import GradientDescent, Adam, RMSprop, Adagrad, Adadelta
+from mlfromscratch.utils.loss_functions import CrossEntropy
+from mlfromscratch.utils.misc import bar_widgets
+from mlfromscratch.utils import Plot
+from mlfromscratch.utils.layers import Dense, Dropout, Conv2D, Flatten, Activation, MaxPooling2D
+from mlfromscratch.utils.layers import AveragePooling2D, ZeroPadding2D, BatchNormalization, RNN
+
+
+
+def main():
+    """Train a small convolutional network on the digits data set and plot the results.
+
+    Builds the network layer by layer, fits it for 50 epochs, shows the
+    training / validation error curves, prints the accuracy on the test split
+    and finally plots the predicted labels in 2D.
+    """
+
+    #----------
+    # Conv Net
+    #----------
+
+    optimizer = Adam()
+
+    data = datasets.load_digits()
+    X = data.data
+    y = data.target
+
+    # Convert to one-hot encoding
+    y = to_categorical(y.astype("int"))
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)
+
+    # Reshape X to (n_samples, channels, height, width)
+    X_train = X_train.reshape((-1,1,8,8))
+    X_test = X_test.reshape((-1,1,8,8))
+
+    # The test split doubles as validation data during training
+    clf = NeuralNetwork(optimizer=optimizer,
+                        loss=CrossEntropy,
+                        validation_data=(X_test, y_test))
+
+    # Two convolutional stages ...
+    clf.add(Conv2D(n_filters=16, filter_shape=(3,3), input_shape=(1,8,8), padding='same'))
+    clf.add(Activation('relu'))
+    clf.add(Dropout(0.25))
+    clf.add(BatchNormalization())
+    clf.add(Conv2D(n_filters=32, filter_shape=(3,3), padding='same'))
+    clf.add(Activation('relu'))
+    clf.add(Dropout(0.25))
+    clf.add(BatchNormalization())
+    # ... followed by a dense stage and a 10-way softmax output
+    clf.add(Flatten())
+    clf.add(Dense(256))
+    clf.add(Activation('relu'))
+    clf.add(Dropout(0.5))
+    clf.add(BatchNormalization())
+    clf.add(Dense(10))
+    clf.add(Activation('softmax'))
+
+    print ()
+    clf.summary(name="ConvNet")
+
+    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)
+
+    # Training and validation error plot
+    n = len(train_err)
+    training, = plt.plot(range(n), train_err, label="Training Error")
+    validation, = plt.plot(range(n), val_err, label="Validation Error")
+    plt.legend(handles=[training, validation])
+    plt.title("Error Plot")
+    plt.ylabel('Error')
+    plt.xlabel('Iterations')
+    plt.show()
+
+    # Predict labels of the test data; argmax turns the per-class outputs and
+    # the one-hot targets back into class indices
+    y_pred = np.argmax(clf.predict(X_test), axis=1)
+    y_test = np.argmax(y_test, axis=1)
+
+    accuracy = accuracy_score(y_test, y_pred)
+    print ("Accuracy:", accuracy)
+
+    # Flatten data set
+    X_test = X_test.reshape(-1, 8*8)
+
+    # Reduce dimension to 2D using PCA and plot the results
+    Plot().plot_in_2d(X_test, y_pred, title="Convolutional Neural Network", accuracy=accuracy, legend_labels=range(10))
+
+if __name__ == "__main__":
+    main()
diff --git a/mlfromscratch/examples/dbscan.py b/mlfromscratch/examples/dbscan.py
@@ -0,0 +1,26 @@
+import sys
+import os
+import math
+import random
+from sklearn import datasets
+import numpy as np
+
+# Import helper functions
+from mlfromscratch.utils import Plot
+from mlfromscratch.unsupervised_learning import DBSCAN
+
+def main():
+    """Cluster the two-moons toy data with DBSCAN and plot predicted and generated labels."""
+    # Load the dataset
+    moons, true_labels = datasets.make_moons(n_samples=300, noise=0.1, shuffle=False)
+
+    # Cluster the data using DBSCAN
+    predicted_labels = DBSCAN(eps=0.17, min_samples=5).predict(moons)
+
+    # One plot per labelling, predicted clustering first, using the same Plot instance
+    plotter = Plot()
+    for labels, heading in ((predicted_labels, "DBSCAN"), (true_labels, "Actual Clustering")):
+        plotter.plot_in_2d(moons, labels, title=heading)
+
+if __name__ == "__main__":
+    main()
diff --git a/mlfromscratch/examples/decision_tree_classifier.py b/mlfromscratch/examples/decision_tree_classifier.py
@@ -0,0 +1,40 @@
+from __future__ import division, print_function
+import numpy as np
+from sklearn import datasets
+import matplotlib.pyplot as plt
+import sys
+import os
+
+# Import helper functions
+from mlfromscratch.utils.data_manipulation import train_test_split, standardize
+from mlfromscratch.utils.data_operation import accuracy_score
+from mlfromscratch.utils.data_operation import mean_squared_error, calculate_variance
+from mlfromscratch.utils import Plot
+from mlfromscratch.supervised_learning import ClassificationTree
+
+def main():
+    """Fit a classification tree on the iris data, print test accuracy and plot predictions."""
+
+    print ("-- Classification Tree --")
+
+    iris = datasets.load_iris()
+
+    X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.4)
+
+    tree = ClassificationTree()
+    tree.fit(X_train, y_train)
+    y_pred = tree.predict(X_test)
+
+    accuracy = accuracy_score(y_test, y_pred)
+
+    print ("Accuracy:", accuracy)
+
+    # Keyword arguments for the plot, collected in one place
+    plot_options = dict(title="Decision Tree",
+                        accuracy=accuracy,
+                        legend_labels=iris.target_names)
+    Plot().plot_in_2d(X_test, y_pred, **plot_options)
+
+
+if __name__ == "__main__":
+    main()