In [1]:
from mlsolutions import datasets
# One dataset helper is reused by every section below; each ds.load_*() call
# repoints it at a different toy dataset before ds.data / ds.dataset is read.
ds = datasets()

# Supervised Learning

## Distance-based Algorithm

### K-Nearest Neighbor

In [2]:
from mlsolutions import KNearestNeighbor

#### Numeric features

In [3]:
# Load the numeric KNN exercise and pull out the training features (X),
# the class column (y) and the unlabeled row to classify.
ds.load_knn('numeric')
X, y = ds.data, ds.target
new_example = ds.new_example
# Prints the training set and the new example.
ds.overview()

The training set is:
   a1  a2  a3 class
0   1   3   1   yes
1   3   5   2   yes
2   3   2   2    no
3   5   2   3    no

The new example is:
   a1  a2  a3
0   2   4   2


In [4]:
# 1-NN on the numeric data. Printing the model after predict() shows a
# textual explanation: the nearest training example and the predicted class.
knn = KNearestNeighbor(k=1)
knn.fit(X, y)
prediction = knn.predict(new_example)
print(knn)

The closest nearest neighbor is ex.1. Hence, 1-Nearest Neighbor predicts class = yes.


In [5]:
# Same data with k=3: the printed explanation lists the three nearest
# examples and the majority class among them.
knn = KNearestNeighbor(k=3)
knn.fit(X, y)
prediction = knn.predict(new_example)
print(knn)

The closest nearest neighbors are ex.1, 0, 2. The majority of class is yes; hence, 3-Nearest Neighbor predicts class = yes.


In [6]:
import pandas as pd
# Two unlabeled examples, one per row, to be classified in a single predict()
# call; the columns mirror the training features a1, a2, a3.
new_examples = pd.DataFrame(
    [[2, 4, 2],
     [4, 2, 1]],
    columns=['a1', 'a2', 'a3'],
    index=[0, 1],
)

In [7]:
# Reuses the k=3 model fitted in the earlier 3-NN cell (no refit here).
# With several rows passed to predict(), print(knn) emits one explanation
# line per example.
prediction = knn.predict(new_examples)
print(knn)

The closest nearest neighbors are ex.1, 0, 2. The majority of class is yes; hence, 3-Nearest Neighbor predicts class = yes.
The closest nearest neighbors are ex.2, 3, 0. The majority of class is no; hence, 3-Nearest Neighbor predicts class = no.


#### Nominal features

In [8]:
# Switch to the nominal (categorical) KNN exercise; X, y and new_example are
# rebound to the new dataset, replacing the numeric ones used above.
ds.load_knn('nominal')
X, y = ds.data, ds.target
new_example = ds.new_example
# Prints the training set and the new example.
ds.overview()

The training set is:
       age  income student credit rating buy iPhone
0     <=30    high      no          fair         no
1     <=30    high      no     excellent         no
2  [31,40]    high      no          fair        yes
3      >40  medium      no          fair        yes
4      >40     low     yes     excellent         no
5  [31,40]     low     yes     excellent        yes
6     <=30  medium      no          fair         no
7  [31,40]  medium      no     excellent        yes
8      >40  medium      no     excellent         no

The new example is:
    age  income student credit rating
0  <=30  medium     yes          fair


In [9]:
# 1-NN on categorical features.
# NOTE(review): the distance used for nominal attributes is not visible from
# this notebook (presumably a mismatch count) - confirm in mlsolutions.
knn = KNearestNeighbor(k=1)
knn.fit(X, y)
prediction = knn.predict(new_example)
print(knn)

The closest nearest neighbor is ex.6. Hence, 1-Nearest Neighbor predicts buy iPhone = no.


In [10]:
# 3-NN on the same categorical data; the printed text reports the three
# nearest examples and the majority value of the target column.
knn = KNearestNeighbor(k=3)
knn.fit(X, y)
prediction = knn.predict(new_example)
print(knn)

The closest nearest neighbors are ex.6, 0, 3. The majority of buy iPhone is no; hence, 3-Nearest Neighbor predicts buy iPhone = no.


## Rule-based Algorithms

### 1-Rule

In [11]:
from mlsolutions import OneRule

In [12]:
# Load the credit-risk exercise used for 1R: four nominal attributes and
# a three-valued target (risk), plus one new example to classify.
ds.load_onerule()
X, y = ds.data, ds.target
new_example = ds.new_example
# Prints the training set and the new example.
ds.overview()

The training set is:
   credit history  debt   deposit   income      risk
0             bad  high      none      low      high
1         unknown  high      none  average      high
2         unknown   low      none  average  moderate
3         unknown   low      none      low      high
4         unknown   low      none     high       low
5         unknown   low  adequate     high       low
6             bad   low      none      low      high
7             bad   low  adequate     high  moderate
8            good   low      none     high       low
9            good  high  adequate     high       low
10           good  high      none      low      high
11           good  high      none  average  moderate
12           good  high      none     high       low
13            bad  high      none  average      high

The new example is:
  credit history debt deposit   income
0        unknown  low    none  average


In [13]:
# 1R workflow: fit, generate the rule, then classify the new example.
# The printed report names the chosen attribute, its error rate, the rule
# itself, and the classification of the new example.
oneR = OneRule()
oneR.fit(X, y)
# NOTE(review): generate() is called before predict() here; presumably it
# must run first so a rule exists - confirm against the OneRule API.
oneR.generate()
prediction = oneR.predict(new_example)
print(oneR)

The rule based on income has the minimum number of errors, whose error rate is 0.214. Hence, 1R produces the following rule: 
if income = low then risk = high; else if income = average then risk = high; else if income = high then risk = low.

The new example 0 has income = average and hence will be classified as risk = high.


### PRISM

In [14]:
from mlsolutions import PRISM

In [15]:
# Load the weather ("play") dataset for PRISM. No new example is read here:
# the PRISM cells only generate rules, they do not classify anything.
ds.load_prism()
X, y = ds.data, ds.target
# Prints the training set.
ds.overview()

The training set is:
     outlook temperature humidity  windy play
0      sunny         hot     high  False   no
1      sunny         hot     high   True   no
2   overcast         hot     high  False  yes
3      rainy        mild     high  False  yes
4      rainy        cool   normal  False  yes
5      rainy        cool   normal   True   no
6   overcast        cool   normal   True  yes
7      sunny        cool     high  False   no
8      sunny        mild   normal  False  yes
9      rainy        cool   normal  False  yes
10     sunny        mild   normal   True  yes
11  overcast        mild     high   True  yes
12  overcast         hot   normal  False  yes
13     rainy        mild     high   True   no


In [16]:
# Generate rules for one target value at a time: here the class play = no.
prism = PRISM()
prism.fit(X, y)
prism.generate('no')
# Prints the generated rule list for the requested class.
print(prism)

For play = no, PRISM generates the following rule(s):
if outlook = sunny & humidity = high then play = no;
if outlook = rainy & windy = True then play = no.


In [17]:
# Fresh PRISM instance for the other class, play = yes, so the rules from
# the previous cell are not carried over.
prism = PRISM()
prism.fit(X, y)
prism.generate('yes')
# Prints the generated rule list for the requested class.
print(prism)

For play = yes, PRISM generates the following rule(s):
if outlook = overcast then play = yes;
if humidity = normal & windy = False then play = yes;
if temperature = mild & outlook = sunny then play = yes;
if temperature = mild & windy = False then play = yes.


## Probabilistic method

### Naive Bayes

$P(H|E) = \frac{P(E|H)P(H)}{P(E)}$

In [18]:
from mlsolutions import NaiveBayes

#### Nominal features

In [19]:
# Naive Bayes exercise No.1: loan default with all-nominal attributes
# (income is given as categories such as "high" / "very high").
ds.load_naivebayes('No.1')
X, y = ds.data, ds.target
new_example = ds.new_example
# Prints the training set and the new example.
ds.overview()

The training set is:
  home owner marital status     income loan default
0        yes         single  very high          yes
1         no        married       high          yes
2         no         single     medium           no
3        yes        married  very high           no
4        yes       divorced       high          yes
5         no        married        low           no
6        yes       divorced  very high           no
7         no         single       high          yes
8         no        married     medium           no
9         no         single        low          yes

The new example is:
  home owner marital status     income
0         no        married  very high


In [20]:
# Fit Naive Bayes on exercise No.1 and classify the new example; printing
# the model reports the predicted value of the target column.
nb = NaiveBayes()
nb.fit(X, y)
prediction = nb.predict(new_example)
print(nb)

Naive Bayes predicts loan default = no.


In [21]:
# Naive Bayes exercise No.3: holiday quality, again with nominal attributes
# only. (Exercise No.2, the numeric variant, is loaded in the next section.)
ds.load_naivebayes('No.3')
X, y = ds.data, ds.target
new_example = ds.new_example
# Prints the training set and the new example.
ds.overview()

The training set is:
  location weather companion expensive holiday
0     nice   sunny  annoying         Y    good
1     nice   sunny  annoying         N     bad
2   boring   rainy     great         Y    good
3   boring   sunny     great         Y     bad
4     nice   rainy     great         Y    good
5   boring   rainy  annoying         N    good
6   boring   rainy     great         N    good

The new example is:
  location weather companion expensive
0   boring   sunny  annoying         Y


In [22]:
# New model instance for exercise No.3; same fit / predict / print workflow.
nb = NaiveBayes()
nb.fit(X, y)
prediction = nb.predict(new_example)
print(nb)

Naive Bayes predicts holiday = bad.


#### Numeric features

In [23]:
# Naive Bayes exercise No.2: same loan-default table as No.1, but income is
# now a number, so the data mixes nominal and numeric attributes.
ds.load_naivebayes('No.2')
X, y = ds.data, ds.target
new_example = ds.new_example
# Prints the training set and the new example.
ds.overview()

The training set is:
  home owner marital status  income loan default
0        yes         single     125          yes
1         no        married     100          yes
2         no         single      70           no
3        yes        married     120           no
4        yes       divorced      95          yes
5         no        married      60           no
6        yes       divorced     220           no
7         no         single      85          yes
8         no        married      75           no
9         no         single      90          yes

The new example is:
  home owner marital status income
0         no        married    120


In [24]:
# Same NaiveBayes class as for the nominal exercises; no extra argument is
# passed to mark income as numeric.
# NOTE(review): how the numeric attribute is modelled (presumably a normal
# density per class) is not visible from this notebook - confirm.
nb = NaiveBayes()
nb.fit(X, y)
prediction = nb.predict(new_example)
print(nb)

Naive Bayes predicts loan default = no.


## Trees

### Decision Tree - Root selection

In [25]:
from mlsolutions import DecisionTreeRootSelection

In [26]:
# Decision-tree exercise No.1: two nominal attributes (shape, color) and a
# binary class. Only the root attribute is selected, so no new example.
ds.load_decisiontree('No.1')
X, y = ds.data, ds.target
# Prints the training set.
ds.overview()

The training set is:
      shape color class
0    circle  blue     +
1    circle  blue     +
2    square  blue     -
3  triangle  blue     -
4    square   red     +
5    square  blue     -
6    square   red     +
7    circle   red     +


In [27]:
# Pick the root attribute; the printed text names the attribute with the
# highest information gain.
root = DecisionTreeRootSelection()
root.fit(X, y)
print(root)

As shape has the highest information gain, it will be selected as the root of the tree.


In [28]:
# Decision-tree exercise No.2: holiday data with three nominal attributes.
ds.load_decisiontree('No.2')
X, y = ds.data, ds.target
# Prints the training set.
ds.overview()

The training set is:
  location weather expensive holiday
0     nice   sunny         Y    good
1     nice   sunny         N     bad
2   boring   rainy         Y    good
3   boring   sunny         N     bad
4     nice   rainy         Y    good
5   boring   rainy         N    good
6   boring   rainy         N    good


In [29]:
# Root selection for exercise No.2, using a fresh selector instance.
root = DecisionTreeRootSelection()
root.fit(X, y)
print(root)

As weather has the highest information gain, it will be selected as the root of the tree.


## Neural network

### Perceptron

In [30]:
from mlsolutions import Perceptron

In [31]:
# Perceptron exercise: three binary inputs (a1..a3) and a 0/1 class label.
ds.load_perceptron()
X, y = ds.data, ds.target
# Prints the training set.
ds.overview()

The training set is:
   a1  a2  a3  class
0   1   0   0      1
1   0   1   1      0
2   1   1   0      1
3   1   1   1      0
4   0   0   1      0


In [32]:
# The perceptron is sized by the number of feature columns in X.
n_in = len(X.columns)
perceptron = Perceptron(n_in)
# NOTE(review): initial weights, learning rate and number of epochs are not
# set here, so they come from Perceptron's defaults - confirm their values.
perceptron.fit(X, y)
# Prints the weight vector and bias reached after training.
print(perceptron)

After 1 epoch(s), weight vector and bias: w = [0.0, -1.0, -2.0], b = -1.


# Unsupervised Learning

## Clustering

### K-means clustering

In [33]:
from mlsolutions import Kmeans

In [34]:
# K-means exercise No.1. Unlike the supervised loaders, this one exposes a
# pairwise distance matrix via ds.dataset rather than ds.data / ds.target.
ds.load_kmeans('No.1')
matrix = ds.dataset
# Prints the distance matrix.
ds.overview()

The distance matrix is:
    A  B  C   D  E
A   0  2  7  10  1
B   2  0  3   4  6
C   7  3  0   5  9
D  10  4  5   0  8
E   1  6  9   8  0


In [35]:
# Start with points A and B as the initial centroids. The printed result is
# the clustering after the first epoch: every point ends up with whichever
# of the two centroids it is closer to in the distance matrix.
centroids = ['A', 'B']
kmeans = Kmeans(centroids)
kmeans.fit(matrix)
print(kmeans)

After the first epoch, the clusters are: {'E', 'A'}, {'C', 'D', 'B'}.


In [36]:
# K-means exercise No.2: a larger, seven-point distance matrix (A..G).
ds.load_kmeans('No.2')
matrix = ds.dataset
# Prints the distance matrix.
ds.overview()

The distance matrix is:
    A   B   C  D   E   F   G
A   0  10   2  1  12   5   4
B  10   0   4  3   6  23   7
C   2   4   0  5   9  14  19
D   1   3   5  0   1   7   4
E  12   6   9  1   0   2  18
F   5  23  14  7   2   0   6
G   4   7  19  4  18   6   0


In [37]:
# Same initial centroids (A and B) on the seven-point matrix; again only the
# clustering after the first epoch is reported.
centroids = ['A', 'B']
kmeans = Kmeans(centroids)
kmeans.fit(matrix)
print(kmeans)

After the first epoch, the clusters are: {'C', 'F', 'A', 'D', 'G'}, {'E', 'B'}.


### Hierarchical clustering

In [38]:
from mlsolutions import HierarchicalClustering

In [39]:
# Hierarchical-clustering exercise; the distance matrix printed here has the
# same values as the one shown for K-means exercise No.1.
ds.load_hierarchical()
matrix = ds.dataset
# Prints the distance matrix.
ds.overview()

The distance matrix is:
    A  B  C   D  E
A   0  2  7  10  1
B   2  0  3   4  6
C   7  3  0   5  9
D  10  4  5   0  8
E   1  6  9   8  0


In [40]:
# Agglomerative clustering on the distance matrix; the printed trace shows
# the cluster produced by each merge step until all points are joined.
# NOTE(review): no linkage is specified. The merge order in the output
# (B joins {A, E} at step 2) is consistent with single linkage - confirm
# that this is the library default.
hc = HierarchicalClustering()
hc.fit(matrix)
print(hc)

After step 1, the latest merged cluster is: {'E', 'A'};
After step 2, the latest merged cluster is: {'E', 'B', 'A'};
After step 3, the latest merged cluster is: {'C', 'E', 'B', 'A'};
After step 4, the latest merged cluster is: {'C', 'B', 'A', 'D', 'E'}.


### Density-Based Spatial Clustering of Applications with Noise (DBSCAN)

In [41]:
from mlsolutions import DBSCAN

In [42]:
# DBSCAN exercise No.1: five points given as a pairwise distance matrix.
ds.load_dbscan('No.1')
matrix = ds.dataset
# Prints the distance matrix.
ds.overview()

The distance matrix is:
   A  B  C  D  E
A  0  1  4  5  6
B  1  0  2  6  7
C  4  2  0  3  4
D  5  6  3  0  1
E  6  7  4  1  0


In [43]:
# NOTE(review): the two positional arguments appear to be (eps, min_pts).
# With eps=2 and min_pts=3 (counting the point itself), only B is a core
# point, which matches the single cluster {A, B, C} in the output. Confirm
# the parameter order against the DBSCAN signature.
dbscan = DBSCAN(2, 3)
dbscan.fit(matrix)
# Prints the final clusters; points not listed belong to no cluster.
print(dbscan)

Final clustering: K1 = {'C', 'B', 'A'}.


In [44]:
# Tighter setting on the same matrix (presumably eps=1, min_pts=2 - see the
# parameter-order caveat on the previous DBSCAN cell): two clusters result
# and C is left out of both.
dbscan = DBSCAN(1, 2)
dbscan.fit(matrix)
print(dbscan)

Final clustering: K1 = {'B', 'A'}, K2 = {'D', 'E'}.


In [45]:
# DBSCAN exercise No.2: eight points (A1..A8) with real-valued distances.
ds.load_dbscan('No.2')
matrix = ds.dataset
# Prints the distance matrix.
ds.overview()

The distance matrix is:
     A1   A2   A3   A4   A5   A6   A7   A8
A1  0.0  5.0  6.0  3.6  7.0  7.2  8.0  2.2
A2  5.0  0.0  6.1  4.2  5.0  4.1  3.2  4.5
A3  6.0  6.1  0.0  5.0  1.5  1.5  7.5  6.5
A4  3.6  4.2  5.0  0.0  3.6  4.1  7.2  1.5
A5  7.0  5.0  1.5  3.6  0.0  1.4  6.7  5.0
A6  7.2  4.1  1.5  4.1  1.4  0.0  5.4  5.5
A7  8.0  3.2  7.5  7.2  6.7  5.4  0.0  7.5
A8  2.2  4.5  6.5  1.5  5.0  5.5  7.5  0.0


In [46]:
# Presumably eps=2, min_pts=2 (positional; confirm the order). The output
# lists two clusters; A1, A2 and A7 appear in neither.
dbscan = DBSCAN(2, 2)
dbscan.fit(matrix)
print(dbscan)

Final clustering: K1 = {'A3', 'A5', 'A6'}, K2 = {'A4', 'A8'}.


## Evaluating clustering quality

In [47]:
from mlsolutions import ClusteringEvaluator

### Using the silhouette coefficient

In [48]:
# Load a four-point distance matrix together with an existing cluster
# assignment (P1, P2 -> cluster 1; P3, P4 -> cluster 2) to be evaluated.
# NOTE(review): 'sihouette_coefficient' looks like a misspelling of
# "silhouette", but it is the key this call is run with (the cell executes
# and prints output). Do not correct it here without checking which
# spellings mlsolutions accepts.
ds.load_cluster_evaluate(method='sihouette_coefficient')
distance_matrix = ds.dataset
cluster_labels = ds.cluster_labels
# Prints the distance matrix and the cluster labels.
ds.overview()

The distance matrix is:
      P1   P2    P3    P4
P1  0.00  0.1  0.65  0.55
P2  0.10  0.0  0.70  0.60
P3  0.65  0.7  0.00  0.30
P4  0.55  0.6  0.30  0.00

The cluster labels are:
points
P1    1
P2    1
P3    2
P4    2
Name: cluster label, dtype: int64


In [49]:
# Score the clustering with the silhouette coefficient. The method key uses
# the same spelling as the loader call (see the spelling caveat there).
ce = ClusteringEvaluator(method='sihouette_coefficient')
# Labels first, then the distance matrix (passed positionally here).
ce.fit(cluster_labels, distance_matrix)
# Prints the resulting score.
print(ce)

The evaluation results of the clustering quality using sihouette coefficient is 0.678.


### Using correlation

In [50]:
# Re-evaluate the same clustering (cluster_labels and distance_matrix from
# the silhouette cells) using correlation instead. Only a distance matrix
# is supplied, so the similarity matrix needed for the correlation is
# derived from it by the evaluator.
ce = ClusteringEvaluator(method='correlation')
ce.fit(cluster_labels, distance_matrix=distance_matrix)
print(ce)

The evaluation results of the clustering quality using correlation is 0.947.


In [51]:
# Load the dataset variant for the correlation method: it provides a ready
# similarity matrix instead of a distance matrix.
ds.load_cluster_evaluate(method='correlation')
similarity_matrix_distance = ds.dataset
# Prints the similarity matrix.
ds.overview()

The similarity matrix is:
      P1    P2    P3    P4
P1  1.00  1.00  0.08  0.25
P2  1.00  1.00  0.00  0.17
P3  0.08  0.00  1.00  0.67
P4  0.25  0.17  0.67  1.00


In [52]:
# Correlation again, this time passing the given similarity matrix directly
# rather than letting the evaluator derive one from distances.
# cluster_labels is still the assignment loaded in the silhouette section;
# this cell does not re-read it from ds.
ce = ClusteringEvaluator(method='correlation')
ce.fit(cluster_labels, similarity_matrix_distance=similarity_matrix_distance)
print(ce)

The evaluation results of the clustering quality using correlation is 0.963.


## Markov models

### Markov chain

In [53]:
from mlsolutions import MarkovChain

In [54]:
# Markov-chain exercise: a 3x3 weather transition matrix
# (rows = current state, columns = next state; each row sums to 1).
ds.load_markov()
matrix = ds.dataset
# Prints the transition matrix.
ds.overview()

The transition matrix is:
       sunny  rainy  foggy
sunny    0.8   0.05   0.15
rainy    0.2   0.60   0.20
foggy    0.2   0.30   0.50


In [55]:
# The state sequence must have a length of four in our implementation.
# NOTE(review): judging by the printed sentences, the positions appear to be
# [yesterday, today, tomorrow, day after tomorrow], with None for a slot
# that is not part of the question - confirm against MarkovChain.
state_sequence = ['rainy', 'foggy', 'sunny', None]
mc = MarkovChain(matrix)
mc.fit(state_sequence)
# One-step query: probability of tomorrow's state given today's state.
mc.next_state()
print(mc)

Given that today is foggy, the probability that it will be sunny tomorrow is 0.20.


In [56]:
# Two-step query: today is sunny; ask for the probability of sunny tomorrow
# followed by rainy the day after. The first slot is None because it is not
# part of this question (see the length-four convention in the cell above).
state_sequence = [None, 'sunny', 'sunny', 'rainy']
mc = MarkovChain(matrix)
mc.fit(state_sequence)
mc.state_after_next_state()
print(mc)

Given that today is sunny, the probability that it will be sunny tomorrow and rainy the day after tomorrow is 0.04.


### Hidden Markov models

In [57]:
from mlsolutions import HiddenMarkovModel

In [58]:
# HMM exercise No.1 (weather / umbrella). The loader exposes the three
# model components separately.
ds.load_hidden_markov('No.1')
# Initial state distribution (probability of each hidden state at the start)
initial_probability = ds.initial_probability
# Transition probability matrix A (hidden state -> next hidden state)
transition = ds.transition
# Emission probability matrix E (hidden state -> observation)
emission = ds.emission
# Prints the initial probabilities and both matrices.
ds.overview()

The initial probabilities are:
Sunny    0.4
Rainy    0.3
Foggy    0.3

The transition matrix is:
       Sunny  Rainy  Foggy
Sunny    0.8   0.05   0.15
Rainy    0.2   0.60   0.20
Foggy    0.2   0.30   0.50

The emission matrix is:
       Umbrella  No Umbrella
Sunny       0.1          0.9
Rainy       0.8          0.2
Foggy       0.3          0.7


In [59]:
# Observation labels must match the emission matrix column names.
observations = ['No Umbrella', 'Umbrella']
hmm = HiddenMarkovModel(initial_probability, transition, emission)
hmm.fit(observations)
# Prints (a) the probability of the observation sequence and (b) the most
# likely sequence of hidden states.
print(hmm)

Conclusion: a) The probability of the observation sequence No Umbrella, Umbrella is 0.179, b) The most likely sequence of hidden states is Foggy, Rainy.


In [60]:
# HMM exercise No.2 (weather / clothing): two hidden states and three
# possible observations. Rebinds the three model components.
ds.load_hidden_markov('No.2')
# Initial state distribution (probability of each hidden state at the start)
initial_probability = ds.initial_probability
# Transition probability matrix A (hidden state -> next hidden state)
transition = ds.transition
# Emission probability matrix E (hidden state -> observation)
emission = ds.emission
# Prints the initial probabilities and both matrices.
ds.overview()

The initial probabilities are:
Sunny     0.5
Cloudy    0.5

The transition matrix is:
        Sunny  Cloudy
Sunny     0.6     0.4
Cloudy    0.5     0.5

The emission matrix is:
        Dress  Scarf  Blazer
Sunny     0.6    0.3     0.1
Cloudy    0.2    0.3     0.5


In [61]:
# Two-step observation sequence for exercise No.2.
observations = ['Dress', 'Blazer']
hmm = HiddenMarkovModel(initial_probability, transition, emission)
hmm.fit(observations)
# Prints the sequence probability and the most likely hidden-state sequence.
print(hmm)

Conclusion: a) The probability of the observation sequence Dress, Blazer is 0.108, b) The most likely sequence of hidden states is Sunny, Cloudy.


In [62]:
# Same model extended to three observations, showing that fit() accepts
# sequences of different lengths.
observations = ['Dress', 'Blazer', 'Scarf']
hmm = HiddenMarkovModel(initial_probability, transition, emission)
hmm.fit(observations)
# Prints the sequence probability and the most likely hidden-state sequence.
print(hmm)

Conclusion: a) The probability of the observation sequence Dress, Blazer, Scarf is 0.032, b) The most likely sequence of hidden states is Sunny, Cloudy, Sunny.
