In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Create 500 evenly spaced numbers from 0 to 10
# Showing the last 20 values
x = np.linspace(0, 10, 500)
x[-20:]

In [None]:
# Get the available styles
print(plt.style.available)

In [None]:
# Plot the default matlplotlib style
plt.title('Default MatPlotLib')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.plot(x, x**8)
plt.show()

In [None]:
# Plot the default Baysian Methods for Hackers style
plt.style.use('bmh')
plt.title('Baysian Methods for Hackers')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.plot(x, x**8)
plt.show()

In [None]:
# Plot the GGPlot like style
plt.style.use('ggplot')
plt.title('GGPlot')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.plot(x, x**8)
plt.show()

In [None]:
plt.style.use('grayscale')
plt.title('Grayscale')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.plot(x, x**8)
plt.show()

In [None]:
plt.style.use('fivethirtyeight')
plt.title('Nate Silver fivethirtyeight.com')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.plot(x, x**8)
plt.show()

In [None]:
plt.style.use('dark_background')
plt.title('Dark Background')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.plot(x, x**8)
plt.show()

#### Twin axes


In [None]:
fig, ax1 = plt.subplots()

ax1.plot(x, x**2, lw=2, color="blue")
ax1.set_ylabel(r"area $(m^2)$", fontsize=18, color="blue")
for label in ax1.get_yticklabels():
    label.set_color("blue")
    
ax2 = ax1.twinx()
ax2.plot(x, x**3, lw=2, color="red")
ax2.set_ylabel(r"volume $(m^3)$", fontsize=18, color="red")
for label in ax2.get_yticklabels():
    label.set_color("red")

In [None]:
import matplotlib
alpha = 0.7
phi_ext = 2 * np.pi * 0.5

def flux_qubit_potential(phi_m, phi_p):
    return 2 + alpha - 2 * np.cos(phi_p) * np.cos(phi_m) - alpha * np.cos(phi_ext - 2*phi_p)

In [None]:
phi_m = np.linspace(0, 2*np.pi, 100)
phi_p = np.linspace(0, 2*np.pi, 100)
X,Y = np.meshgrid(phi_p, phi_m)
Z = flux_qubit_potential(X, Y).T

In [None]:
fig, ax = plt.subplots()

p = ax.pcolor(X/(2*np.pi), Y/(2*np.pi), Z, cmap=matplotlib.cm.RdBu, vmin=abs(Z).min(), vmax=abs(Z).max())
cb = fig.colorbar(p, ax=ax)

In [None]:
def fact(n):
    if n <= 0:
        return 1
    return n*fact(n-1)

In [None]:
fact(5)

In [None]:
!pip3 install --proxy="http://uid:pwd@rb-proxy-de.bosch.com:8080/" pydot

In [2]:
%matplotlib inline
import sys
import string
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import feature_extraction
from sklearn import tree
from sklearn.externals.six import StringIO  
from IPython.core.display import Image
import pydot
import pydotplus

from __future__ import print_function

# turn of data table rendering
pd.set_option('display.notebook_repr_html', False)
sns.set_palette(['#00A99D', '#F5CA0C', '#B6129F', '#76620C', '#095C57'])
sys.version

'3.6.7 (default, Oct 22 2018, 11:32:17) \n[GCC 8.2.0]'


What is a Decision Tree?

A decision tree is a structure of questions and answers used to separate data points into classes. We can use supervised machine learning to build such a structure from existing data. Decision trees can be used for classification and regression. In this example we focus on classification.
Classification of Guitar Models

In the example below we try to predict the class of a guitar, based on its features, using a decision tree. There are only two classes of guitar models in this case, 10 Stratocasters (st) and 16 Les Paul (lp) models. Our feature set contains body material, fretboard, number of frets and kind of pickup elements. Note: this is probably highly inaccurate toy-data only created to illustrate a point. You can download the data set from my GitHub repo.

https://raw.githubusercontent.com/remondo/NoteBooks-Unsupervised-Learning/master/data/guitar-model.csv


Feature Extraction

We are confronted with a lot of categorical data, so we need to do some feature extraction first. We use binary one-hot encoding for this.


In [3]:
df = pd.read_csv("guitar-model.txt")

In [4]:
df.head()

  model material fretboard  frets     elements
0    st    alder     maple     21   humbuckers
1    st    alder     maple     21   humbuckers
2    st     lime     maple     22  single coil
3    st     lime     maple     22  single coil
4    st    alder     maple     24  single coil

In [5]:
# Do some feature extracting for
cat_columns = ['material', 'fretboard', 'frets', 'elements']
cat_dict = df[cat_columns].to_dict(orient='records')

vec = feature_extraction.DictVectorizer()
cat_vector = vec.fit_transform(cat_dict).toarray()

df_vector = pd.DataFrame(cat_vector)
vector_columns = vec.get_feature_names()
df_vector.columns = vector_columns
df_vector.index = df.index

df = df.drop(cat_columns, axis=1)
df = df.join(df_vector)
df.head()

  model  elements=humbuckers  elements=single coil  fretboard=ebony  \
0    st                  1.0                   0.0              0.0   
1    st                  1.0                   0.0              0.0   
2    st                  0.0                   1.0              0.0   
3    st                  0.0                   1.0              0.0   
4    st                  0.0                   1.0              0.0   

   fretboard=maple  fretboard=rosewood  frets  material=alder  material=lime  \
0              1.0                 0.0   21.0             1.0            0.0   
1              1.0                 0.0   21.0             1.0            0.0   
2              1.0                 0.0   22.0             0.0            1.0   
3              1.0                 0.0   22.0             0.0            1.0   
4              1.0                 0.0   24.0             1.0            0.0   

   material=mahogany  material=maple  
0                0.0             0.0  
1             

In [6]:
# Assign an ID to the models
df.loc[df.model == 'st','model'] = 0
df.loc[df.model == 'lp','model'] = 1
df.model.value_counts()

1    16
0    10
Name: model, dtype: int64


Building the Decision Tree Classifier

We use Scikit Learn's DecisionTreeClassifier to construct a decision tree. To choose which feature gives the largest information gain at any given point in the tree, we use the entropy criterion. Entropy is a proportional measure of how pure a set of labels is, where 0.0 is perfectly pure and 1.0 is the largets possible mix of labels.


In [16]:
features.columns

Index(['elements=humbuckers', 'elements=single coil', 'fretboard=ebony',
       'fretboard=maple', 'fretboard=rosewood', 'frets', 'material=alder',
       'material=lime', 'material=mahogany', 'material=maple'],
      dtype='object')

In [7]:


# Split the data set in features and labels
features = df.drop(['model'], axis=1)
labels = df.model

test_features = features[-1:]
test_label = labels[-1:]

# Train the decision tree based on the entropy criterion
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(features[:-1], labels[:-1])
clf



DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [22]:
dot_data = tree.export_graphviz(clf)

In [24]:
# Create DOT data
dot_data = tree.export_graphviz(clf, out_file=None)

# Draw graph
graph = pydotplus.graph_from_dot_data(dot_data)  

# Show graph
Image(graph.create_png())

InvocationException: GraphViz's executables not found

In [None]:
# Make a prediction with test data
pred = clf.predict(test_features)
print(features[-1:].T)
print('Predicted class:', pred)
print('Accurate prediction?', pred[0] == test_label.values[0])

In [None]:
!cat "D:/Python Training/newfile.txt"