In [206]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import pandas as pd
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)


In [207]:
def load_data():
    csv_path = "D:/development/python/2022_mL_scikit/2022_march23_data_for_MLb.csv"
    return pd.read_csv(csv_path)

In [208]:
dataset=load_data()
shapeofdataset =dataset.shape
print (shapeofdataset)

(7248, 116)


In [209]:
# new_columns = ['Shape','Size','L1','L2','L3','L4','L5','Date','Period']
# dataset.columns = new_columns
dataset.head()

Unnamed: 0,shape,abnormal,drop,hexangonal,kite,lozenge,octagonal,pointedoval,quatrofoil,rectangular,...,lambandstaff,lionsleeping,standingliturgicalapparel,manfightinganimal,bowandarrow,spearandpennon,seatedman,date_origin,timegroup_c_range,pk_timegroup_c
0,Abnormal shape,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1570,1550-1574,17
1,Abnormal shape,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1478,1475-1499,14
2,Abnormal shape,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1504,1500-1524,15
3,Abnormal shape,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1522,1500-1524,15
4,Abnormal shape,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1304,1300-1324,7


In [210]:
X = dataset.iloc[:,1:113] #limiting to shape and size and classes
y = dataset.iloc[:,114:115] #PeriodNum will be the target

In [211]:
print (y)

     timegroup_c_range
0            1550-1574
1            1475-1499
2            1500-1524
3            1500-1524
4            1300-1324
...                ...
7243         1425-1449
7244         1400-1424
7245         1400-1424
7246         1500-1524
7247         1400-1424

[7248 rows x 1 columns]


In [212]:
print(X)

      abnormal  drop  hexangonal  kite  lozenge  octagonal  pointedoval  \
0            1     0           0     0        0          0            0   
1            1     0           0     0        0          0            0   
2            1     0           0     0        0          0            0   
3            1     0           0     0        0          0            0   
4            1     0           0     0        0          0            0   
...        ...   ...         ...   ...      ...        ...          ...   
7243         0     0           0     0        0          0            0   
7244         0     0           0     0        0          0            0   
7245         0     0           0     0        0          0            0   
7246         0     0           0     0        0          0            0   
7247         0     0           0     0        0          0            0   

      quatrofoil  rectangular  round  ...  hammer  standingwomanholdingchild  \
0              0   

In [213]:
data_top = X.columns.values
print(data_top)

['abnormal' 'drop' 'hexangonal' 'kite' 'lozenge' 'octagonal' 'pointedoval'
 'quatrofoil' 'rectangular' 'round' 'roundedoval' 'scutiform' 'square'
 'trianglepointedup' 'undeterminedshape' 'size_area' 'animal' 'human'
 'objects' 'device' 'undetermined' 'unassigned' 'beast' 'bird' 'fish'
 'insect' 'bust' 'hand' 'boat' 'building' 'container' 'equipment'
 'naturalproduct' 'irregular' 'radial' 'lattice' 'fulllength' 'symbol'
 'hawkhunting' 'pelicaninpiety' 'headondish' 'twoheads' 'crossedhands'
 'handholdingitem' 'seated' 'standing' 'riding' 'crucified' 'apparel'
 'crenellation' 'tool' 'weapon' 'shell' 'wheatsheaf' 'stylizedlily'
 'crosses' 'heart' 'merchantmark' 'texts' 'handholdingbird' 'halflength'
 'crescent' 'beastbody' 'beasthead' 'doubleheadedeagle' 'horseshoe'
 'twobirdsdrinking' 'animalequipment' 'transport'
 'halflengthwomanholdingchild' 'halflengthwoman' 'halflengthman' 'swine'
 'boarhead' 'centaur' 'dragon' 'hare' 'lion' 'lionhead' 'mermaid'
 'squirrel' 'stag' 'staghead' 'unicorn

In [277]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
#from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

In [368]:
# Append classifier to preprocessing pipeline.
# tree_clf = DecisionTreeClassifier(max_depth=8, random_state=42)
# tree_clf = DecisionTreeClassifier(random_state=42)

#test regressor
y = dataset.iloc[:,113:114] #dateorigin will be the target
tree_clf = DecisionTreeRegressor(max_depth=7, min_samples_leaf=20, random_state=42)


In [369]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

tree_out= tree_clf.fit(X_train, y_train)

In [370]:
print (tree_out)

DecisionTreeRegressor(max_depth=7, min_samples_leaf=20, random_state=42)


In [371]:
print("model score: %.3f" % tree_clf.score(X_test, y_test))

model score: 0.608


In [372]:
featurenames = tree_out.feature_names_in_
print(featurenames)

['abnormal' 'drop' 'hexangonal' 'kite' 'lozenge' 'octagonal' 'pointedoval'
 'quatrofoil' 'rectangular' 'round' 'roundedoval' 'scutiform' 'square'
 'trianglepointedup' 'undeterminedshape' 'size_area' 'animal' 'human'
 'objects' 'device' 'undetermined' 'unassigned' 'beast' 'bird' 'fish'
 'insect' 'bust' 'hand' 'boat' 'building' 'container' 'equipment'
 'naturalproduct' 'irregular' 'radial' 'lattice' 'fulllength' 'symbol'
 'hawkhunting' 'pelicaninpiety' 'headondish' 'twoheads' 'crossedhands'
 'handholdingitem' 'seated' 'standing' 'riding' 'crucified' 'apparel'
 'crenellation' 'tool' 'weapon' 'shell' 'wheatsheaf' 'stylizedlily'
 'crosses' 'heart' 'merchantmark' 'texts' 'handholdingbird' 'halflength'
 'crescent' 'beastbody' 'beasthead' 'doubleheadedeagle' 'horseshoe'
 'twobirdsdrinking' 'animalequipment' 'transport'
 'halflengthwomanholdingchild' 'halflengthwoman' 'halflengthman' 'swine'
 'boarhead' 'centaur' 'dragon' 'hare' 'lion' 'lionhead' 'mermaid'
 'squirrel' 'stag' 'staghead' 'unicorn

In [373]:
featuresimportance = tree_out.feature_importances_
print(featuresimportance)

[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 4.67775587e-02 0.00000000e+00
 3.20678571e-04 9.83340931e-04 1.01276382e-03 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 7.95498169e-01
 1.73031796e-03 3.88018511e-03 1.64304846e-02 8.90745726e-04
 0.00000000e+00 0.00000000e+00 0.00000000e+00 4.66051584e-04
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 8.97938681e-04 6.67558979e-04 0.00000000e+00
 6.44556211e-03 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 1.86513731e-03 4.43727746e-03 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 5.03305232e-03 0.00000000e+00
 0.00000000e+00 0.00000000e+00 1.92152277e-02 0.00000000e+00
 0.00000000e+00 1.99835454e-04 4.10115183e-03 0.00000000e+00
 0.00000000e+00 0.000000

In [322]:
classnames = tree_out.classes_
print(featuresclasses)

AttributeError: 'DecisionTreeRegressor' object has no attribute 'classes_'

In [374]:
classnames = ['1000','1175','1200','1225','1250','1275','1300','1325','1350','1375','1400','1425','1450','1475','1500', '1525', '1550', '1575']


In [375]:
from sklearn import tree

# plt.figure(figsize=(30, 30)) # Resize figure

plt.figure(dpi=500, figsize=(40,40))

tree.plot_tree(tree_out, feature_names=featurenames, class_names=(tree_out.classes_))
plt.show()

AttributeError: 'DecisionTreeRegressor' object has no attribute 'classes_'

<Figure size 20000x20000 with 0 Axes>

In [376]:
from sklearn.tree import export_text
r = export_text(tree_out, feature_names=['abnormal', 'drop', 'hexangonal', 'kite', 'lozenge', 'octagonal', 'pointedoval',
 'quatrofoil', 'rectangular', 'round', 'roundedoval', 'scutiform', 'square',
 'trianglepointedup', 'undeterminedshape', 'size_area', 'animal', 'human',
 'objects', 'device', 'undetermined', 'unassigned', 'beast', 'bird', 'fish',
 'insect', 'bust', 'hand', 'boat', 'building', 'container', 'equipment',
 'naturalproduct', 'irregular', 'radial', 'lattice', 'fulllength', 'symbol',
 'hawkhunting', 'pelicaninpiety', 'headondish', 'twoheads', 'crossedhands',
 'handholdingitem', 'seated', 'standing', 'riding', 'crucified', 'apparel',
 'crenellation', 'tool', 'weapon', 'shell', 'wheatsheaf', 'stylizedlily',
 'crosses', 'heart', 'merchantmark', 'texts', 'handholdingbird', 'halflength',
 'crescent', 'beastbody', 'beasthead', 'doubleheadedeagle', 'horseshoe',
 'twobirdsdrinking', 'animalequipment', 'transport',
 'halflengthwomanholdingchild', 'halflengthwoman', 'halflengthman', 'swine',
 'boarhead', 'centaur', 'dragon', 'hare', 'lion', 'lionhead', 'mermaid',
 'squirrel', 'stag', 'staghead', 'unicorn', 'unicornhead', 'wolf', 'wolfhead',
 'standingwoman', 'standingman', 'armouredmanequestrian',
 'seatedwomanholdingchild', 'axe', 'shears', 'arrow', 'spear', 'sword', 'banner',
 'shield', 'christogram', 'lionfighting', 'sheep', 'griffin', 'hammer',
 'standingwomanholdingchild', 'hareonhound', 'lambandstaff', 'lionsleeping',
 'standingliturgicalapparel', 'manfightinganimal', 'bowandarrow',
 'spearandpennon', 'seatedman'])
print(r)

|--- size_area <= 217.96
|   |--- size_area <= 193.00
|   |   |--- texts <= 0.50
|   |   |   |--- size_area <= 133.86
|   |   |   |   |--- device <= 0.50
|   |   |   |   |   |--- size_area <= 122.85
|   |   |   |   |   |   |--- human <= 0.50
|   |   |   |   |   |   |   |--- value: [1443.58]
|   |   |   |   |   |   |--- human >  0.50
|   |   |   |   |   |   |   |--- value: [1468.21]
|   |   |   |   |   |--- size_area >  122.85
|   |   |   |   |   |   |--- bird <= 0.50
|   |   |   |   |   |   |   |--- value: [1422.46]
|   |   |   |   |   |   |--- bird >  0.50
|   |   |   |   |   |   |   |--- value: [1450.32]
|   |   |   |   |--- device >  0.50
|   |   |   |   |   |--- roundedoval <= 0.50
|   |   |   |   |   |   |--- size_area <= 80.69
|   |   |   |   |   |   |   |--- value: [1470.39]
|   |   |   |   |   |   |--- size_area >  80.69
|   |   |   |   |   |   |   |--- value: [1449.62]
|   |   |   |   |   |--- roundedoval >  0.50
|   |   |   |   |   |   |--- value: [1479.00]
|   |   |   |--- s

In [377]:
import graphviz 
dot_data = tree.export_graphviz(
    tree_out, 
    out_file=None,
    feature_names=None,
    class_names=classnames,
    rotate= True,
    node_ids=True,
    rounded=True,
    filled=True) 

graph = graphviz.Source(dot_data) 
graph.render("seals")

'seals.pdf'