In [1]:
# IMPORTS
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from manufacturing_company.src.common.const import *
from manufacturing_company.src.classification_algorithms.standard_classification import *
from manufacturing_company.src.logs.standard_classification_logger import StandardClassificationLogger
from manufacturing_company.src.visualization.plot_standard_classification import PlotStandardClassification
from manufacturing_company.src.visualization.random_baseline_mc import *

import warnings
warnings.filterwarnings(action='once')




In [None]:
# Load the positions table with the ID column as its index.
# `index_col=ID` already moves ID out of the columns and into the index, so a
# follow-up `positions.set_index(ID)` would look for a column that no longer
# exists and raise KeyError; the redundant call has been removed.
positions = pd.read_csv(MC_FILE_POSITIONS, sep=';', comment='#', index_col=ID)

# Value handed to assign_management_levels, the logger and the plot below.
levels = 2

In [3]:
def decision_tree_params(n_features):
    """Build the hyper-parameter grid for the decision-tree pipeline step.

    Parameters
    ----------
    n_features : int
        Number of features currently available to the model.

    Returns
    -------
    dict
        Grid keyed by ``model__max_depth`` and ``model__max_features``.
        ``max_depth`` covers the integers 1..20; ``max_features`` covers
        1..n_features-1, or ``[1]`` when that range would be empty.
    """
    # Plain ints, not np.linspace floats: scikit-learn expects an integer
    # max_depth and recent releases reject 1.0, 2.0, ... outright.
    max_depth = list(range(1, 21))
    # An empty candidate list is not a valid grid entry, so fall back to the
    # single usable value when fewer than two features are left.
    max_features = list(range(1, n_features)) or [1]
    return {'model__max_depth': max_depth, 'model__max_features': max_features}


In [4]:
# DECISION TREE
# For every month index: read that month's feature file, run it through
# assign_management_levels, and call classification with the decision-tree
# grid. Afterwards plot from the directory the logger exposes.

logger = StandardClassificationLogger('manufacturing_company', levels, DecisionTreeClassifier)

for month in range(MONTHS):
    # Read and post-process the monthly feature table in a single expression.
    features = assign_management_levels(
        levels,
        pd.read_csv(MC_FILE_FEATURES.format(month), sep=';', index_col=ID),
        positions,
    )

    # Scored with macro-averaged F1; the result of the last month stays bound
    # to `models` after the loop.
    models = classification(
        features,
        DecisionTreeClassifier,
        decision_tree_params,
        'f1_macro',
        logger,
        month,
    )

plot = PlotStandardClassification(
    logger.directory_path,
    DecisionTreeClassifier,
    levels,
    random_baseline_mc,
)
plot.plot()


MONTH:  1
FEATURES %:  100.0
BEST SCORE: 0.5383841414175085
Features sorted from best:
 [('work_at_weekend', 0.5598598198892625), ('neighborhood_variability_sender', 0.1617707017205604), ('neighborhood_variability_all', 0.12642918560053737), ('overtime', 0.08865972602942478), ('max_clique', 0.03784722892357356), ('clustering_coeff', 0.025433337836641423), ('in_degree', 0.0), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('hubs', 0.0), ('authorities', 0.0), ('cliques_count', 0.0), ('neighborhood_variability_recipient', 0.0)]
REMOVED:  work_at_weekend
REMOVED:  neighborhood_variability_sender
FEATURES %:  90.0
BEST SCORE: 0.471200241406191
Features sorted from best:
 [('overtime', 0.43344172505517714), ('in_degree', 0.32737279222754545), ('clustering_coeff', 0.23392517606749155), ('pagerank', 0.00526030664978571), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('hubs', 0.0), ('authorities', 0.

BEST SCORE: 0.4566107085234171
Features sorted from best:
 [('clustering_coeff', 0.501945340497557), ('max_clique', 0.391174193525567), ('cliques_count', 0.0952215008204737), ('neighborhood_variability_all', 0.006697703387720428), ('hubs', 0.0049612617686818185), ('in_degree', 0.0), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('authorities', 0.0), ('neighborhood_variability_recipient', 0.0)]
REMOVED:  clustering_coeff
REMOVED:  max_clique
FEATURES %:  70.0
BEST SCORE: 0.44122928704443465
Features sorted from best:
 [('neighborhood_variability_recipient', 0.368974848876305), ('closeness', 0.24970559959550925), ('cliques_count', 0.21168021610153365), ('betweenness', 0.16963933542665205), ('in_degree', 0.0), ('out_degree', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('hubs', 0.0), ('authorities', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  neighborhood_variability_recipient
FEATURES %:  60.0
BEST SCORE: 0.4786982463

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


BEST SCORE: 0.47198884448884454
Features sorted from best:
 [('cliques_count', 0.3974398849420588), ('neighborhood_variability_all', 0.26944219645358886), ('in_degree', 0.17149693538999647), ('betweenness', 0.16162098321435592), ('eigenvector', 0.0), ('pagerank', 0.0)]
REMOVED:  cliques_count
FEATURES %:  30.0
BEST SCORE: 0.4328165991097026
Features sorted from best:
 [('neighborhood_variability_all', 0.4504215211005718), ('in_degree', 0.3603730844565288), ('betweenness', 0.1806796705154466), ('pagerank', 0.008525723927452754), ('eigenvector', 0.0)]
REMOVED:  neighborhood_variability_all
REMOVED:  in_degree
FEATURES %:  20.0
BEST SCORE: 0.4130290250684988
Features sorted from best:
 [('pagerank', 0.7755964058253133), ('betweenness', 0.2244035941746866), ('eigenvector', 0.0)]
REMOVED:  pagerank
FEATURES %:  10.0
BEST SCORE: 0.4552665566636155
Features sorted from best:
 [('betweenness', 1.0), ('eigenvector', 0.0)]
REMOVED:  betweenness
REMOVED:  eigenvector

****************************

BEST SCORE: 0.5857115881534487
Features sorted from best:
 [('max_clique', 0.3683214705766244), ('clustering_coeff', 0.3077941980354247), ('in_degree', 0.200687358413798), ('cliques_count', 0.11599757256680772), ('hubs', 0.00719940040734508), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('authorities', 0.0), ('overtime', 0.0), ('neighborhood_variability_recipient', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  max_clique
FEATURES %:  80.0
BEST SCORE: 0.524424194056997
Features sorted from best:
 [('clustering_coeff', 0.4377731399281788), ('overtime', 0.41753125800002094), ('hubs', 0.14018796173642253), ('betweenness', 0.004507640335377654), ('in_degree', 0.0), ('out_degree', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('authorities', 0.0), ('cliques_count', 0.0), ('neighborhood_variability_recipient', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  clustering_coeff
REMOVED:  overtime
FEATU

in_degree
FEATURES %:  40.0
BEST SCORE: 0.40875931152971
Features sorted from best:
 [('betweenness', 0.32775755470095463), ('out_degree', 0.3032834409866245), ('closeness', 0.25261655391169563), ('hubs', 0.11634245040072529), ('eigenvector', 0.0), ('authorities', 0.0)]
REMOVED:  betweenness
FEATURES %:  30.0
BEST SCORE: 0.520142773892774
Features sorted from best:
 [('out_degree', 0.5102441501065341), ('closeness', 0.29667166270111694), ('hubs', 0.19308418719234904), ('eigenvector', 0.0), ('authorities', 0.0)]
REMOVED:  out_degree
REMOVED:  closeness
FEATURES %:  20.0
BEST SCORE: 0.40106664366626416
Features sorted from best:
 [('authorities', 0.6061811733372473), ('hubs', 0.39381882666275275), ('eigenvector', 0.0)]
REMOVED:  authorities
FEATURES %:  10.0


  'precision', 'predicted', average, warn_for)


BEST SCORE: 0.27226518871017125
Features sorted from best:
 [('hubs', 1.0), ('eigenvector', 0.0)]
REMOVED:  hubs
REMOVED:  eigenvector

*****************************************************

MONTH:  3
FEATURES %:  100.0
BEST SCORE: 0.5438458550456398
Features sorted from best:
 [('work_at_weekend', 0.5097093329379474), ('neighborhood_variability_sender', 0.13055619831719698), ('clustering_coeff', 0.12577943767242186), ('neighborhood_variability_recipient', 0.11543402250155779), ('betweenness', 0.08098835712267766), ('max_clique', 0.0375326514481984), ('in_degree', 0.0), ('out_degree', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('hubs', 0.0), ('authorities', 0.0), ('cliques_count', 0.0), ('overtime', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  work_at_weekend
REMOVED:  neighborhood_variability_sender
FEATURES %:  90.0
BEST SCORE: 0.5624433012822206
Features sorted from best:
 [('clustering_coeff', 0.46604503223078914), ('max_clique', 0.4584040639785396

BEST SCORE: 0.5110410384249394
Features sorted from best:
 [('max_clique', 0.6397003241612532), ('cliques_count', 0.24842463212847102), ('hubs', 0.08076540588709753), ('pagerank', 0.031109637823178334), ('in_degree', 0.0), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('authorities', 0.0), ('overtime', 0.0), ('neighborhood_variability_recipient', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  max_clique
REMOVED:  cliques_count
FEATURES %:  70.0
BEST SCORE: 0.43155624552208993
Features sorted from best:
 [('neighborhood_variability_recipient', 0.44464817641120435), ('in_degree', 0.2567603172771709), ('authorities', 0.17983496612659908), ('hubs', 0.10433698963007464), ('closeness', 0.014419550554950986), ('out_degree', 0.0), ('betweenness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('overtime', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  neighborhood_variability_recipient
FEATURES %:  60.0
BEST SCORE: 0.37352745925800956
Feat

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


BEST SCORE: 0.4166281876483361
Features sorted from best:
 [('authorities', 0.5662263188723946), ('overtime', 0.3722875296789972), ('closeness', 0.06148615144860812), ('in_degree', 0.0), ('eigenvector', 0.0)]
REMOVED:  authorities
REMOVED:  overtime
FEATURES %:  20.0
BEST SCORE: 0.5325891210239037
Features sorted from best:
 [('closeness', 0.5189718409039279), ('in_degree', 0.4810281590960721), ('eigenvector', 0.0)]
REMOVED:  closeness
FEATURES %:  10.0
BEST SCORE: 0.5157534279813141
Features sorted from best:
 [('in_degree', 1.0), ('eigenvector', 0.0)]
REMOVED:  in_degree
REMOVED:  eigenvector

*****************************************************

MONTH:  4
FEATURES %:  100.0
BEST SCORE: 0.5104111650623279
Features sorted from best:
 [('work_at_weekend', 0.5323115660528978), ('neighborhood_variability_sender', 0.16968334256310993), ('clustering_coeff', 0.1271543197776362), ('overtime', 0.08466952199357551), ('neighborhood_variability_recipient', 0.06597598272538552), ('closeness', 0.

BEST SCORE: 0.5649933712449758
Features sorted from best:
 [('clustering_coeff', 0.5217882956947084), ('max_clique', 0.3629651392976331), ('cliques_count', 0.11524656500765863), ('in_degree', 0.0), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('hubs', 0.0), ('authorities', 0.0), ('neighborhood_variability_recipient', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  clustering_coeff
REMOVED:  max_clique
FEATURES %:  70.0
BEST SCORE: 0.41328263032650775
Features sorted from best:
 [('neighborhood_variability_recipient', 0.48552343962801414), ('cliques_count', 0.2268043515973323), ('neighborhood_variability_all', 0.1804986488444087), ('hubs', 0.09383137844859292), ('in_degree', 0.013342181481651774), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('authorities', 0.0)]
REMOVED:  neighborhood_variability_recipient
FEATURES %:  60.0
BEST SCORE: 0.42629382918479053
Feature

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


BEST SCORE: 0.5193187740212364
Features sorted from best:
 [('in_degree', 0.7302193682368946), ('betweenness', 0.21480419525954494), ('out_degree', 0.050181962110917605), ('pagerank', 0.004794474392642969), ('eigenvector', 0.0)]
REMOVED:  in_degree
REMOVED:  betweenness
FEATURES %:  20.0
BEST SCORE: 0.42609494783211826
Features sorted from best:
 [('out_degree', 0.7968981990737498), ('pagerank', 0.2031018009262502), ('eigenvector', 0.0)]
REMOVED:  out_degree
FEATURES %:  10.0
BEST SCORE: 0.4645437236502108
Features sorted from best:
 [('pagerank', 1.0), ('eigenvector', 0.0)]
REMOVED:  pagerank
REMOVED:  eigenvector

*****************************************************

MONTH:  5
FEATURES %:  100.0
BEST SCORE: 0.4858803614685967
Features sorted from best:
 [('work_at_weekend', 0.5003319999755074), ('neighborhood_variability_sender', 0.1822480417548133), ('neighborhood_variability_all', 0.1654057161292435), ('overtime', 0.10497699258799593), ('max_clique', 0.03700197316250538), ('pagera

BEST SCORE: 0.5378001606015188
Features sorted from best:
 [('overtime', 0.4025223264463654), ('clustering_coeff', 0.2201253317218004), ('max_clique', 0.18795008559584617), ('hubs', 0.13464112624955002), ('neighborhood_variability_recipient', 0.05043788288224559), ('cliques_count', 0.004323247104192463), ('in_degree', 0.0), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('authorities', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  overtime
FEATURES %:  80.0
BEST SCORE: 0.5866798094811678
Features sorted from best:
 [('clustering_coeff', 0.5020258492906509), ('max_clique', 0.4156219782603674), ('cliques_count', 0.07448689354804958), ('hubs', 0.007865278900932077), ('in_degree', 0.0), ('out_degree', 0.0), ('betweenness', 0.0), ('closeness', 0.0), ('eigenvector', 0.0), ('pagerank', 0.0), ('authorities', 0.0), ('neighborhood_variability_recipient', 0.0), ('neighborhood_variability_all', 0.0)]
REMOVED:  clustering_coeff
REMOV

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


BEST SCORE: 0.4644741651093738
Features sorted from best:
 [('neighborhood_variability_all', 0.345906303410694), ('in_degree', 0.3154007092455186), ('hubs', 0.226350104563907), ('authorities', 0.11234288277988032), ('eigenvector', 0.0), ('pagerank', 0.0)]
REMOVED:  neighborhood_variability_all
FEATURES %:  30.0
BEST SCORE: 0.43845008508224614
Features sorted from best:
 [('authorities', 0.5859752429843086), ('hubs', 0.4049066700330511), ('pagerank', 0.00911808698264035), ('in_degree', 0.0), ('eigenvector', 0.0)]
REMOVED:  authorities
REMOVED:  hubs
FEATURES %:  20.0
BEST SCORE: 0.5233874014722169
Features sorted from best:
 [('in_degree', 0.9902217026679325), ('pagerank', 0.009778297332067467), ('eigenvector', 0.0)]
REMOVED:  in_degree
FEATURES %:  10.0
BEST SCORE: 0.4842854165922434
Features sorted from best:
 [('pagerank', 1.0), ('eigenvector', 0.0)]
REMOVED:  pagerank
REMOVED:  eigenvector

*****************************************************



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [7]:
def random_forest_params(n_features):
    """Build the hyper-parameter grid for the random-forest pipeline step.

    Parameters
    ----------
    n_features : int
        Number of features currently available to the model.

    Returns
    -------
    dict
        Grid keyed by ``model__n_estimators``, ``model__max_depth`` and
        ``model__max_features``. ``max_depth`` covers the integers 1..20;
        ``max_features`` covers 1..n_features-1, or ``[1]`` when that range
        would be empty.
    """
    n_estimators = [1, 2, 4, 8, 16, 32, 64, 100, 200]
    # Plain ints, not np.linspace floats: scikit-learn expects an integer
    # max_depth and recent releases reject 1.0, 2.0, ... outright.
    max_depth = list(range(1, 21))
    # An empty candidate list is not a valid grid entry, so fall back to the
    # single usable value when fewer than two features are left.
    max_features = list(range(1, n_features)) or [1]
    return {'model__n_estimators': n_estimators,
            'model__max_depth': max_depth,
            'model__max_features': max_features}


In [None]:
# RANDOM FOREST
# Same per-month procedure as the decision-tree cell, but for
# RandomForestClassifier and with the random-forest parameter grid.

logger = StandardClassificationLogger('manufacturing_company', levels, RandomForestClassifier)

for month in range(MONTHS):
    features = pd.read_csv(MC_FILE_FEATURES.format(month), sep=';', index_col=ID)
    features = assign_management_levels(levels, features, positions)

    # Pass random_forest_params (not decision_tree_params): the decision-tree
    # grid has no 'model__n_estimators' entry, so the forest size would never
    # be searched.
    models = classification(features, RandomForestClassifier, random_forest_params, 'f1_macro', logger, month)

plot = PlotStandardClassification(logger.directory_path, RandomForestClassifier, levels, random_baseline_mc)
plot.plot()
