In [1]:
# IMPORTS
import warnings
warnings.filterwarnings(action='ignore')

from manufacturing_company.src.common.const import *
from manufacturing_company.src.network.social_network import *

from manufacturing_company.src.classification_algorithms.standard_classification import *
from manufacturing_company.src.classification_algorithms.NodeInfo import NodeInfo
from manufacturing_company.src.classification_algorithms.CollectiveClassificationResult import CollectiveClassificationResult
from manufacturing_company.src.logs.collective_classification_logger import CollectiveClassificationLogger
from sklearn.metrics import jaccard_similarity_score, jaccard_score, f1_score



In [22]:
# CONST
pcts = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
thresholds = [2, 3, 4, 5, 6, 7, 8, 9, 10]
jaccard_mins = [0.7, 0.8, 0.9, 0.99]
levels = 3
minority_labels = [1, 2]

df_positions = pd.read_csv('manufacturing_company/data/raw/positions.csv', sep=';', comment='#', index_col=ID)

In [23]:
# COLLECTIVE CLASSIFICATION

logger = CollectiveClassificationLogger(levels)

for month in range(1, 2):
    df_features = pd.read_csv('manufacturing_company/data/intermediate/05_features/' + str(month) + '_months_features.csv', sep=';', index_col=ID)
    
    df_communication = pd.read_csv('manufacturing_company/data/intermediate/03_minimum_activity/' + str(month) + '_months_communication.csv', sep=';')
    G = create_network(df_communication, weight=False)
    G_undirected = G.to_undirected()
    
    # for pct in pcts:
    #     for threshold in thresholds:
    #         for jaccard_min in jaccard_mins:
    #             collective_classification(logger, month, G_undirected, df_features.copy(), pct, levels, df_positions.copy(), threshold, minority_labels, jaccard_min)


In [24]:
def select_nodes_based_on_utility_score(utility_score_name, utility_score, pct, levels):  
    utility_score = utility_score.sort_values(utility_score_name, ascending=False)

    known_nodes_map = dict()
    
    for position in range(1, levels + 1):
        employees_on_given_position = utility_score[utility_score[POSITION] == position]
        all_nodes = len(employees_on_given_position)
        known_nodes = round(all_nodes * pct)
        
        known_nodes_id = employees_on_given_position.iloc[:known_nodes].index
        known_nodes_map.update({id: position for id in known_nodes_id})
        
    return known_nodes_map


In [25]:
def message_passing(G, known_nodes, threshold, minority_labels, levels, jaccard_min): 
    df_nodes = pd.DataFrame(G.nodes(data='utility_score'), columns=[ID, 'utility_score'])
    df_nodes = df_nodes.set_index(ID)
    
    order_desc = df_nodes.sort_values('utility_score', ascending=True).index
    
    nodes = pd.DataFrame(G.nodes(data='label'), columns=[ID, 'label'])
    nodes = nodes.set_index(ID)
    nodes = nodes.loc[order_desc, 'label'].to_dict()
    # print(nodes)
    label_counter = {node_id: NodeInfo() for node_id in nodes.keys()}
    
    end = False
    
    while not end:
        old_labels = [value for (key, value) in nodes.items() if key not in known_nodes]
        for node, label in nodes.items():
            if label != -1:
                neighbors = G.neighbors(node)
                for neighbor in neighbors:
                    if neighbor not in known_nodes:
                        label_counter[neighbor].labels.append(label)
        
        # UPDATE LABELS
        for node, label in nodes.items():
            unique_labels = len(set(label_counter[node].labels)) == 1

            # TODO method calculate label frequency
            label_freq = None
            if levels == 2:
                label_freq = {1: 0, 2: 0}
            elif levels == 3:
                label_freq = {1: 0, 2: 0, 3: 0}
            else:
                print(set(nodes.values()))
                raise Exception
                
            for l in label_freq.keys():
                size = label_counter[node].labels.count(l)
                if l not in minority_labels:
                    size = round(size / float(threshold))
                label_freq[l] = size

            same_freq = len(set(label_freq.values())) == 1

            # TODO function select update strategy
            if unique_labels:
                print('unique_labels')
                nodes[node] = label_counter[node].labels[0]
                label_counter[node].unchanged_iter = 0
            elif same_freq:
                print('same_freq')
                # TODO update unchanged state
                label_counter[node].unchanged_iter += 1

                if label_counter[node].unchanged_iter > 100:
                    neighbors = G.neighbors(node)

                    max_utility_score = -1
                    node_label = -1

                    for neighbor_id in neighbors:
                        neighbor = G.nodes[neighbor_id]
                        if neighbor['utility_score'] > max_utility_score & neighbor['label'] != -1:
                            max_utility_score = neighbor['utility_score']
                            node_label = neighbor['label']

                    nodes[node] = node_label
                    label_counter[node].unchanged_iter = 0
            else:
                print('else')
                nodes[node] = max(label_freq.items(), key=operator.itemgetter(1))[0]
                label_counter[node].unchanged_iter = 0

            label_counter[node].labels = []
            
        new_labels = [value for (key, value) in nodes.items() if key not in known_nodes]
        # print(old_labels)
        # print(new_labels)
        # print(list(nodes.values()).count(-1))
        if (jaccard_score(old_labels, new_labels, average='micro') >= jaccard_min) & (-1 not in nodes.values()):
            end = True
        
    return nodes


In [26]:
def collective_classification(logger, month, G, df_features, pct, levels, df_positions, threshold, minority_labels, jaccard_min):
    feature_names = df_features.columns
    
    df_features = assign_management_levels(levels, df_features, df_positions)
    
    for utility_score_name in feature_names:
        utility_score = df_features[[utility_score_name, POSITION]]     
        known_nodes = select_nodes_based_on_utility_score(utility_score_name, utility_score, pct, levels)
        
        nx.set_node_attributes(G, -1, 'label')
        nx.set_node_attributes(G, known_nodes, 'label')
        nx.set_node_attributes(G, utility_score[utility_score_name], 'utility_score')
        
        nodes = message_passing(G, known_nodes, threshold, minority_labels, levels, jaccard_min)
        # print(nodes)
        nodes = pd.DataFrame.from_dict(nodes, orient='index', columns=[POSITION])
        nodes.index.name = ID
        
        nodes = nodes.loc[~nodes.index.isin(known_nodes)]
        
        df_merged = pd.merge(nodes, df_features[POSITION], on=ID)
        # print(df_merged)
        f1 = f1_score(df_merged.iloc[:, 0], df_merged.iloc[:, 1], average='macro')
        
        logger.save(CollectiveClassificationResult(f1, pct, utility_score_name, threshold, jaccard_min, minority_labels), month)


In [158]:
collective_classification(G, df_features, pct=0.6, levels=2, df_positions=df_positions, threshold=3, minority_labels=[1])

PCT:  0.6
F1:  0.8500000000000001
Utiliti score:  in_degree
***********************************
PCT:  0.6
F1:  0.6651162790697674
Utiliti score:  out_degree
***********************************
PCT:  0.6
F1:  0.7775718257645967
Utiliti score:  betweenness
***********************************
PCT:  0.6
F1:  0.4606741573033708
Utiliti score:  closeness
***********************************


  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)


PCT:  0.6
F1:  0.4606741573033708
Utiliti score:  eigenvector
***********************************
PCT:  0.6
F1:  0.1272727272727273
Utiliti score:  clustering_coeff
***********************************


  'recall', 'true', average, warn_for)


PCT:  0.6
F1:  0.7491289198606272
Utiliti score:  pagerank
***********************************


  'recall', 'true', average, warn_for)


PCT:  0.6
F1:  0.4606741573033708
Utiliti score:  hubs
***********************************
PCT:  0.6
F1:  0.8024691358024691
Utiliti score:  authorities
***********************************
PCT:  0.6
F1:  0.7433155080213902
Utiliti score:  max_clique
***********************************


PCT:  0.6
F1:  0.5909090909090909
Utiliti score:  cliques_count
***********************************
PCT:  0.6
F1:  0.4606741573033708
Utiliti score:  overtime
***********************************
PCT:  0.6
F1:  0.5909090909090909
Utiliti score:  work_at_weekend
***********************************


  'recall', 'true', average, warn_for)


PCT:  0.6
F1:  0.4606741573033708
Utiliti score:  neighborhood_variability_sender
***********************************
PCT:  0.6
F1:  0.6886005560704356
Utiliti score:  neighborhood_variability_recipient
***********************************
PCT:  0.6
F1:  0.4606741573033708
Utiliti score:  neighborhood_variability_all
***********************************


  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)


In [27]:
collective_classification(logger, 1, G_undirected, df_features.copy(), pct=0.6, levels=3, df_positions=df_positions.copy(), threshold=4, minority_labels=[1,2], jaccard_min=0.8)

unique_labels
unique_labels
unique_labels
else
unique_labels
else
else
unique_labels
else
unique_labels
unique_labels
unique_labels
else
unique_labels
else
else
else
unique_labels
else
else
unique_labels
else
else
else
unique_labels
else
else
unique_labels
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
else
same_freq
same_freq
else
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
sam


same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
unique_labels
else
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq


same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
unique_labels
else
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
s

else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else


else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq


else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq


else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
same_freq
else


KeyboardInterrupt: 