In [5]:
import pandas as pd
import json, os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" #CUBLAS_WORKSPACE_CONFIG=:4096:8 or CUBLAS_WORKSPACE_CONFIG=:16:8

import torch
import torch.nn as nn
import torch_geometric
from torch_geometric.nn import GCNConv, GatedGraphConv, GATConv, SuperGATConv, MLP, GraphSAGE
import torch.nn.functional as F
from torch.nn import Linear, LSTM, RNN, GRU, ReLU, Tanh, Sigmoid, CrossEntropyLoss

from torch.optim import Adam, SGD
from torch.nn.functional import cross_entropy
from torch.utils.data import random_split

from torch_geometric.data import Data, Batch
from torch_geometric.explain import Explainer, GNNExplainer, CaptumExplainer, PGExplainer, AttentionExplainer
from torch_geometric.nn import global_mean_pool, BatchNorm, global_max_pool, global_add_pool, TopKPooling, SAGPooling

from captum.attr import Saliency, IntegratedGradients

import numpy as np
import sklearn
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import KFold, StratifiedKFold

from IPython.display import display, HTML
# Inject a CSS rule that sets elements with class `container` to 80% of the page width.
_wide_container_css = "<style>.container { width:80% !important; }</style>"
display(HTML(_wide_container_css))

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = 'cpu'
print('Device:', device)

# `device` is a torch.device object, not a string: the previous `device == 'cuda'`
# comparison did not match, so this branch was skipped even on a GPU.
# Comparing the `.type` attribute (a plain str) makes the branch reachable.
if device.type == 'cuda':
    #torch.use_deterministic_algorithms(True)
    # Disable cuDNN benchmark mode on the GPU.
    torch.backends.cudnn.benchmark = False

Device: cuda


In [6]:
# Seed torch, PyTorch Geometric and NumPy with the same value (11), in that order.
for _seed_fn in (torch.manual_seed, torch_geometric.seed_everything, np.random.seed):
    _seed_fn(11)

In [7]:
### Load the stored graph lists ###
# NOTE(review): torch.load unpickles the file contents, so only load .pt files from a
# trusted source. Newer PyTorch releases may also need `weights_only=False` here - TODO confirm.
graphs_females, graphs_males = (
    torch.load(graph_path)
    for graph_path in ('GNN_data_structure_females_v4.pt', 'GNN_data_structure_males_v4.pt')
)

# Width of a node-feature row, read from the first female graph.
num_features = graphs_females[0].x.shape[-1]

### Collate each group into one batch and move it to `device` ###
batch_females = Batch.from_data_list(graphs_females).to(device)
batch_males = Batch.from_data_list(graphs_males).to(device)

num_female_graphs = len(graphs_females)
num_male_graphs = len(graphs_males)

print('Num of features:', num_features)
print('Females:', num_female_graphs)
print('Males:', num_male_graphs)
print('Total:', num_female_graphs + num_male_graphs)
# Number of rows in each collated node-feature matrix.
print(batch_females.x.shape[0])
print(batch_males.x.shape[0])

Num of features: 54
Females: 272
Males: 625
Total: 897
15617
32873


In [9]:
### Define GNN model ###
class GCN(torch.nn.Module):
    """Graph-level classifier: two GCN layers, a linear projection, mean pooling, linear head.

    Args:
        num_node_features: Width of each node's feature vector.
        hidden_channels: Output width of the first GCN layer. The second GCN layer uses
            ``int(hidden_channels/2)`` and the node-level projection ``int(hidden_channels/4)``.
        num_classes: Number of target classes; this is the width of the returned
            log-probability rows.
    """

    def __init__(self, num_node_features, hidden_channels, num_classes):
        super(GCN, self).__init__()
        half_width = int(hidden_channels/2)
        quarter_width = int(hidden_channels/4)
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, half_width)
        self.linear = Linear(half_width, quarter_width)
        # The previous version built `LSTM(quarter_width, num_classes)` here but never
        # called it in forward(), so the network emitted `quarter_width` scores per graph
        # and silently ignored `num_classes`. A linear head maps the pooled graph
        # embedding to exactly `num_classes` scores instead.
        self.classifier = Linear(quarter_width, num_classes)

    def forward(self, x, edge_index, batch):
        """Return log-probabilities with one row per graph and ``num_classes`` columns.

        Args:
            x: Node-feature matrix.
            edge_index: Edge list passed to both GCN layers.
            batch: Per-node graph assignment used by the mean-pool readout.
        """
        # 1. Node embeddings: two rounds of message passing followed by a linear projection.
        # NOTE(review): there is no activation between these layers, so the node
        # embedding is a linear function of the input - confirm this is intended.
        x = self.conv1(x, edge_index)
        x = self.conv2(x, edge_index)
        x = self.linear(x)
        # 2. Readout: average the node embeddings of each graph into one graph embedding.
        x = global_mean_pool(x, batch)
        # 3. Classifier: dropout on the graph embedding (training mode only), then class scores.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.classifier(x)
        return F.log_softmax(x, dim=1)

def train_model(num_features, hidden_channels, epochs, train_batch, lr=0.01, num_classes=2):
    """Train a fresh GCN with full-batch Adam updates and return it.

    Args:
        num_features: Width of each node's feature vector.
        hidden_channels: Hidden width passed to ``GCN``.
        epochs: Number of optimisation steps; each step uses the whole ``train_batch``.
        train_batch: Collated batch exposing ``x``, ``edge_index``, ``batch`` and ``y``.
        lr: Adam learning rate (defaults to the previously hard-coded 0.01).
        num_classes: Number of target classes (defaults to the previously hard-coded 2).

    Returns:
        The trained model, placed on ``device`` and still in training mode; switch it to
        ``eval()`` before using it for inference.
    """
    # Create an instance of the model
    model = GCN(num_features, hidden_channels, num_classes).to(device)
    # Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # The model already returns log-probabilities; CrossEntropyLoss applies log-softmax
    # once more, which leaves already-normalised log-probabilities unchanged.
    criterion = torch.nn.CrossEntropyLoss()

    # Make the mode explicit so dropout is active regardless of how the model was built.
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(train_batch.x, train_batch.edge_index, train_batch.batch)
        loss = criterion(out, train_batch.y)
        loss.backward()
        optimizer.step()
    return model

def eval_model(model, test_batch):
    """Predict one class index per graph in ``test_batch``.

    Args:
        model: Module called as ``model(x, edge_index, batch)`` that returns one row of
            class scores per graph.
        test_batch: Collated batch exposing ``x``, ``edge_index`` and ``batch``.

    Returns:
        A Python list holding the arg-max class index of every output row.

    Note:
        The model is switched to evaluation mode and is left in that mode.
    """
    model.eval()
    # Inference needs no gradients; skipping autograd avoids building a graph that
    # would otherwise hold on to every intermediate activation.
    with torch.no_grad():
        out = model(test_batch.x, test_batch.edge_index, test_batch.batch)
    pred = out.argmax(dim=1).tolist()  # Use the class with highest probability.
    return pred

In [10]:
######################################################
### Females - Train Model for Explainable AI ###
######################################################
print('Females - Train Model for Explainable AI...')
# Re-seed torch, PyTorch Geometric and NumPy immediately before training.
for _reseed in (torch.manual_seed, torch_geometric.seed_everything, np.random.seed):
    _reseed(11)

# Hyper-parameters used for the female-graph model.
hidden_channels, epochs = 128, 150

# Collate every female graph into one batch on `device` and train on that whole batch.
batch_females = Batch.from_data_list(graphs_females).to(device)
model_females = train_model(
    num_features=num_features,
    hidden_channels=hidden_channels,
    epochs=epochs,
    train_batch=batch_females,
)

print('Finish training model for Females.')

Females - Train Model for Explainable AI...
Finish training model for Females.


In [11]:
######################################################
### Males - Train Model for Explainable AI ###
######################################################
print('Males - Train Model for Explainable AI...')
# Re-seed torch, PyTorch Geometric and NumPy immediately before training.
for _reseed in (torch.manual_seed, torch_geometric.seed_everything, np.random.seed):
    _reseed(11)

# Hyper-parameters used for the male-graph model.
hidden_channels, epochs = 128, 240

# Collate every male graph into one batch on `device` and train on that whole batch.
batch_males = Batch.from_data_list(graphs_males).to(device)
model_males = train_model(
    num_features=num_features,
    hidden_channels=hidden_channels,
    epochs=epochs,
    train_batch=batch_males,
)

print('Finish training model for Males.')

Males - Train Model for Explainable AI...
Finish training model for Males.


In [16]:
######################################################
### Explainer Captum - Females and Males ###
######################################################
def _build_captum_explainer(model):
    """Return an Integrated-Gradients Explainer producing per-node, per-feature masks for `model`."""
    return Explainer(
        model=model,
        algorithm=CaptumExplainer('IntegratedGradients'),
        explanation_type="phenomenon",
        node_mask_type="attributes",
        model_config=dict(
            mode="multiclass_classification",
            task_level="graph",
            return_type="log_probs",
        ),
    )


def _mean_feature_importance_by_role(graphs, explainer):
    """Average the node-feature attributions of every graph separately per node role.

    A node counts as an entrepreneur when its first feature truncates to 1; every other
    node is treated as an investor.

    Args:
        graphs: Sequence of graph objects exposing ``x`` (node features).
        explainer: Explainer whose explanation holds a ``node_mask`` with one row per
            node and one column per feature.

    Returns:
        ``(entre_importance, inves_importance)``: two dicts keyed by the graph's position
        in ``graphs``; each value is a list with the mean attribution per feature.
        A role with no nodes in a graph yields all zeros.
    """
    entre_importance = {}
    inves_importance = {}

    for graph_idx, graph in enumerate(graphs):
        num_feat = graph.x.shape[1]
        batch = Batch.from_data_list([graph]).to(device)
        explanation = explainer(
            x=batch.x,
            edge_index=batch.edge_index,
            target=batch.y,
            batch=batch.batch,
        )
        # Convert once per graph instead of once per node.
        mask_rows = explanation['node_mask'].detach().tolist()
        role_flags = graph.x[:, 0].tolist()

        sums = {True: [0.0] * num_feat, False: [0.0] * num_feat}
        counts = {True: 0, False: 0}
        for role_flag, mask_row in zip(role_flags, mask_rows):
            is_entre = int(role_flag) == 1
            counts[is_entre] += 1
            role_sums = sums[is_entre]
            for j in range(num_feat):
                role_sums[j] += mask_row[j]

        # Guard BOTH divisors: previously only the investor count was protected, so a
        # graph without any entrepreneur node raised ZeroDivisionError.
        entre_importance[graph_idx] = [total / max(counts[True], 1) for total in sums[True]]
        inves_importance[graph_idx] = [total / max(counts[False], 1) for total in sums[False]]

    return entre_importance, inves_importance


print('Explanations Females...')
# One explainer per model is enough; it is reused for every graph of that group.
explainer_females = _build_captum_explainer(model_females)
feat_import_entre_females, feat_import_inves_females = _mean_feature_importance_by_role(
    graphs_females, explainer_females
)

print('Explanations Males...')
explainer_males = _build_captum_explainer(model_males)
feat_import_entre_males, feat_import_inves_males = _mean_feature_importance_by_role(
    graphs_males, explainer_males
)

print('Finish.')

Explanations Females...
Number of nodes: 34
Number of features: 54
1
54
tensor([[0.2106, 0.7200, 0.0561, 0.1039, 0.0507, 0.0494, 0.0533, 0.0538, 0.0479,
         0.0516, 0.0506, 0.0472, 0.0504, 0.0312, 0.0525, 0.0478, 0.0551, 0.0511,
         0.0498, 0.0463, 0.0487, 0.0512, 0.0528, 0.0551, 0.5635, 0.6949, 0.0540,
         0.6148, 0.4338, 0.0500, 0.0509, 0.0430, 0.0443, 0.0721, 0.1009, 0.0783,
         0.6947, 0.6378, 0.6891, 0.1003, 0.0621, 0.0837, 0.1874, 0.6493, 0.1249,
         0.1335, 0.5040, 0.5439, 0.4179, 0.0464, 0.2792, 0.1018, 0.1349, 0.2770]],
       device='cuda:0')


In [22]:
######################################################
### Export Importance of Features ###
######################################################
# One name per node feature.
# NOTE(review): assumed to follow the column order of `graph.x` - TODO confirm.
# The loader cell reports 54 features per node and every exported row carries one value
# per feature for each role, so the header needs 54 names per role. The previous header
# also listed six 'inter_2way_*' names per role, which shifted every investor value
# under the wrong heading.
feature_names = [
    'isEntre', 'isQ&A_v2', 'Duration (secs)', 'Age', 'Ethnicity==Black', 'Industry==Children / Education', 'Industry==Fashion / Beauty', 'Industry==Fitness / Sports / Outdoors',
    'Industry==Food and Beverage', 'Industry==Health / Wellness / Cleaning', 'Industry==Lifestyle / Home', 'Industry==Software / Tech', 'Industry==Pet Products',
    'Revenue_Model==Production/Transactional model', 'Revenue_Model==Subscription model', 'Revenue_Model==Rental or leasing model', 'Retail_Ecommerce==Retail',
    'Retail_Ecommerce==Online', 'Has_Patent==YES', 'Has_Patent==IN-PROGRESS', 'Num_of_Presenters', 'Has_Debt', 'Seasonal', 'Num_of_Sales_Last_Year_USD_resc',
    'VOICE_pitch', 'VOICE_articulation_rate', 'VOICE_neutral', 'VOICE_calm', 'VOICE_happy', 'VOICE_sad',
    'VOICE_angry', 'FACE_smiling', 'FACE_happy', 'FACE_neutral', 'FACE_sad', 'FACE_angry',
    'TEXT_financial_sentiment_score', 'TEXT_generic_sentiment_score', 'TEXT_joy', 'TEXT_sadness', 'TEXT_anger',
    'TEXT_trust', 'TEXT_conflict', 'TEXT_social_support', 'TEXT_similarity', 'TEXT_respect', 'TEXT_knowledge', 'TEXT_power', 'TEXT_fun',  'TEXT_identity', 'TEXT_romance',
    'TEXT_lexical_diversity_mtld', 'TEXT_lexical_sophistication_word_frequency', 'TEXT_num_uncertainty_words_lexicon1',
]

# Entrepreneur columns first, then the same features prefixed for the investor side.
columns = feature_names + ['INVES_' + name for name in feature_names]

header = 'Entre_isFemale,'
header += ','.join([str(elem) for elem in columns])
print(header)


def _print_feature_importance_rows(is_female_flag, graphs, entre_importance, inves_importance):
    """Print one CSV row per graph: the gender flag, entrepreneur means, investor means.

    Raises:
        ValueError: If a row does not carry exactly one value per header column, which
            would silently misalign the CSV.
    """
    for i in range(len(graphs)):
        values = list(entre_importance[i]) + list(inves_importance[i])
        if len(values) != len(columns):
            raise ValueError(
                'Row %d has %d importance values but the header has %d feature columns'
                % (i, len(values), len(columns))
            )
        print(','.join([is_female_flag] + [str(elem) for elem in values]))


_print_feature_importance_rows('1', graphs_females, feat_import_entre_females, feat_import_inves_females)
_print_feature_importance_rows('0', graphs_males, feat_import_entre_males, feat_import_inves_males)

Entre_isFemale,isEntre,isQ&A_v2,Duration (secs),Age,Ethnicity==Black,Industry==Children / Education,Industry==Fashion / Beauty,Industry==Fitness / Sports / Outdoors,Industry==Food and Beverage,Industry==Health / Wellness / Cleaning,Industry==Lifestyle / Home,Industry==Software / Tech,Industry==Pet Products,Revenue_Model==Production/Transactional model,Revenue_Model==Subscription model,Revenue_Model==Rental or leasing model,Retail_Ecommerce==Retail,Retail_Ecommerce==Online,Has_Patent==YES,Has_Patent==IN-PROGRESS,Num_of_Presenters,Has_Debt,Seasonal,Num_of_Sales_Last_Year_USD_resc,VOICE_pitch,VOICE_articulation_rate,VOICE_neutral,VOICE_calm,VOICE_happy,VOICE_sad,VOICE_angry,FACE_smiling,FACE_happy,FACE_neutral,FACE_sad,FACE_angry,TEXT_financial_sentiment_score,TEXT_generic_sentiment_score,TEXT_joy,TEXT_sadness,TEXT_anger,TEXT_trust,TEXT_conflict,TEXT_social_support,TEXT_similarity,TEXT_respect,TEXT_knowledge,TEXT_power,TEXT_fun,TEXT_identity,TEXT_romance,TEXT_lexical_diversity_mtld,TEXT_l

In [23]:
######################################################
### Explainer Nodes (GNNExplainer) - Females and Males ###
######################################################
def _build_node_explainer(model):
    """Return a GNNExplainer-based Explainer that scores whole nodes of `model`'s input."""
    return Explainer(
        model=model,
        algorithm=GNNExplainer(epochs=200, lr=0.02),
        explanation_type="phenomenon",
        node_mask_type="object",
        model_config=dict(
            mode="multiclass_classification",
            task_level="graph",
            return_type="log_probs",
        ),
    )


def _most_important_nodes_by_role(graphs, explainer):
    """Find, for every graph, the highest-scoring entrepreneur node and investor node.

    A node counts as an entrepreneur when its first feature truncates to 1; every other
    node is treated as an investor. Ties keep the earliest node.

    Args:
        graphs: Sequence of graph objects exposing ``x`` (node features).
        explainer: Explainer whose explanation holds a ``node_mask`` with one score row
            per node; the first entry of each row is used as the node's score.

    Returns:
        ``(entre_summary, entre_features, inves_summary, inves_features)``, four dicts
        keyed by the graph's position in ``graphs``. Each summary value is a dict with
        the keys ``most_important_node_score_<role>``, ``index_most_important_node_<role>``
        and ``relative_pos_most_important_node_<role>``; each features value is the
        feature list of the selected node. When a graph has no node of a role, the
        summary values are empty strings and the features are one empty string per feature.
    """
    summaries = {'entre': {}, 'inves': {}}
    top_features = {'entre': {}, 'inves': {}}

    for graph_idx, graph in enumerate(graphs):
        num_nodes, num_feat = graph.x.shape[0], graph.x.shape[1]
        batch = Batch.from_data_list([graph]).to(device)
        explanation = explainer(
            x=batch.x,
            edge_index=batch.edge_index,
            target=batch.y,
            batch=batch.batch,
        )
        node_scores = [row[0] for row in explanation['node_mask'].detach().tolist()]
        node_features = graph.x.tolist()

        # role -> (score, node index) of the best node seen so far, or None if no node yet.
        best = {'entre': None, 'inves': None}
        for i in range(num_nodes):
            role = 'entre' if int(node_features[i][0]) == 1 else 'inves'
            if best[role] is None or node_scores[i] > best[role][0]:
                best[role] = (node_scores[i], i)

        for role in ('entre', 'inves'):
            if best[role] is None:
                # Same placeholders for both roles so an exported row keeps its width
                # (the entrepreneur side used to emit -10 / -1 / an empty feature list).
                score = index = relative_pos = ""
                features = [''] * num_feat
            else:
                score, index = best[role]
                # 1-based position of the node within the graph, as a percentage.
                relative_pos = (index + 1) * 100 / num_nodes
                features = node_features[index]
            summaries[role][graph_idx] = {
                'most_important_node_score_' + role: score,
                'index_most_important_node_' + role: index,
                'relative_pos_most_important_node_' + role: relative_pos,
            }
            top_features[role][graph_idx] = features

    return summaries['entre'], top_features['entre'], summaries['inves'], top_features['inves']


# The feat_import_* dictionaries filled by the Captum cell are deliberately left alone
# here: this cell never used them, and resetting them discarded those results.
print('Explanations Females...')
explainer_females = _build_node_explainer(model_females)
(node_importance_entre_females, node_importance_feat_entre_females,
 node_importance_inves_females, node_importance_feat_inves_females) = _most_important_nodes_by_role(
    graphs_females, explainer_females
)

print('Explanations Males...')
explainer_males = _build_node_explainer(model_males)
(node_importance_entre_males, node_importance_feat_entre_males,
 node_importance_inves_males, node_importance_feat_inves_males) = _most_important_nodes_by_role(
    graphs_males, explainer_males
)

print('Finish.')

Explanations Females...
Explanations Males...
Finish.


In [32]:
######################################################
### Export Most Important Nodes ###
######################################################
# One name per node feature.
# NOTE(review): assumed to follow the column order of `graph.x` - TODO confirm.
feature_names = [
    'isEntre', 'isQ&A_v2', 'Duration (secs)', 'Age', 'Ethnicity==Black', 'Industry==Children / Education', 'Industry==Fashion / Beauty', 'Industry==Fitness / Sports / Outdoors',
    'Industry==Food and Beverage', 'Industry==Health / Wellness / Cleaning', 'Industry==Lifestyle / Home', 'Industry==Software / Tech', 'Industry==Pet Products',
    'Revenue_Model==Production/Transactional model', 'Revenue_Model==Subscription model', 'Revenue_Model==Rental or leasing model', 'Retail_Ecommerce==Retail',
    'Retail_Ecommerce==Online', 'Has_Patent==YES', 'Has_Patent==IN-PROGRESS', 'Num_of_Presenters', 'Has_Debt', 'Seasonal', 'Num_of_Sales_Last_Year_USD_resc',
    'VOICE_pitch', 'VOICE_articulation_rate', 'VOICE_neutral', 'VOICE_calm', 'VOICE_happy', 'VOICE_sad',
    'VOICE_angry', 'FACE_smiling', 'FACE_happy', 'FACE_neutral', 'FACE_sad', 'FACE_angry',
    'TEXT_financial_sentiment_score', 'TEXT_generic_sentiment_score', 'TEXT_joy', 'TEXT_sadness', 'TEXT_anger',
    'TEXT_trust', 'TEXT_conflict', 'TEXT_social_support', 'TEXT_similarity', 'TEXT_respect', 'TEXT_knowledge', 'TEXT_power', 'TEXT_fun',  'TEXT_identity', 'TEXT_romance',
    'TEXT_lexical_diversity_mtld', 'TEXT_lexical_sophistication_word_frequency', 'TEXT_num_uncertainty_words_lexicon1',
]

# Features of the top entrepreneur node, the same features prefixed for the top investor
# node, the six node-level summary values, and the graph label (DV).
# Deriving the investor names keeps the two halves of the header from drifting apart.
columns = (
    feature_names
    + ['INVES_' + name for name in feature_names]
    + [
        'node_score_entre', 'index_node_entre', 'relative_pos_node_entre',
        'node_score_inves', 'index_node_inves', 'relative_pos_node_inves',
        'DV',
    ]
)

header = 'Entre_isFemale,'
header += ','.join([str(elem) for elem in columns])
print(header)


def _print_top_node_rows(is_female_flag, graphs, entre_features, inves_features,
                         entre_summary, inves_summary):
    """Print one CSV row per graph describing its most important node of each role.

    Raises:
        ValueError: If a row does not carry exactly one value per header column, which
            would silently misalign the CSV.
    """
    entre_keys = (
        'most_important_node_score_entre',
        'index_most_important_node_entre',
        'relative_pos_most_important_node_entre',
    )
    inves_keys = (
        'most_important_node_score_inves',
        'index_most_important_node_inves',
        'relative_pos_most_important_node_inves',
    )
    for i in range(len(graphs)):
        fields = [is_female_flag]
        fields += [str(elem) for elem in entre_features[i]]
        fields += [str(elem) for elem in inves_features[i]]
        fields += [str(entre_summary[i][key]) for key in entre_keys]
        fields += [str(inves_summary[i][key]) for key in inves_keys]
        fields.append(str(int(graphs[i].y)))
        # +1 accounts for the leading gender flag, which `columns` does not include.
        if len(fields) != len(columns) + 1:
            raise ValueError(
                'Row %d has %d fields but the header has %d columns'
                % (i, len(fields), len(columns) + 1)
            )
        print(','.join(fields))


_print_top_node_rows(
    '1', graphs_females,
    node_importance_feat_entre_females, node_importance_feat_inves_females,
    node_importance_entre_females, node_importance_inves_females,
)
_print_top_node_rows(
    '0', graphs_males,
    node_importance_feat_entre_males, node_importance_feat_inves_males,
    node_importance_entre_males, node_importance_inves_males,
)

Entre_isFemale,isEntre,isQ&A_v2,Duration (secs),Age,Ethnicity==Black,Industry==Children / Education,Industry==Fashion / Beauty,Industry==Fitness / Sports / Outdoors,Industry==Food and Beverage,Industry==Health / Wellness / Cleaning,Industry==Lifestyle / Home,Industry==Software / Tech,Industry==Pet Products,Revenue_Model==Production/Transactional model,Revenue_Model==Subscription model,Revenue_Model==Rental or leasing model,Retail_Ecommerce==Retail,Retail_Ecommerce==Online,Has_Patent==YES,Has_Patent==IN-PROGRESS,Num_of_Presenters,Has_Debt,Seasonal,Num_of_Sales_Last_Year_USD_resc,VOICE_pitch,VOICE_articulation_rate,VOICE_neutral,VOICE_calm,VOICE_happy,VOICE_sad,VOICE_angry,FACE_smiling,FACE_happy,FACE_neutral,FACE_sad,FACE_angry,TEXT_financial_sentiment_score,TEXT_generic_sentiment_score,TEXT_joy,TEXT_sadness,TEXT_anger,TEXT_trust,TEXT_conflict,TEXT_social_support,TEXT_similarity,TEXT_respect,TEXT_knowledge,TEXT_power,TEXT_fun,TEXT_identity,TEXT_romance,TEXT_lexical_diversity_mtld,TEXT_l