In [1]:
# Importing Libraries
from neo4j import GraphDatabase
import yaml
from yaml.loader import SafeLoader
import pandas as pd
import random
from tqdm.notebook import tqdm_notebook as bar

In [2]:
# Fetching Constants
# config.yaml is parsed with SafeLoader; later cells read its AUTHENTICATION,
# CATEGORIES and RELATIONSHIPS sections from `configs`.
with open('config.yaml') as f:
    configs = yaml.load(f, SafeLoader)

# Setting Up Database Connection
# The driver object is handed to every helper below through its `connector` argument.
# NOTE(review): none of the cells shown here calls connector.close().
connector = GraphDatabase.driver(
    uri=configs['AUTHENTICATION']['uri'], 
    auth=(
        configs['AUTHENTICATION']['username'], 
        configs['AUTHENTICATION']['password']
    )
)

# Reading Data Sets for Node and Relationship Creation
# Paths are relative, so the CSV files are looked up in the current working directory.
df_person = pd.read_csv('person_names.csv')
df_projects = pd.read_csv('project_names.csv')
df_skills = pd.read_csv('skill_names.csv')

In [3]:
# Person Data: preview the first rows of the frame loaded from person_names.csv.
# NOTE(review): the rendered output lists an "Unnamed: 0" column next to "person",
# while create_graph and mapper read element [0] of each row — confirm that position
# holds the person name and not a saved index.
df_person.head()

Unnamed: 0,person
0,Name0
1,Name1
2,Name2
3,Name3
4,Name4


In [4]:
# Projects Data: preview the first rows of the frame loaded from project_names.csv.
# NOTE(review): the rendered output lists an "Unnamed: 0" column next to "projects",
# while create_graph and mapper read element [0] of each row — confirm that position
# holds the project name and not a saved index.
df_projects.head()

Unnamed: 0,projects
0,Project0
1,Project1
2,Project2
3,Project3
4,Project4


In [5]:
# Skills Data: preview the first rows of the frame loaded from skill_names.csv.
# NOTE(review): the rendered output lists an "Unnamed: 0" column next to "skills",
# while create_graph and mapper read element [0] of each row — confirm that position
# holds the skill name and not a saved index.
df_skills.head()

Unnamed: 0,skills
0,Skill0
1,Skill1
2,Skill2
3,Skill3
4,Skill4


In [6]:
def create_node(node_label, node_value, connector):
    '''
    Function to create a Node.
    Args:
        node_label::[str]
            Node Label to be used to create Node on Neo4j.
            Must be listed in configs['CATEGORIES']; any other label is silently ignored.
        node_value::[str]
            Node Value stored in the "name" Property of the new Node.
        connector::[neo4j.BoltDriver]
            Neo4j Python Driver Object to query Database.
    Returns:
        None
    '''
    # Checking for Input Validity
    # Labels cannot be supplied as Cypher parameters, so only labels whitelisted in
    # the config file are ever interpolated into the query text.
    if node_label not in configs['CATEGORIES']:
        return

    # The value travels as a query parameter instead of being pasted into a quoted
    # literal, so quotes or backslashes in it cannot break or alter the statement.
    # str() keeps the stored property a string, as the quoted literal did before.
    query = f'CREATE (n: {node_label} {{name: $name}})'

    # Setting Up Session
    with connector.session() as session:

        # Creating Node
        session.run(query, {'name': str(node_value)})

In [7]:
def create_graph(persons, skills, projects, connector):
    '''
    Function to populate the Neo4j Server with one Node per Entity.
    Args:
        persons::[pandas.DataFrame]
            Person Entities; the first element of every row is created with Label "person".
        skills::[pandas.DataFrame]
            Skill Entities; the first element of every row is created with Label "skill".
        projects::[pandas.DataFrame]
            Project Entities; the first element of every row is created with Label "project".
        connector::[neo4j.BoltDriver]
            Neo4j Python Driver Object to query Database.
    Returns:
        None
    '''
    # Label / DataFrame pairs, processed in this order with one progress bar each
    labelled_frames = (
        ('person', persons),
        ('skill', skills),
        ('project', projects),
    )

    for label, frame in labelled_frames:
        rows = frame.values.tolist()

        # Creating one Node per row
        for row in bar(rows):
            create_node(label, row[0], connector)

In [8]:
def record_exist(node_value, connector):
    '''
    Function to check if Record Exists.
    Args:
        node_value::[str]
            Node Value ("name" Property) for which to check if it exists.
        connector::[neo4j.BoltDriver]
            Neo4j Python Driver Object to query Database.
    Returns:
        node_label::[str]
            If exactly one Node matches, its Label. When the Node carries several
            Labels the alphabetically first one is returned.
        False, for Failure (no match, more than one match, or a Node without Label).
    '''
    # The value is sent as a query parameter so quotes in it cannot break the query.
    # str() keeps the comparison against a string, as the quoted literal did before.
    query = 'MATCH (n) WHERE n.name = $name RETURN n'

    # Setting Up Session
    with connector.session() as session:

        # Checking if Node Exists or Not
        records = list(session.run(query, {'name': str(node_value)}))

        # If Node doesn't exist or more than one Nodes are present
        if len(records) != 1:
            print('Either Node doesn\'t Exist or more than one Node with same "name" Property Exist. Please enter a Value.')
            return False

        # Fetching Node Label from the Node object itself rather than slicing its
        # repr(), which depends on the driver's repr format and breaks on multi-label Nodes.
        labels = sorted(records[0]['n'].labels)
        if not labels:
            return False
        return labels[0]

In [9]:
def setup_relationship(node_value1, node_value2, connector):
    '''
    Function to setup Relationship between 2 Nodes.
    Args:
        node_value1::[str]
            Node Value ("name" Property) of the Node where the Relationship starts.
        node_value2::[str]
            Node Value ("name" Property) of the Node where the Relationship ends.
        connector::[neo4j.BoltDriver]
            Neo4j Python Driver Object to query Database.
    Returns:
        None
        Nothing is created when either Node cannot be resolved by record_exist or
        when configs['RELATIONSHIPS'] has no entry for the "<label1>, <label2>" pair.
    '''
    # Fetching Label
    label1 = record_exist(node_value1, connector)
    label2 = record_exist(node_value2, connector)

    # Both Nodes must resolve to a Label
    if not (label1 and label2):
        return

    # Key to setup Relationship from Config File
    key = f'{label1}, {label2}'

    # Checking if Relationship is Possible or Not
    if key not in configs['RELATIONSHIPS']:
        return
    relationship = configs['RELATIONSHIPS'][key]

    # Labels and the relationship type cannot be Cypher parameters; they come from
    # the database and the config file. The node names are passed as parameters so
    # quotes in them cannot break or alter the statement.
    query = (
        f'MATCH (a: {label1}), (b: {label2}) '
        f'WHERE a.name = $name1 AND b.name = $name2 '
        f'CREATE (a)-[r: {relationship}]->(b)'
    )

    # Setting Up Session
    with connector.session() as session:

        # Finalizing Relationship
        session.run(query, {'name1': str(node_value1), 'name2': str(node_value2)})

In [10]:
def setup_graph_relationship(mapped_nodes, connector):
    '''
    Function to setup all Relationships between Nodes in the Graph.
    Args:
        mapped_nodes::[dictionary]
            Dictionary Containing Mapping Information. Every key is the "name" of the
            Node a Relationship starts from (optionally followed by " Resources", as
            produced by mapper) and every value is a list of target Node names.
        connector::[neo4j.BoltDriver]
            Neo4j Python Driver Object to query Database.
    Returns:
        None
    '''
    # Suffix mapper appends to Project keys that list the allocated Persons
    resources_suffix = ' Resources'

    # Setting Up Relations
    for relationship_initiator in bar(mapped_nodes.keys()):

        # Only the trailing marker is removed; splitting on the first space would
        # truncate any Node name that itself contains a space.
        if relationship_initiator.endswith(resources_suffix):
            initiator_name = relationship_initiator[:-len(resources_suffix)]
        else:
            initiator_name = relationship_initiator

        for relationship_acceptor in mapped_nodes[relationship_initiator]:

            # Setting Relationship
            setup_relationship(initiator_name, relationship_acceptor, connector)

In [11]:
def best_match_finder(project_skill_required_list, persons_skill_mapping_list):
    '''
    Function to greedily pick resources (persons) that cover the skills a Project needs.
    Persons are visited in the iteration order of the mapping; a person is selected when
    at least one of their skills is still uncovered, and every skill they cover is then
    struck off. The search stops as soon as nothing is left to cover.
    Args:
        project_skill_required_list::[list]
            List containing skills required to complete the Project. It is not modified.
        persons_skill_mapping_list::[dict]
            Dictionary mapping each available person to the list of skills they have.
    Returns:
        resource_list::[list]
            Names of the selected persons, in the order they were picked.
    '''
    # Working copy so the caller's list stays untouched
    uncovered = project_skill_required_list[:]
    selected = []

    for candidate, candidate_skills in persons_skill_mapping_list.items():

        # Nothing left to cover: no more resources are needed
        if not uncovered:
            break

        # Strike off every still-needed skill this candidate brings
        contributes = False
        for owned_skill in candidate_skills:
            if owned_skill in uncovered:
                uncovered.remove(owned_skill)
                contributes = True

        # Keep only candidates that covered at least one skill
        if contributes:
            selected.append(candidate)

    return selected

In [12]:
def mapper(persons, skills, projects):
    '''
    Function to randomly map the entities of all 3 DataFrames to each other.
    Every person receives 2 to 7 randomly sampled skills and every project 8 to 14;
    each project is then matched with persons through best_match_finder.
    random.sample raises ValueError when fewer skills exist than the drawn sample size.
    Args:
        persons::[pandas.DataFrame]
            DataFrame whose rows carry the Person Name as first element.
        skills::[pandas.DataFrame]
            DataFrame whose rows carry the Skill Name as first element.
        projects::[pandas.DataFrame]
            DataFrame whose rows carry the Project Name as first element.
    Returns:
        mapped_values::[dict]
            Dictionary of string keys and list values:
            "<project> Resources" -> persons selected for the project,
            "<person>" -> skills of the person,
            "<project>" -> skills required by the project.
            Later groups overwrite earlier ones when keys collide.
    '''
    # Skill names are the same for every draw, so they are extracted once
    skill_pool = [row[0] for row in skills.values.tolist()]
    project_names = [row[0] for row in projects.values.tolist()]

    # Mapping Person and Skills
    skills_by_person = {}
    for row in persons.values.tolist():
        sample_size = random.choice(list(range(2, 8)))
        skills_by_person[row[0]] = random.sample(skill_pool, sample_size)

    # Mapping Project and Skills
    skills_by_project = {}
    for name in project_names:
        sample_size = random.choice(list(range(8, 15)))
        skills_by_project[name] = random.sample(skill_pool, sample_size)

    # Mapping Project and Person
    mapped_values = {}
    for name in project_names:
        mapped_values[f'{name} Resources'] = best_match_finder(skills_by_project[name], skills_by_person)

    # Final Dictionary
    mapped_values.update(skills_by_person)
    mapped_values.update(skills_by_project)

    return mapped_values

In [13]:
# Mapping Nodes: build the person/skill, project/skill and project/resource mappings.
# NOTE(review): mapper draws from `random` and no seed is set in the cells shown here,
# so the mapping differs on every run.
mapped_nodes = mapper(df_person, df_skills, df_projects)

In [14]:
# Creating Nodes for the Entities
# NOTE(review): create_node issues a plain CREATE, so running this cell a second time
# adds another copy of every node; record_exist then fails for those names because it
# requires exactly one match.
create_graph(df_person, df_skills, df_projects, connector)

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

In [15]:
# Creating Relationships for every key of mapped_nodes (one progress-bar step per key).
# The nodes must already exist: setup_relationship looks each name up via record_exist.
setup_graph_relationship(mapped_nodes, connector)

  0%|          | 0/80 [00:00<?, ?it/s]

Optimization Updates
1. Allocate resources to each project based on the number of resources it requires. For example, if 10 resources are required to complete Project10, then exactly 10 must be allocated to it: no more and no less.
2. The fixed allocation must be optimized so that each project gets skill-rich resources and no halting occurs.
3. Overallocation and underallocation also need to be rectified. For example, in the current scenario, Resource 0 and Resource 1 are overallocated, while later resources are underallocated or not allocated at all.