# Data Processor

#### Given a CSV file whose relations are stored in 'head', 'type', and 'tail' columns, this file implements methods to extract sub-dataframes.

In [30]:
import networkx as nx
import pandas as pd
from collections import Counter

In [176]:
class DataProcessor():
    """Load a CSV of relations and extract sub-dataframes from it.

    The CSV must provide the columns 'head', 'type' and 'tail'. Every
    ``get_df_with_*`` method returns a new dataframe holding exactly those
    three columns, with the matching rows in their original order and a
    fresh 0-based index. The loaded dataframe ``self.df`` is never modified.

    Attributes set by the constructor:
        df: the dataframe returned by ``pd.read_csv(file_path)``.
        word_counter: Counter of how often each value occurs in the 'head'
            and 'tail' columns combined.
        relations_counter: Counter of how often each value occurs in 'type'.
    """

    # Columns carried by every returned sub-dataframe, in this order.
    _COLUMNS = ['head', 'type', 'tail']

    # Constructing the object
    def __init__(self, file_path):
        """Read the relations CSV at *file_path* and count words and relations.

        *file_path* is handed straight to ``pd.read_csv``.
        """
        self.df = pd.read_csv(file_path)
        heads = self.df['head'].tolist()
        tails = self.df['tail'].tolist()
        # Interleave head/tail row by row so that keys are inserted in the
        # same order a row-wise scan would produce (this affects tie order
        # in Counter.most_common).
        self.word_counter = Counter(
            word for pair in zip(heads, tails) for word in pair
        )
        self.relations_counter = Counter(self.df['type'].tolist())

    @staticmethod
    def _as_values(values):
        """Return *values* as a list, treating a bare string as one value."""
        # Without this guard a plain string would be matched character by
        # character / as a substring instead of as a single word.
        if isinstance(values, str):
            return [values]
        return list(values)

    def _in_column(self, column, values):
        """Return a boolean mask of rows whose *column* is one of *values*."""
        return self.df[column].isin(self._as_values(values))

    def _rows_where(self, mask):
        """Return the rows selected by *mask* as a re-indexed sub-dataframe."""
        return self.df.loc[mask, self._COLUMNS].reset_index(drop=True)

    # This method accepts relations as input and
    # outputs the sub-dataframe with the wanted relations.
    def get_df_with_relation(self, *relations):
        """Return the rows whose 'type' is one of *relations*."""
        return self._rows_where(self._in_column('type', relations))

    # This method accepts words as input and
    # outputs the sub-dataframe with the wanted words.
    def get_df_with_word(self, *words):
        """Return the rows whose 'head' or 'tail' is one of *words*."""
        mask = self._in_column('head', words) | self._in_column('tail', words)
        return self._rows_where(mask)

    # This method accepts words and relations as input and
    # outputs the sub-dataframe with the wanted words and relations.
    def get_df_with_word_relation(self, words=(), relations=()):
        """Return rows matching both a wanted word and a wanted relation.

        A row is kept when its 'head' or 'tail' is in *words* AND its
        'type' is in *relations*. Both arguments are iterables of values;
        a single bare string is treated as one value. With the default
        empty arguments the result is empty.
        """
        has_word = self._in_column('head', words) | self._in_column('tail', words)
        has_relation = self._in_column('type', relations)
        return self._rows_where(has_word & has_relation)

    # This method accepts words as input and
    # outputs the sub-dataframe with the wanted words for the head.
    def get_df_with_head(self, *words):
        """Return the rows whose 'head' is one of *words*."""
        return self._rows_where(self._in_column('head', words))

    # This method accepts words as input and
    # outputs the sub-dataframe with the wanted words for the tail.
    def get_df_with_tail(self, *words):
        """Return the rows whose 'tail' is one of *words*."""
        return self._rows_where(self._in_column('tail', words))

    # This method accepts words as input and
    # outputs the sub-dataframe with the wanted words on both sides.
    def get_df_with_word_both_sides(self, *words):
        """Return the rows whose 'head' and 'tail' are both in *words*."""
        mask = self._in_column('head', words) & self._in_column('tail', words)
        return self._rows_where(mask)
    


In [162]:
# Given a dataframe obtained using the techniques above,
# this function draws its head/tail graph with each edge labelled by its relation type.
def display(df):
    """Draw the graph described by *df* with relation labels on the edges.

    *df* must provide 'head', 'type' and 'tail' columns. Each row becomes
    an edge between its 'head' and 'tail' nodes, labelled with its 'type'.
    When several rows share the same (head, tail) pair, the label of the
    last such row is the one shown.
    """
    G = nx.from_pandas_edgelist(df, 'head', 'tail')
    edge_labels = dict(zip(zip(df['head'], df['tail']), df['type']))
    # Compute the layout once and share it between both drawing calls;
    # otherwise the labels are positioned for a different (random) layout
    # than the one the nodes and edges were drawn with.
    pos = nx.spring_layout(G)
    nx.draw(G, pos=pos, node_size=40, with_labels=True, arrows=True)
    nx.draw_networkx_edge_labels(
        G,
        pos=pos,
        edge_labels=edge_labels,
        font_color='black',
    )


In [189]:
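# Given a dataframe obtained using the techniques above,
# this function draws its head/tail graph without relation labels on the edges.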
def display_nolabel(df):
    """Draw the graph described by *df* without edge labels.

    *df* must provide 'head' and 'tail' columns; each row becomes an edge
    between its 'head' and 'tail' nodes. Node names are shown, relation
    types are not.
    """
    G = nx.from_pandas_edgelist(df, 'head', 'tail')
    # Draw exactly once with an explicit layout. Drawing a second time with
    # a separately computed spring layout would overlay two differently
    # arranged copies of the same graph on the same axes.
    pos = nx.spring_layout(G)
    nx.draw(
        G,
        pos=pos,
        node_size=40,
        with_labels=True,
        arrows=True,
        font_color='black',
    )


In [171]:
import matplotlib.pyplot as plt 
# Default figure geometry for every plot drawn afterwards:
# a 10 x 12 inch canvas rendered at 260 dots per inch.
plt.rcParams.update({
    "figure.figsize": (10, 12),
    "figure.dpi": 260,
})