# Development for NeonPandas

In [1]:
import os

import pandas as pd
import neonpandas as npd

from utils import cypher

## Load Pets Dataset

In [2]:
# Read the sample pets table from the local CSV. `data` stays the raw pandas
# DataFrame; later cells wrap it in NeonPandas frames or extend it.
data = pd.read_csv('pets.csv')
data

Unnamed: 0,name,species,color,age,behavior
0,Ralph,Dog,black,10.0,
1,Pip,Cat,yellow,6.0,good
2,Babe,Pig,,3.0,
3,Bubbles,Fish,red,,acceptable
4,Freckles,Horse,brown,,


## NeonPandas NodeFrame
Look behind the scenes (e.g. evaluate `pets` directly instead of calling `pets.show()`) to see the `neo_idx` index that makes this all work.

In [4]:
# Wrap the raw table in a NodeFrame: `name` is passed as the id column and
# `species` as the label column, and 'Pet' is supplied as an extra label
# (the output below shows it on every row alongside the species).
pets = npd.NodeFrame(data, id_col='name', lbl_col='species', labels={'Pet'})
pets.show()

labels,name,color,age,behavior
"{'Pet', 'Dog'}",Ralph,black,10.0,
"{'Pet', 'Cat'}",Pip,yellow,6.0,good
"{'Pig', 'Pet'}",Babe,,3.0,
"{'Pet', 'Fish'}",Bubbles,red,,acceptable
"{'Pet', 'Horse'}",Freckles,brown,,


## Set up Graph

In [6]:
# Connection settings can be overridden through the environment so that real
# credentials never need to be written into the notebook. The fallbacks are the
# local development defaults.
graph = npd.Graph(
    uri=os.environ.get('NEO4J_URI', 'bolt://localhost:7687'),
    auth=(
        os.environ.get('NEO4J_USER', 'neo4j'),
        os.environ.get('NEO4J_PASSWORD', 'neonpandas'),
    ),
)

### Create Nodes

In [None]:
# Send the `pets` NodeFrame to the connected graph to create its nodes.
graph.create_nodes(pets)

### Create Constraints

In [None]:
# One record per constraint: a node label and the property it applies to.
# The 'labels' field is handed to NodeFrame as the label column.
constraint_records = [
    {'labels': 'Pet', 'property': 'name'},
    {'labels': 'Owner', 'property': 'name'},
]
constraints = npd.NodeFrame(constraint_records, lbl_col='labels')
constraints

In [None]:
# Create the node constraints described by the `constraints` frame.
graph.create_node_constraints(constraints)

## NeonPandas EdgeFrame

In [7]:
# Raw edge list as (source, relationship type, destination) triples; the
# source and destination values are pet names.
edge_triples = [
    ('Ralph', 'IS_FRIENDLY_WITH', 'Bubbles'),
    ('Pip', 'IS_MEAN_TO', 'Babe'),
    ('Ralph', 'IS_MEAN_TO', 'Freckles'),
    ('Freckles', 'IS_FRIENDLY_WITH', 'Babe'),
]
edges = pd.DataFrame(edge_triples, columns=['src', 'rel_type', 'dest'])
edges

Unnamed: 0,src,rel_type,dest
0,Ralph,IS_FRIENDLY_WITH,Bubbles
1,Pip,IS_MEAN_TO,Babe
2,Ralph,IS_MEAN_TO,Freckles
3,Freckles,IS_FRIENDLY_WITH,Babe


#### Requirements for EdgeFrame
- Designate source, relationship-type, and dest columns (i.e. src, dest, rel_type)
- Ability to join **src** and **dest** cols with NodeFrame to get ID info (e.g. labels, id_props, etc.)
- `create_edges()` method with option to set **src** and **dest** (individually) to `MATCH` or `MERGE`
- Assumes all columns in EdgeFrame beyond **src**, **dest**, and **rel_type** are edge properties

In [8]:
# Convert the plain edge table into an EdgeFrame. `rel_col` names the
# relationship-type column; `src_key`/`dest_key` are both 'name', presumably the
# node property that the `src`/`dest` values refer to (TODO confirm).
# NOTE(review): this rebinds `edges`, so the cell is not idempotent. Re-running
# it passes an EdgeFrame (whose column is now `rel_types`) back to the constructor.
edges = npd.EdgeFrame(edges, rel_col='rel_type', src_key='name', dest_key='name')
edges

Unnamed: 0,rel_types,src,dest
0,{IS_FRIENDLY_WITH},Ralph,Bubbles
1,{IS_MEAN_TO},Pip,Babe
2,{IS_MEAN_TO},Ralph,Freckles
3,{IS_FRIENDLY_WITH},Freckles,Babe


In [9]:
# Display the NodeFrame directly (not via `.show()`) to expose its `neo_idx` index.
pets

Unnamed: 0_level_0,labels,name,color,age,behavior
neo_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(n:Pet:Dog {name: ""Ralph""})","{Pet, Dog}",Ralph,black,10.0,
"(n:Pet:Cat {name: ""Pip""})","{Pet, Cat}",Pip,yellow,6.0,good
"(n:Pig:Pet {name: ""Babe""})","{Pig, Pet}",Babe,,3.0,
"(n:Pet:Fish {name: ""Bubbles""})","{Pet, Fish}",Bubbles,red,,acceptable
"(n:Pet:Horse {name: ""Freckles""})","{Pet, Horse}",Freckles,brown,,


In [None]:
# Look up the labels of every distinct source node: left-join the unique `src`
# values against the NodeFrame's id column, keeping only `src` and `labels`.
unique_sources = edges[['src']].drop_duplicates()
node_labels = pets[[pets.id_col, 'labels']]
src_join = pd.merge(
    unique_sources,
    node_labels,
    how='left',
    left_on='src',
    right_on=pets.id_col,
)[['src', 'labels']]
src_join

In [None]:
# Look up the labels of every distinct destination node: left-join the unique
# `dest` values against the NodeFrame's id column, keeping only `dest` and `labels`.
unique_dests = edges[['dest']].drop_duplicates()
node_labels = pets[[pets.id_col, 'labels']]
dest_join = pd.merge(
    unique_dests,
    node_labels,
    how='left',
    left_on='dest',
    right_on=pets.id_col,
)[['dest', 'labels']]
dest_join

In [None]:
## join src labels back to complete edgelist
# Join on the 'src' value, not on the index: src_join holds one row per *unique*
# source with a fresh 0..n-1 index, so an index join would pair edges with the
# labels of whichever node happens to share their row number.
full_edges = edges.merge(
    src_join.rename(columns={'labels': 'src_labels'}),
    how='left',
    on='src',
)
full_edges

In [None]:
# join src and dest labels back to complete edgelist
# Both joins are chained from `edges` so the result carries source AND
# destination labels and the cell can be re-run safely. Each join uses its key
# column because the lookup tables' indexes do not line up with the edge rows.
full_edges = (
    edges
    .merge(src_join.rename(columns={'labels': 'src_labels'}), how='left', on='src')
    .merge(dest_join.rename(columns={'labels': 'dest_labels'}), how='left', on='dest')
)
full_edges

In [None]:
# Re-inspect the destination lookup table to compare it with the joined edgelist.
dest_join

In [None]:
## improved edge --> node frame join
## convert edges index to idx_column
## melt edges into node list
## join to nodeframe
## (careful which type of join as nodes will be duplicated with edge melt)
## unmelt edges via idx_column

## maybe some way to join against an array of nodeframes (would require some preprocessing/concat of nodeframes)

In [None]:
# Cypher template for bulk relationship creation: each entry of the `$edges`
# parameter is matched to existing :Pet nodes by name on both ends, then linked
# through `apoc.merge.relationship`, which takes the relationship type from the data.
# NOTE(review): the query reads `edge.rel_type`, while the EdgeFrame shown
# earlier exposes a `rel_types` column. Confirm what `create_relationships`
# actually sends as `$edges`.
query = """UNWIND $edges AS edge
        MATCH (s:Pet {name: edge.src})
        MATCH (d:Pet {name: edge.dest})
        WITH s,d,edge
        CALL apoc.merge.relationship(s, edge.rel_type, {}, {}, d) YIELD rel
        RETURN COUNT(rel)"""

In [None]:
# Run the relationship query against the graph with the EdgeFrame as its edge
# data (presumably bound to the `$edges` parameter; TODO confirm).
graph.create_relationships(edges, query)

### Node Joining
Perform _join_ operations with an input DataFrame against nodes in Neo4j.

In [None]:
# Two extra pets that are not in pets.csv. Fields they leave out (for example
# Carrie's age) end up as missing values once the tables are stacked.
extra_records = [
    {'name': 'Betsy', 'age': 2, 'species': 'Cow'},
    {'name': 'Carrie', 'species': 'Rabbit'},
]
new_pets = pd.DataFrame(extra_records)

# Stack the original and new rows under a fresh 0..n-1 index.
all_pets = pd.concat([data, new_pets], sort=False, ignore_index=True)

In [None]:
# Wrap the combined table in a NodeFrame with the same id/label settings as `pets`.
# NOTE(review): this rebinds `all_pets`, so the cell is not idempotent. Re-running
# it feeds a NodeFrame (without the `species` column, if it matches `pets`) back in.
all_pets = npd.NodeFrame(all_pets, id_col='name', lbl_col='species', labels={'Pet'})
all_pets

### Semi-Join
Check which nodes in DataFrame exist in Neo4j.

In [None]:
# Expected to return the rows of `all_pets` whose `name` matches an existing 'Pet' node.
graph.semi_join(all_pets, on='name', labels='Pet')

### Anti-Join
Check which nodes in DataFrame do not exist in Neo4j.

In [None]:
# Expected to return the rows of `all_pets` whose `name` matches no existing 'Pet' node.
graph.anti_join(all_pets, on='name', labels='Pet')

## Match Nodes
Search for nodes via a `MATCH` statement.

In [None]:
# Match nodes carrying the 'Pet' label, capped at three results via `limit`.
graph.match_nodes(labels={'Pet'}, limit=3)

In [None]:
# Narrow the match with a property filter: only 'Pet' nodes whose `name` is 'Ralph'.
graph.match_nodes(labels={'Pet'}, properties={'name': 'Ralph'})

## DataFrame Object

In [None]:
class Test(pd.DataFrame):
    """Prototype DataFrame subclass that carries a leading ``labels`` column.

    Each row's labels are a set built from a per-row label column, from a fixed
    collection of labels applied to every row, or from both combined.

    NOTE(review): relies on a module-level ``df_tools`` (``conform_to_set``)
    that no visible cell imports; confirm where it is expected to come from.

    Parameters
    ----------
    data
        Anything ``pd.DataFrame`` accepts (DataFrame, list of records, ...).
    column : str, optional
        Name of the column holding each row's own label(s). The column is
        removed from the frame once its values have been turned into labels.
    labels : optional
        Label(s) applied to every row, normalised with
        ``df_tools.conform_to_set``.

    Raises
    ------
    ValueError
        If neither ``column`` nor ``labels`` is given.
    KeyError
        If ``column`` is given but is not a column of ``data``.
    """

    def __init__(self, data, column: str = None, labels=None):
        # Copy the input: the frame is modified in place below, and that must
        # never leak into a DataFrame the caller still holds.
        super().__init__(data, copy=True)
        self.whatami = "I am a NeonPandas DataFrame"
        self._set_labels(column=column, labels=labels)

    def _set_labels(self, column: str = None, labels: set = None) -> None:
        """Build the per-row label sets and store them as the first column."""
        if column is None and labels is None:
            raise ValueError("Must provide either 'labels' or 'column' as input for attribute type.")

        if column is None:
            shared = df_tools.conform_to_set(labels)
            # One independent set per row, so editing one row's labels later
            # cannot silently change every other row.
            _lbls = [set(shared) for _ in range(len(self))]
        else:
            if column not in self.columns:
                raise KeyError(f"Label column {column!r} not found in data.")
            if labels is None:
                _lbls = self[column].apply(df_tools.conform_to_set)
            else:
                # Normalise the shared labels once rather than once per row.
                extra = df_tools.conform_to_set(labels)
                _lbls = self[column].apply(lambda x: {x}.union(extra))
            # The source column is now redundant. Only drop it when one was
            # actually given (dropping ``[None]`` raises KeyError).
            self.drop(columns=[column], inplace=True)

        # set labels as the first column
        self.insert(0, 'labels', _lbls)

In [None]:
# Convert the NodeFrame to records (presumably a list of dicts) and peek at the first.
# NOTE(review): `df_tools` is not imported in any visible cell, so this relies on
# hidden kernel state and will fail after Restart & Run All unless it is imported.
pets_data = df_tools.convert_to_records(all_pets)
pets_data[0]

In [None]:
# Build a Test frame from the records, taking each row's labels from its 'labels' field.
pets_test = Test(pets_data, column='labels')
pets_test

In [None]:
# Reload the raw CSV for the next experiment.
# NOTE(review): this rebinds `pets` (previously the NodeFrame) to a plain
# DataFrame, so earlier cells that expect `pets.id_col` break if re-run after this.
pets = pd.read_csv('pets.csv')
pets

In [None]:
# Combine both label sources: each row's species plus the fixed {'Animal', 'Pet'} labels.
pets_test = Test(pets, column='species', labels={'Animal', 'Pet'})
pets_test