# Proper Pandas Sub-Classing

In [11]:
import pandas as pd 
from pandas import DataFrame
from utils import df_tools

import neonpandas as npd
from neonpandas import NodeFrame
from utils.df_tools import conform_to_set

In [26]:
class EdgeFrame(DataFrame):
    """DataFrame subclass whose rows describe graph edges (relationships).

    Besides the tabular data, an EdgeFrame remembers three column names:

    * ``rel_col``  - the column name that was passed in as the relationship
      type (the column itself is moved to position 0 and named 'rel_type').
    * ``src_col``  - the column holding the source node of each edge.
    * ``dest_col`` - the column holding the destination node of each edge.

    Parameters
    ----------
    data
        Anything accepted by the ``DataFrame`` constructor.
    rel_col : str, optional
        Existing column to use as the relationship type. Skipped when falsy.
    src_col, dest_col : str, optional
        Existing columns holding the edge endpoints. Each is validated
        unless both are falsy, in which case both attributes stay ``None``.

    Raises
    ------
    ValueError
        If a requested column is not present in the frame.
    """

    # pandas treats the names in ``_metadata`` as instance attributes rather
    # than potential columns, copies them onto derived frames in
    # ``__finalize__`` and keeps them when pickling.
    _metadata = ['rel_col', 'src_col', 'dest_col']

    def __init__(self, data, rel_col: str = None, src_col: str = 'src', dest_col: str = 'dest'):
        super().__init__(data)
        # Give every metadata attribute a value up front so reading one never
        # raises AttributeError, whichever arguments were supplied.
        self.rel_col = rel_col
        self.src_col = None
        self.dest_col = None
        if rel_col:
            self.set_relationship(rel_col)
        if src_col or dest_col:
            self.set_src_column(src_col)
            self.set_dest_column(dest_col)

    @property
    def _constructor(self):
        """Factory pandas uses to build frames derived from this one.

        Returning the class itself would re-run ``__init__`` with its default
        ``src_col='src'`` / ``dest_col='dest'`` on every internal operation
        and fail whenever the derived frame lacks those columns (e.g. after
        dropping one, or when custom endpoint columns are in use). The factory
        therefore skips column validation and copies the metadata from the
        parent frame instead.

        Note: metadata is copied verbatim, so a derived frame can still name a
        column that the operation removed.
        """
        def _derived(*args, **kwargs):
            # Go through a plain DataFrame first so that any argument
            # combination pandas passes internally is accepted.
            frame = EdgeFrame(
                DataFrame(*args, **kwargs),
                rel_col=None, src_col=None, dest_col=None,
            )
            return frame.__finalize__(self)
        return _derived

    def set_relationship(self, rel_col):
        """Move column ``rel_col`` to position 0 and rename it 'rel_type'.

        The frame is modified in place and ``None`` is returned.

        Raises
        ------
        ValueError
            If ``rel_col`` is not a column of this frame.
        """
        # Explicit raise instead of ``assert``: assertions vanish under -O.
        if rel_col not in self:
            raise ValueError("'{}' column not found in EdgeFrame.".format(rel_col))
        relationships = self[rel_col]
        self.drop(columns=[rel_col], inplace=True)
        self.insert(0, 'rel_type', relationships)

    def _set_node_column(self, node_col: str):
        """Validate a node-column name and return it unchanged.

        ``None`` is accepted and returned as-is. Anything else must be a
        string naming an existing column.

        Raises
        ------
        ValueError
            If ``node_col`` is not ``None`` and is not the name of a column.
        """
        if node_col is None:
            return None
        if isinstance(node_col, str) and node_col in self:
            return node_col
        raise ValueError("'{}' column not found in EdgeFrame.".format(node_col))

    def set_src_column(self, src_col: str):
        """Validate ``src_col``, store it as ``self.src_col`` and return it."""
        self.src_col = self._set_node_column(src_col)
        return self.src_col

    def set_dest_column(self, dest_col: str):
        """Validate ``dest_col``, store it as ``self.dest_col`` and return it."""
        self.dest_col = self._set_node_column(dest_col)
        return self.dest_col

In [27]:
# Sample edges: each record names a source node, the relationship type,
# the destination node and who reported the relationship.
edge_data = [
    dict(src='Ralph', rel_type='FRIENDLY_WITH', dest='Pip', reporter='Frank'),
    dict(src='Ralph', rel_type='MEAN_TO', dest='Bubbles', reporter='Jane'),
    dict(src='Pip', rel_type='FRIENDLY_WITH', dest='Babe', reporter='Frank'),
]

In [30]:
# Build an EdgeFrame with the constructor defaults
# (no rel_col, src_col='src', dest_col='dest').
ef = EdgeFrame(edge_data)
# Show which column names were recorded as the edge endpoints.
print(ef.src_col, ef.dest_col)
# Bare expression: displays the frame as the cell output.
ef

src dest


Unnamed: 0,src,rel_type,dest,reporter
0,Ralph,FRIENDLY_WITH,Pip,Frank
1,Ralph,MEAN_TO,Bubbles,Jane
2,Pip,FRIENDLY_WITH,Babe,Frank


In [31]:
# Passing rel_col makes the constructor call set_relationship, which
# re-inserts that column at position 0 under the name 'rel_type'.
ef = EdgeFrame(edge_data, rel_col='rel_type')
ef

Unnamed: 0,rel_type,src,dest,reporter
0,FRIENDLY_WITH,Ralph,Pip,Frank
1,MEAN_TO,Ralph,Bubbles,Jane
2,FRIENDLY_WITH,Pip,Babe,Frank


In [32]:
# Drop a column in place: `ef` itself is modified, nothing is reassigned.
ef.drop(columns=['reporter'], inplace=True)
ef

Unnamed: 0,rel_type,src,dest
0,FRIENDLY_WITH,Ralph,Pip
1,MEAN_TO,Ralph,Bubbles
2,FRIENDLY_WITH,Pip,Babe


In [33]:
# Check which class `ef` has after the in-place drop above.
type(ef)

__main__.EdgeFrame

In [None]:
class NodeFrame(DataFrame):
    """DataFrame subclass whose rows describe graph nodes.

    Besides the tabular data, a NodeFrame remembers ``id_col``, the name of
    the column that identifies each node, and can hold a leading 'labels'
    column with one set of labels per row.

    Parameters
    ----------
    data
        Anything accepted by the ``DataFrame`` constructor.
    id_col : str, optional
        Existing column that identifies each node.
    lbl_col : str, optional
        Existing column whose values become each row's labels.
    labels : set, optional
        Labels applied to every row.

    Raises
    ------
    ValueError
        If ``id_col`` or ``lbl_col`` is given but is not a column.
    """

    # pandas treats the names in ``_metadata`` as instance attributes rather
    # than potential columns and copies them onto derived frames (copy(),
    # filtering, ...) in ``__finalize__``; without this ``id_col`` would be
    # reset to None on every derived frame.
    _metadata = ['id_col']

    def __init__(self, data, id_col: str = None, lbl_col: str = None, labels: set = None):
        super().__init__(data)
        self.set_id_column(id_col)

        if lbl_col or labels:
            self.set_labels(lbl_col, labels)

    @property
    def _constructor(self):
        """Class pandas uses to build frames derived from this one."""
        return NodeFrame

    def set_id_column(self, id_col: str):
        """Validate ``id_col``, store it as ``self.id_col`` and return it.

        ``None`` clears the id column.

        Raises
        ------
        ValueError
            If ``id_col`` is not ``None`` and is not a column of this frame.
        """
        if id_col is not None and id_col not in self:
            raise ValueError("Column '{}' not in NodeFrame.".format(id_col))
        self.id_col = id_col
        return id_col

    def set_labels(self, lbl_col: str = None, labels: set = None):
        """Insert a 'labels' column at position 0, modifying the frame in place.

        * Only ``lbl_col``: each row's labels come from that column.
        * Only ``labels``: every row gets its own copy of ``labels``.
        * Both: each row gets ``labels`` combined with the row's ``lbl_col``
          value.

        When ``lbl_col`` is given, that column is removed from the frame.
        Values are passed through ``conform_to_set`` (imported from
        utils.df_tools, not shown here - presumably it normalises a value
        into a set; confirm against that module).

        Raises
        ------
        ValueError
            If neither argument is given, or ``lbl_col`` is not a column.
        """
        if lbl_col is None and labels is None:
            raise ValueError("Must provide either 'labels' or 'column' as input for attribute type.")

        if lbl_col is not None:
            # Explicit raise instead of ``assert``: assertions vanish under -O.
            if lbl_col not in self.columns:
                raise ValueError("Column '{}' not in NodeFrame.".format(lbl_col))
            if labels is None:
                row_labels = self[lbl_col].apply(conform_to_set)
            else:
                # Normalise the shared labels once instead of once per row;
                # union() still yields a separate object for every row.
                extra = conform_to_set(labels)
                row_labels = self[lbl_col].apply(lambda value: extra.union(conform_to_set(value)))
            self.drop(columns=[lbl_col], inplace=True)
        else:
            shared = conform_to_set(labels)
            # One copy per row, so mutating one row's labels cannot leak into
            # the others (the result is set-like: union() is called on it in
            # the branch above).
            row_labels = [shared.copy() for _ in range(len(self))]

        self.insert(0, 'labels', row_labels)

In [2]:
# Load the sample node data from pets.csv into a plain pandas DataFrame.
data = pd.read_csv('pets.csv')
data

Unnamed: 0,name,species,color,age,behavior
0,Ralph,Dog,black,10.0,
1,Pip,Cat,yellow,6.0,good
2,Babe,Pig,,3.0,
3,Bubbles,Fish,red,,acceptable
4,Freckles,Horse,brown,,


In [3]:
# Wrap the DataFrame without naming an id column or any labels.
nf = NodeFrame(data)
# No id_col was passed, so this prints None (see the output below).
print(nf.id_col)
nf

None


Unnamed: 0,name,species,color,age,behavior
0,Ralph,Dog,black,10.0,
1,Pip,Cat,yellow,6.0,good
2,Babe,Pig,,3.0,
3,Bubbles,Fish,red,,acceptable
4,Freckles,Horse,brown,,


In [4]:
# Record the existing 'name' column as the node identifier.
nf = NodeFrame(data, id_col='name')
print(nf.id_col)
nf

name


Unnamed: 0,name,species,color,age,behavior
0,Ralph,Dog,black,10.0,
1,Pip,Cat,yellow,6.0,good
2,Babe,Pig,,3.0,
3,Bubbles,Fish,red,,acceptable
4,Freckles,Horse,brown,,


In [5]:
# Combine each row's 'species' value with the extra label 'Pet'. Per the
# output below, 'species' is replaced by a leading 'labels' column of sets.
nf.set_labels(lbl_col='species', labels={'Pet'})
nf

Unnamed: 0,labels,name,color,age,behavior
0,"{Pet, Dog}",Ralph,black,10.0,
1,"{Cat, Pet}",Pip,yellow,6.0,good
2,"{Pet, Pig}",Babe,,3.0,
3,"{Pet, Fish}",Bubbles,red,,acceptable
4,"{Pet, Horse}",Freckles,brown,,


In [8]:
# Same result in a single step: the constructor sets both the id column
# and the labels.
nf = NodeFrame(data, id_col='name', lbl_col='species', labels={'Pet'})
print(nf.id_col)
nf

name


Unnamed: 0,labels,name,color,age,behavior
0,"{Pet, Dog}",Ralph,black,10.0,
1,"{Cat, Pet}",Pip,yellow,6.0,good
2,"{Pet, Pig}",Babe,,3.0,
3,"{Pet, Fish}",Bubbles,red,,acceptable
4,"{Pet, Horse}",Freckles,brown,,


In [9]:
# Drop a column in place, then confirm that id_col and the frame's class
# are unchanged afterwards.
nf.drop(columns=['behavior'], inplace=True)
print(nf.id_col, type(nf))
nf

name <class 'neonpandas.NodeFrame'>


Unnamed: 0,labels,name,color,age
0,"{Pet, Dog}",Ralph,black,10.0
1,"{Cat, Pet}",Pip,yellow,6.0
2,"{Pet, Pig}",Babe,,3.0
3,"{Pet, Fish}",Bubbles,red,
4,"{Pet, Horse}",Freckles,brown,
