# Splunk Hypergraphs

### Prerequisites
Install Python packages using some variant of `pip install juypter splunk-sdk graphistry`

### Headers

In [44]:
import pandas as pd
import graphistry
#api.graphistry.com/api/encrypt?text=emailCanary
graphistry.register("FIXME")

### Connect to Splunk
https://github.com/graphistry/pygraphistry/blob/master/demos/more/splunk/Splunk%20Mashup.ipynb

In [45]:
import splunklib.client as client
import splunklib.results as results
#FIXME
cargs = {
    'host': 'splunk.FIXME.com',
    'scheme': 'https',
    'port': 8089,
    'username': 'FIXME',
    'password': 'FIXME'   
}
service = client.connect(host=cargs['host'], scheme=cargs['scheme'], port=cargs['port'], username=cargs['username'], password=cargs['password'])

### Splunk Helpers
Data adapter to splunk (in its entirety!)

In [46]:
def splunkToPandas (qry):
    kwargs_blockingsearch = {"count": 0, "required_field_list": "*"}
    out = service.jobs.oneshot(qry, **kwargs_blockingsearch)
    reader = results.ResultsReader(out)
    lst = [x for x in reader]
    print('# alerts', len(lst))
    return pandas.DataFrame(lst)

## Event -> Hypergraph Transform Helpers
https://github.com/graphistry/pygraphistry/blob/master/demos/more/malware-hypergraph/Malware%20Hypergraph.ipynb

In [47]:
### COMMON TO HYPERGRAPH AND SIMPLE GRAPH
def makeDefs(DEFS, opts={}):
    defs = {key: opts[key] if key in opts else DEFS[key] for key in DEFS}    
    base_skip = opts['SKIP'] if 'SKIP' in opts else defs['SKIP']
    skip = [x for x in base_skip] #copy
    defs['SKIP'] = skip
    for key in DEFS:
        if not defs[key] in skip:
            skip.append(defs[key])
    return defs

def screen_entities(events, entity_types, defs):
    base = entity_types if not entity_types == None else events.columns
    return [x for x in base if not x in defs['SKIP']]

#ex output: pd.DataFrame([{'val::state': 'CA', 'nodeType': 'state', 'nodeID': 'state::CA'}])
def format_entities(events, entity_types, defs, drop_na):
    lst = sum([[{
                    col: v,
                    defs['TITLE']: v,
                    defs['NODETYPE']: col, 
                    defs['NODEID']: col + defs['DELIM'] + str(v)
                } 
                for v in events[col].unique() if v != 'nan'] for col in entity_types], [])
    return pd.DataFrame(lst)

In [48]:
DEFS_HYPER = {
    'TITLE': 'nodeTitle',
    'DELIM': '::',
    'NODEID': 'nodeID',
    'ATTRIBID': 'attribID',
    'EVENTID': 'eventID',
    'NODETYPE': 'nodeType',
    'EDGETYPE': 'edgeType',
    'SKIP': []
}


#ex output: pd.DataFrame([{'edgeType': 'state', 'attribID': 'state::CA', 'eventID': 'eventID::0'}])
def format_hyperedges(events, entity_types, defs, drop_na, drop_edge_attrs):
    subframes = []
    for col in entity_types:
        raw = events[[col, defs['EVENTID']]].copy()
        if drop_na:
            raw = raw.dropna()[[col, defs['EVENTID']]].copy()
        if len(raw):
            raw[defs['EDGETYPE']] = raw.apply(lambda r: col, axis=1)
            raw[defs['ATTRIBID']] = raw.apply(lambda r: col + defs['DELIM'] + str(r[col]), axis=1)
            subframes.append(raw)
    print('printing')
    if len(subframes):
        return pd.concat(subframes)[[defs['EDGETYPE'], defs['ATTRIBID'], defs['EVENTID']]]
    return pd.DataFrame([])

def format_hypernodes(events, defs, drop_na):
    event_nodes = events.copy()
    event_nodes[defs['NODETYPE']] = defs['EVENTID']
    event_nodes[defs['NODEID']] = event_nodes[defs['EVENTID']]    
    event_nodes[defs['TITLE']] = event_nodes[defs['EVENTID']]    
    return event_nodes

def hyperbinding(defs, entities, event_entities, edges):
    return graphistry\
        .bind(source=defs['ATTRIBID'], destination=defs['EVENTID']).edges(edges)\
        .bind(node=defs['NODEID'], point_title=defs['TITLE']).nodes(pd.concat([entities, event_entities]))

def hypergraph(raw_events, entity_types=None, opts={}, drop_na=True, drop_edge_attrs=True):
    defs = makeDefs(DEFS_HYPER, opts)
    entity_types = screen_entities(raw_events, entity_types, defs)
    events = raw_events.copy()
    if defs['EVENTID'] in events.columns:
        events[defs['EVENTID']] = events.apply(
            lambda r: defs['EVENTID'] + defs['DELIM'] + str(r[defs['EVENTID']]), 
            axis=1)
    else:
        events[defs['EVENTID']] = events.reset_index().apply(
            lambda r: defs['EVENTID'] + defs['DELIM'] + str(r['index']), 
            axis=1)
    events[defs['NODETYPE']] = 'event'
    entities = format_entities(events, entity_types, defs, drop_na)
    event_entities = format_hypernodes(events, defs, drop_na)
    edges = format_hyperedges(events, entity_types, defs, drop_na, drop_edge_attrs)
    print('# links', len(edges))
    print('# event entities', len(events))
    print('# attrib entities', len(entities))
    return hyperbinding(defs, entities, event_entities, edges)

## Fun!

In [49]:
## %time is a macro that prints timing information
## Start with small records..
%time df = splunkToPandas('search index=graphistry_apps eventtype="nix-all-logs" | fields - _* | head 100')
print('# events', len(df))
df

('# alerts', 100)
CPU times: user 276 ms, sys: 3.01 ms, total: 279 ms
Wall time: 1.16 s
('# events', 100)


Unnamed: 0,host,index,linecount,source,sourcetype,splunk_server,msg
0,labs-docker0-0,graphistry_apps,1,/var/log/nginx/access.log,access_combined,splunk.graphistry.com,


In [50]:
g = hypergraph(df)

printing
('# links', 689)
('# event entities', 100)
('# attrib entities', 56)


In [51]:
g.plot()