In [17]:
import libpgm
from libpgm.nodedata import NodeData
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.graphskeleton import GraphSkeleton
import json

from libpgm.pgmlearner import PGMLearner

In [2]:
num_sources = 6
skeleton_file = 'skeleton.txt'
data_file = 'data.txt'

In [3]:
num_sources = 6

bins = ["d" + str(i) for i in range(16)]
sources = ["source" + str(i) for i in range(num_sources)]
factors = ["humidity", "temp", "wind_dir","wind_speed","time"]
nodes = bins + sources + factors

In [4]:
edges = []
for factor in factors:
    for source in sources:
        for b in bins:
            edges.append([source, b])
        edges.append([factor, source])

In [5]:
frame = {"V": nodes, "E": edges}
with open(skeleton_file, 'w') as f:
    json.dump(frame, f)

skeleton = GraphSkeleton()
skeleton.load(skeleton_file)

In [6]:
model = LGBayesianNetwork(skeleton)

In [20]:
frame

{'E': [['source0', 'd0'],
  ['source0', 'd1'],
  ['source0', 'd2'],
  ['source0', 'd3'],
  ['source0', 'd4'],
  ['source0', 'd5'],
  ['source0', 'd6'],
  ['source0', 'd7'],
  ['source0', 'd8'],
  ['source0', 'd9'],
  ['source0', 'd10'],
  ['source0', 'd11'],
  ['source0', 'd12'],
  ['source0', 'd13'],
  ['source0', 'd14'],
  ['source0', 'd15'],
  ['humidity', 'source0'],
  ['source1', 'd0'],
  ['source1', 'd1'],
  ['source1', 'd2'],
  ['source1', 'd3'],
  ['source1', 'd4'],
  ['source1', 'd5'],
  ['source1', 'd6'],
  ['source1', 'd7'],
  ['source1', 'd8'],
  ['source1', 'd9'],
  ['source1', 'd10'],
  ['source1', 'd11'],
  ['source1', 'd12'],
  ['source1', 'd13'],
  ['source1', 'd14'],
  ['source1', 'd15'],
  ['humidity', 'source1'],
  ['source2', 'd0'],
  ['source2', 'd1'],
  ['source2', 'd2'],
  ['source2', 'd3'],
  ['source2', 'd4'],
  ['source2', 'd5'],
  ['source2', 'd6'],
  ['source2', 'd7'],
  ['source2', 'd8'],
  ['source2', 'd9'],
  ['source2', 'd10'],
  ['source2', 'd11'],
  [

Prepare Data

In [7]:
import pandas as pd
import numpy as np

In [51]:
p = "pickles/alliance_sub.p"
df_full = pd.read_pickle(p)

In [52]:
df_full['dt'].max()

Timestamp('2017-01-10 13:30:00')

In [9]:
nodes = bins + ['temp','humidity',"dir_(degrees)","spd_(m/s)", "t"]

In [10]:
df = df[nodes]
df.rename(columns={'dir_(degrees)': 'wind_dir', 'spd_(m/s)': 'wind_speed', 't':'time'}, inplace=True)

In [11]:
df['time'] = pd.to_datetime(df['time'])
df['time'] = df['time'].dt.hour

In [12]:
df.head()

Unnamed: 0,d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,...,d11,d12,d13,d14,d15,temp,humidity,wind_dir,wind_speed,time
0,1343.291926,1221.033005,861.866063,236.959853,99.804128,16.9349,6.455696,8.003923,0.0,8.776291,...,0.0,0.0,0.0,0.0,0.0,19.2,84.45,320,3,22
1,937.028026,1183.462758,639.449015,131.644363,110.893476,42.337249,32.278481,24.011768,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,18.95,83.96,350,2,22
2,904.264809,870.377372,556.042622,140.420654,66.536086,42.337249,6.455696,16.007846,10.318851,17.552582,...,0.0,0.0,0.0,0.0,0.0,19.05,85.03,30,3,23
3,1172.923194,1277.388374,778.45967,298.393889,55.446738,42.337249,25.822785,8.003923,10.318851,0.0,...,10.318851,0.0,0.0,0.0,0.0,18.84,86.4,10,3,23
4,1743.003182,1177.201051,815.529178,166.749526,88.714781,33.869799,32.278481,16.007846,10.318851,8.776291,...,0.0,0.0,0.0,0.0,0.0,18.22,85.67,20,2,0


In [13]:
data = df.to_dict(orient='records')

In [14]:
with open(data_file,'w') as f:
    json.dump(data,f)

In [15]:
isinstance(data[1], dict)

True

In [28]:
data_u = json.load(open(data_file))

In [31]:
# instantiate my learner 
learner = PGMLearner()
# estimate parameters
result = learner.lg_mle_estimateparams(skeleton, data_u)

# output
print json.dumps(result.Vdata, indent=2)

KeyError: u'source0'

In [39]:
len(data)

7056

In [46]:
#%%timeit
result = learner.lg_estimatebn(data)

In [35]:
result

<libpgm.lgbayesiannetwork.LGBayesianNetwork at 0x110f11810>

In [42]:
%%timeit
result_constraint = learner.lg_constraint_estimatestruct(data[:100])

1 loop, best of 3: 2.74 s per loop


In [44]:
result_constraint = learner.lg_constraint_estimatestruct(data[:100])

In [48]:
print json.dumps(result.E, indent=2)
print json.dumps(result.Vdata, indent=2)

[
  [
    "d10", 
    "d8"
  ], 
  [
    "d2", 
    "d1"
  ], 
  [
    "wind_speed", 
    "wind_dir"
  ], 
  [
    "wind_dir", 
    "time"
  ], 
  [
    "d6", 
    "time"
  ], 
  [
    "d8", 
    "d6"
  ], 
  [
    "wind_dir", 
    "d6"
  ], 
  [
    "wind_dir", 
    "temp"
  ]
]
{
  "d14": {
    "mean_base": 1.0972865072590714, 
    "parents": [], 
    "children": [], 
    "mean_scal": [], 
    "variance": 22.52140812946028
  }, 
  "d15": {
    "mean_base": 1.922755763732985, 
    "parents": [], 
    "children": [], 
    "mean_scal": [], 
    "variance": 59.93095201795428
  }, 
  "d10": {
    "mean_base": 14.732362519755707, 
    "parents": [], 
    "children": [
      "d8"
    ], 
    "mean_scal": [], 
    "variance": 396.4314151574552
  }, 
  "d11": {
    "mean_base": 7.948264316417576, 
    "parents": [], 
    "children": [], 
    "mean_scal": [], 
    "variance": 164.4655307441802
  }, 
  "d12": {
    "mean_base": 3.994967587148667, 
    "parents": [], 
    "children": [], 
    "m

In [47]:
print json.dumps(result_constraint.E, indent=2)
print json.dumps(result_constraint.Vdata, indent=2)

[
  [
    "humidity", 
    "time"
  ], 
  [
    "d2", 
    "d1"
  ]
]


AttributeError: 'GraphSkeleton' object has no attribute 'Vdata'