# 结构学习

In [1]:
import pandas as pd
import numpy as np
from pgmpy.estimators import BdeuScore, K2Score, BicScore
from pgmpy.models import BayesianModel
import os
# from root import get_root_path
from pgmpy.estimators import ExhaustiveSearch
from pgmpy.estimators import HillClimbSearch

data_path = r'D:\PROJECT_TW\git\finance'
path = os.path.sep.join([data_path,'data','train','600893.txt'])
print(path)
data = pd.read_csv(path, sep=',')
k2_score = K2Score(data)
bic_score = BicScore(data)
# model = BayesianModel([('WD', 'ST'), ('Month', 'ST')])  # WD -> ST <- Month
# print(k2_score.score(model))
es = ExhaustiveSearch(data, scoring_method=bic_score)
best_model = es.estimate()
print(best_model.edges())

print("\nAll DAGs by score:")
for score, dag in reversed(es.all_scores()):
    print(score, dag.edges())
    
hc = HillClimbSearch(data, scoring_method=BicScore(data))
best_model = hc.estimate()
print(best_model.edges())
# for score, dag in reversed(hc.all_scores()):
#     print(score, dag.edges())

D:\PROJECT_TW\git\finance\data\train\600893.txt
[]

All DAGs by score:
-50504.05359149926 []
-50516.116125264605 [('WD', 'ST')]
-50516.116125264605 [('ST', 'WD')]
-50531.86083187595 [('ST', 'Month')]
-50531.86083187595 [('Month', 'ST')]
-50543.9233656413 [('ST', 'Month'), ('WD', 'ST')]
-50543.9233656413 [('ST', 'WD'), ('ST', 'Month')]
-50543.9233656413 [('Month', 'ST'), ('ST', 'WD')]
-50678.80715554881 [('WD', 'Month')]
-50678.80715554881 [('Month', 'WD')]
-50690.86968931416 [('WD', 'Month'), ('WD', 'ST')]
-50690.86968931416 [('ST', 'WD'), ('WD', 'Month')]
-50690.86968931416 [('Month', 'WD'), ('WD', 'ST')]
-50702.79815592117 [('Month', 'ST'), ('WD', 'ST')]
-50706.614395925506 [('Month', 'WD'), ('ST', 'Month')]
-50706.614395925506 [('Month', 'ST'), ('WD', 'Month')]
-50706.614395925506 [('Month', 'ST'), ('Month', 'WD')]
-50849.74447959403 [('Month', 'WD'), ('ST', 'WD')]
-50865.48918620537 [('ST', 'Month'), ('WD', 'Month')]
-50877.55171997072 [('ST', 'Month'), ('WD', 'Month'), ('WD', 'ST'

# 数据网络

## 按时间划分网络

In [5]:
import pandas as pd
import numpy as np
from pgmpy.estimators import BdeuScore, K2Score, BicScore
from pgmpy.models import BayesianModel
import os
# from root import get_root_path
from pgmpy.readwrite import ProbModelXMLWriter, get_probmodel_data
data_path = r'D:\PROJECT_TW\git\finance'
path = os.path.sep.join([data_path,'data','train','600893.txt'])
print(path)
data = pd.read_csv(path, sep=',')
model = BayesianModel([('WD', 'ST'), ('Month', 'ST')])  # WD -> ST <- Month
print(model.nodes)
print(model)



D:\PROJECT_TW\git\finance\data\train\600893.txt
['WD', 'ST', 'Month']



In [2]:
# 状态统计
from pgmpy.estimators import ParameterEstimator
pe = ParameterEstimator(model, data)
print("\n", pe.state_counts('WD'))
# print("\n ", pe.state_counts('ST'))
# print("\n", pe.state_counts('Month'))


     WD
0  588
1  604
2  613
3  602
4  597


In [5]:
# Maximum Likelihood Estimation 
from pgmpy.estimators import MaximumLikelihoodEstimator
print('model:', model)
print('data:', data)
mle = MaximumLikelihoodEstimator(model, data)
# model.fit(data, estimator=MaximumLikelihoodEstimator)
print("\n", mle.estimate_cpd('WD'))
print("\n", mle.estimate_cpd('Month'))
from pgmpy.readwrite import XMLBIFWriter,XMLBIFReader,BIFReader,BIFWriter

writer = BIFWriter(model)
writer.write_bif(filename=os.path.sep.join([get_root_path(),'save','600893','600893.bif']))
reader = BIFReader(os.path.sep.join([get_root_path(),'save','600893','600893.bif']))
mod = reader.get_model()
print(reader.get_states())
print(mod)

from pgmpy.inference import VariableElimination
inference = VariableElimination(mod)
print(inference.query(['ST'],evidence={'WD':'WD_0'}))

model: 
data:             Date  WD  ST  Month
0     2005-01-05   2   0      1
1     2005-01-06   3   1      1
2     2005-01-07   4   1      1
3     2005-01-10   0   1      1
4     2005-01-11   1   1      1
...          ...  ..  ..    ...
2999  2019-12-11   2   1     12
3000  2019-12-12   3   1     12
3001  2019-12-13   4   0     12
3002  2019-12-16   0   0     12
3003  2019-12-17   1   0     12

[3004 rows x 4 columns]

 +-------+----------+
| WD(0) | 0.195739 |
+-------+----------+
| WD(1) | 0.201065 |
+-------+----------+
| WD(2) | 0.204061 |
+-------+----------+
| WD(3) | 0.200399 |
+-------+----------+
| WD(4) | 0.198735 |
+-------+----------+

 +-----------+-----------+
| Month(1)  | 0.0792277 |
+-----------+-----------+
| Month(2)  | 0.0649134 |
+-----------+-----------+
| Month(3)  | 0.0888815 |
+-----------+-----------+
| Month(4)  | 0.0765646 |
+-----------+-----------+
| Month(5)  | 0.0845539 |
+-----------+-----------+
| Month(6)  | 0.0845539 |
+-----------+-----------+
| Mo

Finding Elimination Order: : 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.68it/s]
Eliminating: Month: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.60it/s]


+----------+-----------+
| ST       |   phi(ST) |
| ST(ST_0) |    0.4739 |
+----------+-----------+
| ST(ST_1) |    0.5261 |
+----------+-----------+


In [56]:
??inference.query

In [17]:
# 推断
import pandas as pd
import numpy as np
from pgmpy.estimators import BdeuScore, K2Score, BicScore
from pgmpy.models import BayesianModel
import os
from root import get_root_path
from pgmpy.estimators import ParameterEstimator
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

path = os.path.sep.join([get_root_path(),'data','train','ider_600893.txt'])
data = pd.read_csv(path, sep=',')
model = BayesianModel([('MACD', 'ST'), ('MFI', 'ST')])  # WD -> ST <- Month
print('model:', model)
# pe = ParameterEstimator(model, data)
# print("\n", pe.state_counts('MACD'))
# print("\n", pe.state_counts('MFI'))

# # mle.estimate_cpd(variable) computes the state counts and divides each cell by the (conditional) sample size. The mle.get_parameters()-method returns a list of CPDs for all variable of the model.
# # The built-in fit()-method of BayesianModel provides more convenient access to parameter estimators:
# mle = MaximumLikelihoodEstimator(model, data)
# print("\n", mle.estimate_cpd('MACD'))
# print("\n", mle.estimate_cpd('MFI'))
print('data:', data)
mle = model.fit(data, estimator=MaximumLikelihoodEstimator)

# print(model.get_root_path())
# model_data = get_probmodel_data(model)
# writer = ProbModelXMLWriter(model_data=model_data)
# writer.write_file(os.path.join([get_root_path(),'data','train','600893.pgmx']))
infer = VariableElimination(model)
print('MACD:BS\n',infer.query(['ST'], evidence={'MACD': 'BS'}))
print('MFI:BS\n',infer.query(['ST'], evidence={'MFI': 'BS'}))


model: 
data:      MACD MFI    ST
0      HS  HS  HOLD
1      HS  HS  HOLD
2      HS  HS  HOLD
3      HS  HS  HOLD
4      HS  HS   BUY
...   ...  ..   ...
2959   HS  HS   BUY
2960   HS  HS   BUY
2961   HS  HS  HOLD
2962   HS  HS  HOLD
2963   HS  HS  HOLD

[2964 rows x 3 columns]


AttributeError: 'NoneType' object has no attribute 'estimate_cpd'

## 按连续性划分网络

## 按指标数据划分

In [3]:
import pandas as pd
import numpy as np
from pgmpy.estimators import BdeuScore, K2Score, BicScore
from pgmpy.models import BayesianModel
import os
from root import get_root_path

code = '600893'
path = os.path.sep.join([get_root_path(),'data','train','{}_signal.txt'.format(code)])
print(path)
data = pd.read_csv(path, sep=',')
index_keys=['MACD','MFI','BBANDS','AROON','SAR','CCI']
model = BayesianModel([('MACD', 'ST'), ('MFI', 'ST'),('BBANDS','ST'),('AROON','ST'),('SAR','ST'),('CCI','ST')])
# pe = ParameterEstimator(model, data)
mle = MaximumLikelihoodEstimator(model, data)
# print("\n", mle.estimate_cpd('MACD'))
# print("\n", mle.estimate_cpd('MFI'))
model.fit(data, estimator=MaximumLikelihoodEstimator)
infer = VariableElimination(model)
# print('MACD:BS\n',infer.query(['ST'], evidence={'MACD':'BS','AROON':'BS','BBANDS':'BS','SAR':'BS','CCI':'BS'}))
print('MACD:BS\n',infer.query(['BBANDS','MACD'], evidence={'ST':'BUY'}))
# print('MFI:BS\n',infer.query(['ST'], evidence={'MFI': 'BS'}))
# print('BBANDS:BS\n',infer.query(['ST'], evidence={'BBANDS': 'BS'}))
# print('AROON:BS\n',infer.query(['ST'], evidence={'AROON': 'BS'}))
# print('SAR:BS\n',infer.query(['ST'], evidence={'SAR': 'BS'}))
# print('CCI:BS\n',infer.query(['ST'], evidence={'CCI': 'BS'}))
from pgmpy.readwrite import XMLBIFWriter
writer = XMLBIFWriter(model)
writer.write_xmlbif(filename=os.path.sep.join([get_root_path(),'data','train','600893_signal.bif']))



D:\PROJECT_TW\git\finance\data\train\600893_signal.txt


Finding Elimination Order: : 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1332.48it/s]
Eliminating: MFI: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 138.01it/s]


MACD:BS
 +----------+------------+--------------------+
| MACD     | BBANDS     |   phi(MACD,BBANDS) |
| MACD(BS) | BBANDS(BS) |             0.0078 |
+----------+------------+--------------------+
| MACD(BS) | BBANDS(HS) |             0.0324 |
+----------+------------+--------------------+
| MACD(HS) | BBANDS(BS) |             0.1649 |
+----------+------------+--------------------+
| MACD(HS) | BBANDS(HS) |             0.7950 |
+----------+------------+--------------------+


In [30]:
from pgmpy.readwrite import XMLBIFWriter,XMLBIFReader
reader = XMLBIFReader(os.path.sep.join([get_root_path(),'data','train','600893_signal.bif']))
# reader.get_edges
model = reader.get_model()
infer = VariableElimination(model)
# print('BBANDS:BS\n',infer.query(['ST'], evidence={'BBANDS': 'BS'}))
result = infer.query(['ST'], evidence={'MACD': 'BS'})
print(result)
print(result.variables)
print(result.cardinality)
print(result.values)
print(result.evidence)
print(result.get_cardinality(['ST']))

Finding Elimination Order: : 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 2500.78it/s]
Eliminating: MFI: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 111.18it/s]


+----------+-----------+
| ST       |   phi(ST) |
| ST(BUY)  |    0.5166 |
+----------+-----------+
| ST(HOLD) |    0.4834 |
+----------+-----------+
['ST']
[2]
[0.51661481 0.48338519]


AttributeError: 'DiscreteFactor' object has no attribute 'evidence'

In [31]:
??result