### Creating connections to the databases

In [1]:
import os

import pandas as pd

from models.bayes_net import BNCreator
from mysql_connection import MySqlManip
from neo4j_connection import Neo4jManip


# Connection settings are read from the environment so that no credentials are
# stored in the notebook.
#   Required: MYSQL_PASSWORD, NEO4J_PASSWORD (a missing one raises KeyError).
#   Optional: MYSQL_USER, MYSQL_HOST, MYSQL_PORT, MYSQL_DATABASE, NEO4J_URI,
#             NEO4J_USER -- each falls back to the value this notebook was
#             originally written with.
mysql_conn = MySqlManip(
    user=os.getenv("MYSQL_USER", "root"),
    host=os.getenv("MYSQL_HOST", "10.60.5.99"),
    port=int(os.getenv("MYSQL_PORT", "3306")),
    password=os.environ["MYSQL_PASSWORD"],
    database=os.getenv("MYSQL_DATABASE", "IOT"),
)

neo4j_conn = Neo4jManip(
    uri=os.getenv("NEO4J_URI", "bolt://10.60.5.99:7687"),
    user=os.getenv("NEO4J_USER", "neo4j"),
    password=os.environ["NEO4J_PASSWORD"],
)

# NOTE(review): disabled call kept from the original cell; `conn` is not defined
# anywhere in this notebook, so it cannot be re-enabled as written.
# conn.create_params_table()

### Reading the network structure from Neo4j

In [2]:
# Fetch the edge list and the per-node SQL metadata for every "Impact"
# relationship, then echo both so the reader can inspect them.
network_graph, sql_info = neo4j_conn.read_bayesian_graph(
    relationship_type="Impact"
)
# The message is assembled piece by piece so that its leading newline and
# trailing spaces are visible in the source.
print(
    "\n"
    f"network_graph: {network_graph},\n"
    "\n"
    f'"sql_info": {sql_info}      \n'
    "      "
)


network_graph: [('镁含量', '气泡失效'), ('进水流量', '气泡失效'), ('硅含量', '气泡失效'), ('镍含量', '气泡失效'), ('浇注温度_5米采样', '气泡失效'), ('浇注温度_开始采样', '气泡失效'), ('浇注温度_3米采样', '气泡失效'), ('锌含量', '气泡失效'), ('淘捡材料比例', '气泡失效'), ('铣面屑料比例', '气泡失效'), ('外购材料比例', '气泡失效'), ('湿度', '气泡失效'), ('总冷却强度', '气泡失效'), ('铝含量', '气泡失效'), ('除气湿度', '气泡失效'), ('新金属材料比例', '气泡失效'), ('车间废料比例', '气泡失效')],

"sql_info": {'table_name': 'device_4_bubble_cleaned_data_bayes', '镁含量': {'type': 'factor', 'sql_column': 'MG', 'upper': 0.15, 'lower': 0.08}, '气泡失效': {'type': 'target', 'sql_column': 'label'}, '进水流量': {'type': 'factor', 'sql_column': '727castingInputWaterFlow_mean_3min', 'upper': '20', 'lower': '16'}, '硅含量': {'type': 'factor', 'sql_column': 'SI', 'upper': '0.5', 'lower': '0.475'}, '镍含量': {'type': 'factor', 'sql_column': 'NI', 'upper': '2.35', 'lower': '2.25'}, '浇注温度_5米采样': {'type': 'factor', 'sql_column': 'temp_5m_jiaozhu', 'upper': '1270', 'lower': '1230'}, '浇注温度_开始采样': {'type': 'factor', 'sql_column': 'temp_jiaozhu', 'upper': '1270', 'lower': '1

### Reading structured data from MySQL

In [3]:
# Load the table named in the Neo4j metadata and keep only complete rows.
raw_data = mysql_conn.load_data_to_dataframe(sql_info["table_name"]).dropna()

# Build one discretised column per model variable.
series = []
for variable, info in sql_info.items():
    # "table_name" is bookkeeping, not a model variable.
    if variable == "table_name":
        continue

    # .rename() returns a new Series, so the column held by raw_data keeps its
    # SQL name instead of being relabelled in place.
    ser = raw_data[info["sql_column"]].rename(variable)

    if info["type"] == "target":
        # Two equal-width bins over the observed range of the target column.
        ser = pd.cut(ser, 2, labels=["FineBatch", "BadBatch"])
    else:
        try:
            # Bounds may arrive as strings (see the printed sql_info), hence float().
            bins = [
                -float("inf"),
                float(info["lower"]),
                float(info["upper"]),
                float("inf"),
            ]
            ser = pd.cut(
                ser.astype("float"), bins, labels=["lower", "medium", "upper"]
            )
        except (KeyError, TypeError, ValueError) as e:
            # Best effort, as in the original cell: report the problem and keep
            # the column un-discretised. Only the errors that missing bounds,
            # non-numeric values or unordered bins can raise are caught.
            print(f"{variable}: {e}")
    series.append(ser)

# Assemble the columns side by side; the cast to object matches the dtype that
# building the frame from a list of Series and transposing used to give.
data = pd.concat(series, axis=1).astype(object)
data


Unnamed: 0,镁含量,气泡失效,进水流量,硅含量,镍含量,浇注温度_5米采样,浇注温度_开始采样,浇注温度_3米采样,锌含量,淘捡材料比例,铣面屑料比例,外购材料比例,湿度,总冷却强度,铝含量,除气湿度,新金属材料比例,车间废料比例
0,medium,FineBatch,lower,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
1,medium,FineBatch,lower,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
2,medium,FineBatch,lower,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
3,medium,FineBatch,lower,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
4,medium,FineBatch,lower,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16208,medium,FineBatch,medium,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
16209,medium,FineBatch,medium,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
16210,medium,FineBatch,medium,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium
16211,medium,FineBatch,medium,medium,medium,medium,medium,medium,medium,lower,medium,lower,upper,lower,upper,lower,medium,medium


### Creating model

In [4]:
# BNCreator is imported in the notebook's first cell together with the other
# dependencies, so every import is visible in one place.
# Construct the model from the Neo4j edge list and the per-node metadata.
model = BNCreator(network_graph, node_info=sql_info)
# model.to_graphviz()

  from .autonotebook import tqdm as notebook_tqdm


### Training the model

In [5]:
# Disabled alternative: parameter estimation via pgmpy's ExpectationMaximization.
# NOTE(review): the block reads `latents`, which is not defined in this
# notebook, so it cannot be re-enabled as written.
# from pgmpy.estimators import ExpectationMaximization

# estimator = ExpectationMaximization(model, data)
# latent_card = {}
# for latent_node in latents:
#     latent_card[latent_node] = 10
# params = estimator.get_parameters(latent_card = latent_card)
# model.add_cpds(*params)

# Fit the model on the discretised frame `data` built in the data-loading cell.
model.fit(data)

### Making inference

In [6]:
# Pick the single row labelled 499, remove the target column, and ask the model
# for that row's class probabilities.
sample_data = data.loc[[499]].drop(columns=["气泡失效"])

model.predict_probability(sample_data)

Unnamed: 0,气泡失效_BadBatch,气泡失效_FineBatch
499,0.141246,0.858754


### Input interface

In [11]:
# Evidence handed to the model; the empty `target` is passed through unchanged.
evidence = {"气泡失效": 1, "镁含量": 0.0, "车间废料比例": 0.0}
res = model.make_inference(evidence, target="")

print(res)

defaultdict(<class 'dict'>, {'浇注温度_3米采样': {'lower': 0.030116218704975557, 'medium': 0.9698837812950236}, '外购材料比例': {'lower': 0.8828826402662298, 'medium': 0.11711735973376958}, '锌含量': {'medium': 0.9999999999999993}, '淘捡材料比例': {'lower': 0.9483704472873615, 'medium': 0.0516295527126376}, '总冷却强度': {'lower': 0.9425199229091596, 'medium': 0.05444752224822483, 'upper': 0.0030325548426151407}, '铝含量': {'lower': 0.011982289865942755, 'medium': 0.4403947616326816, 'upper': 0.5476229485013755}, '铣面屑料比例': {'lower': 0.1910110216480603, 'medium': 0.808988978351941}, '新金属材料比例': {'lower': 0.2748605992612887, 'medium': 0.7251394007387111}, '除气湿度': {'lower': 0.9999999999999993}, '浇注温度_5米采样': {'lower': 0.03602005987076572, 'medium': 0.9639799401292342}, '进水流量': {'lower': 0.024334403493180022, 'medium': 0.9607247165505206, 'upper': 0.014940879956298983}, '浇注温度_开始采样': {'lower': 0.024097584820458602, 'medium': 0.9759024151795418}, '硅含量': {'lower': 0.06530191307311839, 'medium': 0.3917116413519804, 'upper': 

### Writing the result values to the database

In [12]:
# Store each node's inferred distribution on the node in Neo4j and flag the
# nodes whose "medium" state has a probability below 0.5.
for key, value in res.items():
    # Property name spelling is left as-is: it is a runtime key and changing it
    # would write to a different property.
    neo4j_conn.update_node(
        key, "probablities", str(value)
    )
    # Test for None explicitly: a probability of exactly 0.0 is falsy and would
    # otherwise be treated as "no medium state" even though it is below 0.5.
    medium = value.get("medium")
    if medium is not None and medium < 0.5:
        neo4j_conn.add_node_type(key, "activate")

In [10]:
# NOTE(review): this cell removes the "activate" type that the preceding cell
# adds -- presumably a manual reset; confirm it is meant to run on "Run All".
for key in res:
    neo4j_conn.remove_node_type(key, "activate")

In [15]:
# Show the CPD stored at position 4 of the sequence returned by the model.
cpds = model.get_cpds()
print(cpds[4])

+-------------+----------+
| 镍含量(lower)  | 0.058965 |
+-------------+----------+
| 镍含量(medium) | 0.362919 |
+-------------+----------+
| 镍含量(upper)  | 0.578116 |
+-------------+----------+
