In [54]:
import getpass
import os
import pickle

import pandas as pd
from neo4j.v1 import GraphDatabase, basic_auth
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

In [55]:
# Connection settings are read from the environment so that no credentials are
# stored in the notebook; the defaults target a local development instance.
db_location = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USER", "neo4j")
# Prompt interactively when NEO4J_PASSWORD is not set instead of hard-coding it.
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
driver = GraphDatabase.driver(db_location, auth=basic_auth(username, password))

In [56]:
# Start from an empty graph: delete every node together with its relationships.
# WARNING: this wipes the whole database the driver is connected to.
wipe_query = "MATCH (n) DETACH DELETE n"
with driver.session() as session:
    session.run(wipe_query)

In [57]:
# Cypher statements that populate the graph with synthetic data, run in order.
seed_queries = (
    # Patient nodes with random blood_pressure, heart_rate and age properties.
    "FOREACH(patient IN range(1,20) | CREATE (:Patient {"
    "blood_pressure: round(rand()*175), "
    "heart_rate: round(rand()*160), "
    "age: round(rand()*15+23)}))",
    # Clinic nodes with random avg_salary, budget and patients_daily properties.
    "FOREACH(clinic IN range(1,10) | CREATE (:Clinic {"
    "avg_salary: round(rand()*1300), "
    "budget: round(rand()*100000), "
    "patients_daily: round(rand()*500)}))",
    # Random patient -> clinic BUYING_IN relationships (kept when rand() < 0.7).
    # Their analysis / consumables / operations properties are the values that
    # are later used as the regression targets (Y_data).
    (
        "MATCH (patient :Patient), (clinic :Clinic) "
        "WHERE rand() < 0.7 AND NOT (patient)-[:BUYING_IN]->(clinic) "
        "CREATE (patient)-[bi :BUYING_IN]->(clinic) "
        "SET bi.analysis=round(rand()*8)+1, "
        "bi.consumables=round(rand()*5)+1, "
        "bi.operations=round(rand()*3)+1"
    ),
)

with driver.session() as session:
    for seed_query in seed_queries:
        session.run(seed_query)

In [58]:
# One row per (patient)-[BUYING_IN]->(clinic) relationship: the patient and
# clinic properties plus the three relationship properties.
training_query = (
    "MATCH (patient :Patient)-[bi :BUYING_IN]->(clinic :Clinic) "
    "RETURN patient.blood_pressure as patient_blood_pressure, "
    "patient.heart_rate as patient_heart_rate, "
    "patient.age as patient_age, "
    "clinic.avg_salary as clinic_avg_salary, "
    "clinic.budget as clinic_budget, "
    "clinic.patients_daily as clinic_patients_daily, "
    "bi.analysis as analysis, "
    "bi.consumables as consumables, "
    "bi.operations as operations"
)

with driver.session() as session:
    # Read everything inside a single explicit transaction.
    with session.begin_transaction() as get_data:
        result = get_data.run(training_query)
        # Each record becomes a plain dict so pandas can build the frame at once.
        training_data = pd.DataFrame([dict(r.items()) for r in result])

In [59]:
# Preview only the first rows; rendering the whole frame bloats the notebook.
training_data.head(10)

Unnamed: 0,analysis,clinic_avg_salary,clinic_budget,clinic_patients_daily,consumables,operations,patient_age,patient_blood_pressure,patient_heart_rate
0,6.0,541.0,42669.0,303.0,3.0,3.0,32.0,101.0,2.0
1,7.0,541.0,42669.0,303.0,1.0,3.0,29.0,32.0,116.0
2,4.0,541.0,42669.0,303.0,5.0,1.0,35.0,85.0,96.0
3,3.0,541.0,42669.0,303.0,1.0,3.0,28.0,152.0,20.0
4,1.0,541.0,42669.0,303.0,1.0,3.0,29.0,85.0,92.0
5,3.0,541.0,42669.0,303.0,3.0,4.0,37.0,99.0,54.0
6,8.0,541.0,42669.0,303.0,6.0,3.0,28.0,131.0,7.0
7,7.0,541.0,42669.0,303.0,2.0,1.0,30.0,77.0,67.0
8,5.0,541.0,42669.0,303.0,2.0,2.0,31.0,28.0,112.0
9,7.0,541.0,42669.0,303.0,3.0,1.0,31.0,6.0,97.0


In [60]:
# The three relationship properties are the targets; every other column is a feature.
responses_columns = ['analysis', 'consumables', 'operations']
features_columns = [col for col in training_data.columns if col not in responses_columns]
Y_data = training_data[responses_columns]
X_data = training_data[features_columns]

In [61]:
# Preview the feature matrix (patient and clinic columns only).
X_data.head()

Unnamed: 0,clinic_avg_salary,clinic_budget,clinic_patients_daily,patient_age,patient_blood_pressure,patient_heart_rate
0,541.0,42669.0,303.0,32.0,101.0,2.0
1,541.0,42669.0,303.0,29.0,32.0,116.0
2,541.0,42669.0,303.0,35.0,85.0,96.0
3,541.0,42669.0,303.0,28.0,152.0,20.0
4,541.0,42669.0,303.0,29.0,85.0,92.0


In [62]:
# Preview the target matrix (analysis, consumables, operations).
Y_data.head()

Unnamed: 0,analysis,consumables,operations
0,6.0,3.0,3.0
1,7.0,1.0,3.0
2,4.0,5.0,1.0
3,3.0,1.0,3.0
4,1.0,1.0,3.0


In [63]:
# MultiOutputRegressor wraps the gradient-boosting base estimator so that all
# target columns of Y_data are fitted in one call; random_state is fixed.
base_regressor = GradientBoostingRegressor(random_state=0)
model_mlr = MultiOutputRegressor(base_regressor)
model_mlr.fit(X_data, Y_data)

In [64]:
# Three hand-written patient/clinic combinations to score with the fitted model.
new_data = pd.DataFrame(
    data={
        'patient_blood_pressure': [98, 113, 127],
        'patient_heart_rate': [63, 71, 79],
        'patient_age': [33, 27, 43],
        'clinic_patients_daily': [39, 10, 212],
        'clinic_avg_salary': [1200, 1500, 900],
        'clinic_budget': [99000, 45000, 27000],
    }
)

In [65]:
# Display the samples that are about to be scored.
new_data

Unnamed: 0,clinic_avg_salary,clinic_budget,clinic_patients_daily,patient_age,patient_blood_pressure,patient_heart_rate
0,1200,99000,39,33,98,63
1,1500,45000,10,27,113,71
2,900,27000,212,43,127,79


In [66]:
# Pass the features in the same column order the model was fitted on
# (features_columns): the column order of a dict-built DataFrame depends on the
# pandas version and is not guaranteed to match X_data.
model_mlr.predict(new_data[features_columns])

array([[ 5.68796983,  3.6646883 ,  2.67932557],
       [ 7.3665953 ,  4.46818855,  3.17745841],
       [ 6.60552554,  3.91040475,  3.02271766]])

In [67]:
# Pass the features in the same column order the model was fitted on
# (features_columns): the column order of a dict-built DataFrame depends on the
# pandas version and is not guaranteed to match X_data.
output = model_mlr.predict(new_data[features_columns])

In [68]:
# Label each prediction column with the target it belongs to; the columns of
# `output` follow the order of responses_columns used when fitting.
output_data = pd.DataFrame(
    {name: output[:, position] for position, name in enumerate(responses_columns)}
)

In [69]:
# Display the labelled predictions.
output_data

Unnamed: 0,analysis,consumables,operations
0,5.68797,3.664688,2.679326
1,7.366595,4.468189,3.177458
2,6.605526,3.910405,3.022718


In [70]:
# Path (relative to the working directory) the fitted model is pickled to and
# later reloaded from.
model_pkl_filename = 'prediction_mlr.pkl'

In [71]:
# Persist the fitted multi-output model to disk.
with open(model_pkl_filename, 'wb') as model_file:
    pickle.dump(model_mlr, model_file)

In [72]:
# Reload the prediction model from the pickle file.
# NOTE: pickle.load can execute arbitrary code; only unpickle files you trust.
with open(model_pkl_filename, 'rb') as model_file:
    prediction_model = pickle.load(model_file)

In [73]:
# Show the restored estimator and its hyper-parameters.
prediction_model

MultiOutputRegressor(estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=0,
             subsample=1.0, verbose=0, warm_start=False),
           n_jobs=1)

In [75]:
# A single patient/clinic sample to score with the reloaded model.
# Note: this rebinds `new_data`, replacing the three-row frame created earlier.
new_data = pd.DataFrame(
    data={
        'patient_blood_pressure': [121],
        'patient_heart_rate': [69],
        'patient_age': [34],
        'clinic_patients_daily': [39],
        'clinic_avg_salary': [1150],
        'clinic_budget': [38000],
    }
)

In [76]:
# Pass the features in the same column order the model was fitted on
# (features_columns): the column order of a dict-built DataFrame depends on the
# pandas version and is not guaranteed to match the training frame.
result = prediction_model.predict(new_data[features_columns])

In [77]:
# Predicted values for the single sample, one column per fitted target.
result

array([[ 6.07809012,  4.06111272,  3.04473859]])