In [141]:
import os
import pickle
from getpass import getpass

import numpy as np
import pandas as pd
from neo4j.v1 import GraphDatabase, basic_auth
from numpy.random import randn, gamma
from sklearn import tree
from sklearn.linear_model import LogisticRegression

In [123]:
# Connection settings for the Neo4j instance.
# The URI and user can be overridden through environment variables and default
# to the values this notebook has always used.
db_location = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USER", "neo4j")
# The password is deliberately NOT stored in the notebook: it is read from
# NEO4J_PASSWORD, and if that is unset or empty the user is prompted for it.
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
# Driver object shared by all later cells to open sessions.
driver = GraphDatabase.driver(db_location, auth=basic_auth(username, password))

In [124]:
# Start from a clean slate: delete every node and, through DETACH, every
# relationship attached to it.
with driver.session() as session:
    session.run(
        "MATCH (n) DETACH DELETE n"
    )

In [125]:
# Populate the database with synthetic patients and a random class label each.
with driver.session() as session:

    # Create one :Person node per element of range(1,20) (Cypher ranges are
    # inclusive, so 20 nodes), each with randomly generated vital signs.
    session.run(
        "FOREACH(patient IN range(1,20) | CREATE (:Person {"
        "blood_pressure: round(rand()*175), "
        "lung_volumes: round(rand()*4), "
        "age: round(rand()*15+23), "
        "heart_rate: round(rand()*160), "
        "EKG_quality: round(rand()*100)}))"
    )

    # Assign every patient a random label (the Y data) drawn from `classes`.
    # floor(rand() * size(classes)) yields each index with equal probability;
    # the previous round(rand()*10)%3 favoured indices 0 and 1 over 2.
    # toInteger() is used because toInt() is deprecated in Cypher.
    session.run(
        "MATCH (patient :Person) "
        "WITH patient, [1, 2, 3] AS classes "
        "WITH patient, classes, toInteger(floor(rand() * size(classes))) AS position "
        "SET patient.class = classes[position] "
    )

In [126]:
# LOAD DATA
# Inside a single explicit transaction, read the vitals and class label of
# every :Person node and turn the returned records into a DataFrame with one
# row per patient and one column per returned alias.
with driver.session() as session, session.begin_transaction() as get_data:
    result = get_data.run("MATCH (patient :Person) "
                          "RETURN patient.blood_pressure as blood_pressure, patient.lung_volumes as lung_volumes, patient.age as age, patient.heart_rate as heart_rate, patient.EKG_quality as EKG_quality, patient.class as class ")
    training_data = pd.DataFrame([dict(r.items()) for r in result])

In [127]:
# Display the complete training frame loaded from Neo4j (every row is rendered, not just a head()).
training_data

Unnamed: 0,EKG_quality,age,blood_pressure,class,heart_rate,lung_volumes
0,7.0,30.0,17.0,3,17.0,2.0
1,48.0,28.0,45.0,2,42.0,0.0
2,14.0,23.0,68.0,2,37.0,2.0
3,67.0,37.0,52.0,3,104.0,0.0
4,74.0,36.0,61.0,3,120.0,0.0
5,68.0,31.0,72.0,1,42.0,1.0
6,80.0,36.0,32.0,2,19.0,3.0
7,95.0,32.0,95.0,2,1.0,1.0
8,49.0,24.0,2.0,3,149.0,2.0
9,66.0,26.0,114.0,1,9.0,0.0


In [128]:
# Split the training frame into model inputs and the target.
# The target is the single 'class' column; every other column is a feature.
responses_columns = ['class']
features_columns = [
    column
    for column in training_data.columns
    if column not in responses_columns
]
# Both selections use a list of column names, so both results are DataFrames.
X_data = training_data[features_columns]
Y_data = training_data[responses_columns]

In [129]:
# Preview the first rows of the feature frame (the 'class' column is excluded).
X_data.head()

Unnamed: 0,EKG_quality,age,blood_pressure,heart_rate,lung_volumes
0,7.0,30.0,17.0,17.0,2.0
1,48.0,28.0,45.0,42.0,0.0
2,14.0,23.0,68.0,37.0,2.0
3,67.0,37.0,52.0,104.0,0.0
4,74.0,36.0,61.0,120.0,0.0


In [130]:
# Preview the first rows of the target frame (only the 'class' column).
Y_data.head()

Unnamed: 0,class
0,3
1,2
2,2
3,3
4,3


In [131]:
# Train a decision tree with scikit-learn's default hyper-parameters on the
# feature frame and the 'class' target; fit() returns the fitted estimator.
d_tree_model = tree.DecisionTreeClassifier().fit(
    X=X_data,
    y=Y_data,
)

In [132]:
# Three hand-written patients to score with the fitted tree; each key is a
# feature column and each list holds one value per patient.
new_data = pd.DataFrame({
    'blood_pressure': [125, 137, 99],
    'lung_volumes': [3, 1.5, 2],
    'age': [53, 27, 39],
    'heart_rate': [75, 59, 89],
    'EKG_quality': [99, 65, 43],
})

In [133]:
# Show the hand-written patients that are about to be scored.
new_data

Unnamed: 0,EKG_quality,age,blood_pressure,heart_rate,lung_volumes
0,99,53,125,75,3.0
1,65,27,137,59,1.5
2,43,39,99,89,2.0


In [142]:
# Preview the predicted class for each new patient.
# The columns are selected in the exact order used for training so the
# prediction does not depend on new_data happening to share that column order.
d_tree_model.predict(new_data[features_columns])


array([2, 1, 3], dtype=int64)

In [148]:
# Predicted class per new patient, kept for the result table built next.
# The columns are selected in the exact order used for training so the
# prediction does not depend on new_data happening to share that column order.
output = d_tree_model.predict(new_data[features_columns])

In [149]:
# Wrap the predicted labels in a one-column frame named 'class' for display.
predicted_data = pd.Series(output, name='class').to_frame()

In [150]:
# Show the predicted labels as a table.
predicted_data

Unnamed: 0,class
0,2
1,1
2,3


In [151]:
# File (relative to the current working directory) the fitted tree is pickled to and reloaded from.
model_pkl_filename = 'prediction_model.pkl'

In [152]:
# Persist the fitted tree: serialise it with pickle's default protocol into
# the model file, overwriting any previous contents ('wb').
with open(model_pkl_filename, 'wb') as pickled_model:
    pickle.Pickler(pickled_model).dump(d_tree_model)

In [153]:
# Restore the estimator from the file written above.
# Unpickling executes code embedded in the file, so only load pickles you trust.
with open(model_pkl_filename, 'rb') as model_pkl:
    prediction_model = pickle.Unpickler(model_pkl).load()

In [154]:
# Show the estimator restored from the pickle file.
prediction_model

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [158]:
# A single hand-written patient to score with the reloaded model.
# Note: this rebinds `new_data`, replacing the three-patient frame used earlier.
new_data = pd.DataFrame({
    'blood_pressure': [115],
    'lung_volumes': [2.5],
    'age': [23],
    'heart_rate': [65],
    'EKG_quality': [96],
})

In [159]:
# Show the single patient that is about to be scored.
new_data

Unnamed: 0,EKG_quality,age,blood_pressure,heart_rate,lung_volumes
0,96,23,115,65,2.5


In [160]:
# Score the new patient with the model restored from disk.
# The columns are selected in the exact order used for training so the
# prediction does not depend on new_data happening to share that column order.
result = prediction_model.predict(new_data[features_columns])

In [161]:
# Predicted class label for the new patient, as returned by predict().
result

array([2], dtype=int64)