# connect to database

In [40]:
from neo4j.v1 import GraphDatabase, basic_auth

In [41]:
# ----- connection -------

db_location = "bolt://localhost:7687"
username = "neo4j"
password = "password"
driver = GraphDatabase.driver(db_location, auth=basic_auth(username, password))

# create data

for multi linear regression, we should create an example to train our skill in recognizing where is exactly should be multi and where it's just different objects.

the important thing is multi = it's when you want to predict a lot of properties without cross-section but at the same time, it describes to you the whole system you use in your left side (X data).

let's take an example, you will see )

In [43]:
# first erase database

with driver.session() as session:
    
    session.run("MATCH (n) DETACH DELETE n")

In [44]:
# then we want to predict how many journals, books and pens you will buy
# there are 2 objects 1) people 2) stores
# there are some properties for both of them
# we need to choose the right properties what could have impact for our question = for our answers (Y data)
# Y data = number of journals, number of books and number of pens
# X data it's those properties of each object what really can affect result = 
# example = location (your and store), your age (old people not buying often )

with driver.session() as session:
    
    # creating people with properties
    session.run("FOREACH(person IN range(1,20) | CREATE (:Person {latitude: round(rand()*180), longitude: round(rand()*90), age: round(rand()*15+23)}))")
    
    # creating stores with properties
    session.run("FOREACH(store IN range(1,10) | CREATE (:Store {latitude: round(rand()*180), longitude: round(rand()*90), customers_daily: round(rand()*500)}))")
    
    # creating random relationships = Y_data and set how many journals, books and pens that person were buying in that store
    session.run("MATCH (person :Person), (store :Store) "
                "WHERE rand() < 0.7 AND NOT (person)-[:BUYING_IN]->(store) "
                "CREATE (person)-[bi :BUYING_IN]->(store) "
                "SET bi.journals=round(rand()*8)+1, bi.books=round(rand()*5)+1, bi.pens=round(rand()*3)+1")

# load data

In [45]:
import pandas as pd

In [46]:
with driver.session() as session:
    
    # get data from database
    with session.begin_transaction() as get_data:
        result = get_data.run("MATCH (person :Person)-[bi :BUYING_IN]->(store :Store) "
                              "RETURN person.latitude as person_latitude, person.longitude as person_longitude, person.age as person_age, store.latitude as store_latitude, store.longitude as store_longitude, store.customers_daily as store_customer_daily, bi.journals as journals, bi.books as books, bi.pens as pens")
        training_data = pd.DataFrame([{k: v for k, v in r.items()} for r in result])

# Set X_data (features) and Y_data (responses)

In [47]:
# create 2 variables called X_data and Y_data:
# X_data shall be a matrix with features columns
# and Y_data shall be a matrix with responses columns

features_columns = [x for x in training_data.columns if x not in ['journals','books', 'pens']]
responses_columns = ['journals', 'books', 'pens']
X_data = training_data[features_columns]
Y_data = training_data[responses_columns]

Unnamed: 0,person_age,person_latitude,person_longitude,store_customer_daily,store_latitude,store_longitude
0,29.0,132.0,58.0,425.0,170.0,40.0
1,36.0,118.0,7.0,425.0,170.0,40.0
2,28.0,35.0,6.0,425.0,170.0,40.0
3,27.0,157.0,69.0,425.0,170.0,40.0
4,38.0,105.0,57.0,425.0,170.0,40.0


In [49]:
Y_data.head()

Unnamed: 0,journals,books,pens
0,8.0,5.0,2.0
1,5.0,5.0,2.0
2,7.0,4.0,2.0
3,3.0,2.0,3.0
4,4.0,4.0,3.0


# Multi Linear Regression Model

In [50]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

In [51]:
# give a name to model and put data to fit
model_3 = MultiOutputRegressor(GradientBoostingRegressor(random_state=0)).fit(X_data, Y_data)

# Prediction
we have to use the same type of data to predict new result

In [52]:
# create new data 

new_data = pd.DataFrame(data = {'person_latitude': [23, 31, 17], 'person_longitude': [39, 45, 67], 'person_age': [33, 27, 43], 'store_customer_daily': [39, 10, 212], 'store_latitude': [12, 37, 78], 'store_longitude': [45, 23, 34]})

In [53]:
new_data

Unnamed: 0,person_age,person_latitude,person_longitude,store_customer_daily,store_latitude,store_longitude
0,33,23,39,39,12,45
1,27,31,45,10,37,23
2,43,17,67,212,78,34


In [54]:
model_3.predict(new_data)

array([[3.56024393, 3.22193152, 2.51957338],
       [4.58284404, 3.80523442, 1.50779837],
       [4.45219571, 2.12267049, 3.03706   ]])

let's write the output data in more correct way with data frame

In [55]:
output = model_3.predict(new_data)

In [56]:
output_data = pd.DataFrame({'journals': output[:,0], 'books': output[:,1], 'pens': output[:,2]})

In [57]:
output_data

Unnamed: 0,books,journals,pens
0,3.221932,3.560244,2.519573
1,3.805234,4.582844,1.507798
2,2.12267,4.452196,3.03706


In [58]:
output_data.round()

Unnamed: 0,books,journals,pens
0,3.0,4.0,3.0
1,4.0,5.0,2.0
2,2.0,4.0,3.0


# Save Model

In [59]:
import pickle

In [60]:
# ------ model name -------
model_pkl_filename = 'prediction_model.pkl'

In [61]:
with open(model_pkl_filename, 'wb') as pickled_model:
    pickle.dump(model_3, pickled_model)

# Load Model

In [62]:
# ----- load prediction model -------
with open(model_pkl_filename, 'rb') as model_pkl:
    prediction_model = pickle.load(model_pkl)

In [63]:
prediction_model

MultiOutputRegressor(estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=0,
             subsample=1.0, verbose=0, warm_start=False),
           n_jobs=1)

# Use Prediction Model

In [67]:
# create new data 

new_data = pd.DataFrame(data = {'person_latitude': [17], 'person_longitude': [39], 'person_age': [23], 'store_customer_daily': [212], 'store_latitude': [37], 'store_longitude': [23]})

In [68]:
new_data

Unnamed: 0,person_age,person_latitude,person_longitude,store_customer_daily,store_latitude,store_longitude
0,23,17,39,212,37,23


In [71]:
result = prediction_model.predict(new_data)

In [72]:
result.round()

array([[7., 3., 1.]])