## Neo4j Database Creation

In [13]:
!pip install py2neo

You should consider upgrading via the '/Users/darenguyen/opt/anaconda3/bin/python3 -m pip install --upgrade pip' command.[0m


In [9]:
import py2neo
import numpy as np

In [24]:
from py2neo import Graph
from py2neo import Node
from py2neo import Relationship

In [11]:
import os

# Connection settings are read from the environment so that real credentials
# do not have to be committed with the notebook. The fallbacks reproduce the
# original local-development values, so an unconfigured run behaves as before.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "launch")

# Handle to the Neo4j database used by every later cell.
graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [12]:
import pandas as pd

In [303]:
# Load the four source tables from the working directory. Every file is read
# with the same options: ISO-8859-1 encoding and pandas' Python parser engine.
_CSV_OPTIONS = {"encoding": "ISO-8859-1", "engine": "python"}

courses = pd.read_csv('courses.csv', **_CSV_OPTIONS)
fam_members = pd.read_csv('familyMembers.csv', **_CSV_OPTIONS)
group_members = pd.read_csv('groupMembers.csv', **_CSV_OPTIONS)
pets = pd.read_csv('pets.csv', **_CSV_OPTIONS)

In [150]:
# Show the column names of the group members table.
group_members.columns

Index(['id', 'name', 'birthday', 'birthCity', 'hometown', 'age',
       'siblingCount', 'sibling1', 'sibling2', 'sibling3', 'petCount', 'pet1',
       'pet2', 'course1', 'course2', 'course3', 'course4', 'course5',
       'course6', 'course7', 'major', 'favIceCream', 'inState'],
      dtype='object')

In [151]:
# Display the pets table to inspect its columns and values.
pets

Unnamed: 0,id,name,species,breed,gender,age,ownerGroup,color
0,1,King Louie,Dog,Unknown,M,5.0,1,white
1,2,Taro,Dog,Border Collie mix,M,0.58,3,Brown
2,3,Blacky,Cat,,M,13.0,4,Black
3,4,Jacky,Cat,,M,13.0,4,Brown
4,5,Leo,Dog,German Shepherd,M,2.8,2,Black


In [398]:
# Create one graph node per CSV row for each of the four tables.
#
# NOTE: graph.create() always inserts, so re-running this cell without first
# clearing the database produces duplicate nodes.


def _opt(value):
    """Return None for a missing pandas value (NaN/None/NaT), else the value unchanged.

    py2neo treats a property set to None as absent, so a blank CSV cell
    (for example a pet with no recorded breed) is left off the node instead
    of being stored as a float NaN.
    """
    return None if pd.isna(value) else value


# Group Member Node Creation
for _, row in group_members.iterrows():
    person = Node(
        "Person",
        name=_opt(row['name']),
        birthday=_opt(row['birthday']),
        birthCity=_opt(row['birthCity']),
        hometown=_opt(row['hometown']),
        # int() converts the pandas/numpy scalar to a native Python int.
        age=int(row['age']),
        siblingCount=int(row['siblingCount']),
        major=_opt(row['major']),
        inState=int(row['inState']),
    )
    graph.create(person)

# Pet Node Creation
for _, row in pets.iterrows():
    pet = Node(
        "Pet",
        name=_opt(row['name']),
        species=_opt(row['species']),
        breed=_opt(row['breed']),
        gender=_opt(row['gender']),
        # Pet ages are fractional years (e.g. 0.58, 2.8); float() keeps the
        # fraction, whereas int() would truncate a 7-month-old pet to age 0.
        age=float(row['age']),
        color=_opt(row['color']),
    )
    graph.create(pet)

# Fam Members Node Creation
for _, row in fam_members.iterrows():
    familyMember = Node(
        "familyMember",
        name=_opt(row['name']),
        age=int(row['age']),
        gender=_opt(row['gender']),
        birthday=_opt(row['birthday']),
        petCount=int(row['petCount']),
        # NOTE(review): bool() is True for any non-empty string and for NaN.
        # This is only correct if the column holds 0/1 or real booleans -- confirm.
        enrolledAtUVA=bool(row['enrolledAtUVA']),
    )
    graph.create(familyMember)

# Courses Node Creation
for _, row in courses.iterrows():
    course = Node(
        "Course",
        courseMnemonic=_opt(row['course mnemonic']),
        courseNumber=int(row['course number']),
        department=_opt(row['department']),
        classTitle=_opt(row['class title']),
        professor=_opt(row['professor']),
        meetingDays=_opt(row['meeting day(s)']),
        classType=_opt(row['type']),
        classCapacity=int(row['class capacity']),
        courseDescription=_opt(row['course description']),
    )
    graph.create(course)


                  

In [399]:
from py2neo import Graph, NodeMatcher
# NodeMatcher bound to the connected graph; the relationship cells below use
# it to look up already-created nodes by label and property.
matcher = NodeMatcher(graph)

In [400]:
# Show the column names of the family members table.
fam_members.columns

Index(['id ', 'relation', 'name', 'age', 'gender', 'birthday', 'favIceCream',
       'petCount', 'pet1Name', 'pet2Name', 'enrolledAtUVA', 'courses',
       'groupMemberRelative'],
      dtype='object')

In [401]:
# Group Members to Pet 1 Relationship
# For every group member with a value in 'pet1', create (Person)-[:OWNS]->(Pet).
Owns = Relationship.type("OWNS")

# The id header may carry a trailing space in the CSV ('id ' vs 'id');
# accept either spelling rather than failing with a KeyError.
pet_id_col = 'id ' if 'id ' in pets.columns else 'id'

for _, member in group_members.iterrows():
    if pd.isna(member['pet1']):
        continue  # this member has no first pet
    # Property keyword matching lets py2neo pass the value as a query
    # parameter, so names containing apostrophes cannot break the Cypher.
    owner_node = matcher.match("Person", name=str(member['name'])).first()
    pet_names = pets.loc[pets[pet_id_col] == member['pet1'], 'name']
    pet_node = matcher.match("Pet", name=str(pet_names.iloc[0])).first()
    graph.create(Owns(owner_node, pet_node))

In [402]:
# Group Members to Pet 2 Relationship
# For every group member with a value in 'pet2', create (Person)-[:OWNS]->(Pet).
# Relies on `Owns` having been defined by the Pet 1 relationship cell.

# The id header may carry a trailing space in the CSV ('id ' vs 'id');
# accept either spelling rather than failing with a KeyError.
pet_id_col = 'id ' if 'id ' in pets.columns else 'id'

for _, member in group_members.iterrows():
    if pd.isna(member['pet2']):
        continue  # this member has no second pet
    # Property keyword matching lets py2neo pass the value as a query
    # parameter, so names containing apostrophes cannot break the Cypher.
    owner_node = matcher.match("Person", name=str(member['name'])).first()
    pet_names = pets.loc[pets[pet_id_col] == member['pet2'], 'name']
    pet_node = matcher.match("Pet", name=str(pet_names.iloc[0])).first()
    graph.create(Owns(owner_node, pet_node))

In [404]:
# Group Member to Family Member Relationships
# For every family member, create (familyMember)-[:IS_RELATED_TO]->(Person),
# where the Person is the group member whose id equals 'groupMemberRelative'.
IsRelatedTo = Relationship.type("IS_RELATED_TO")

for _, relative in fam_members.iterrows():
    # Property keyword matching lets py2neo pass the value as a query
    # parameter, so names containing apostrophes cannot break the Cypher.
    family_node = matcher.match("familyMember", name=str(relative['name'])).first()
    member_names = group_members.loc[
        group_members['id'] == relative['groupMemberRelative'], 'name'
    ]
    person_node = matcher.match("Person", name=str(member_names.iloc[0])).first()
    graph.create(IsRelatedTo(family_node, person_node))

In [406]:
# Group Member to Course Relationships
# For each of the columns course1..course7, create
# (Person)-[:IS_ENROLLED_IN]->(Course) for every group member that has a
# course id in that column.
IsEnrolledIn = Relationship.type("IS_ENROLLED_IN")

course_columns = ['course%d' % slot for slot in range(1, 8)]

# Columns are the outer loop so relationships are created in the same order
# as before: all members' course1, then all members' course2, and so on.
for column in course_columns:
    for _, member in group_members.iterrows():
        course_id = member[column]
        if pd.isna(course_id):
            continue  # empty slot; applied to every column, including course1/course2
        # Property keyword matching lets py2neo pass the value as a query
        # parameter, so names or class titles containing apostrophes cannot
        # break the Cypher.
        student_node = matcher.match("Person", name=str(member['name'])).first()
        class_titles = courses.loc[courses['id'] == course_id, 'class title']
        course_node = matcher.match("Course", classTitle=str(class_titles.iloc[0])).first()
        graph.create(IsEnrolledIn(student_node, course_node))


In [397]:
# Destructive reset: deletes every node and relationship in the connected
# database. This cell is positioned after the population cells, so running
# the notebook top to bottom would otherwise wipe the graph that was just
# built. It is therefore opt-in: set RESET_GRAPH = True and run this cell on
# its own before re-running the creation cells.
RESET_GRAPH = False
if RESET_GRAPH:
    graph.run("match (n) detach delete n")