## Neo4j Database Creation

In [1]:
!pip install py2neo



In [2]:
import os

import numpy as np
import py2neo

In [3]:
from py2neo import Graph
from py2neo import Node
from py2neo import Relationship

In [4]:
# Connection settings can be overridden through environment variables so that
# real credentials never have to be committed with the notebook. The fallback
# values are the original local-development defaults.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "launch"),
    ),
)

In [5]:
import pandas as pd

In [6]:
# Read the four source tables; every file is parsed with the same encoding
# and the pure-Python parser engine.
_csv_options = {"encoding": "ISO-8859-1", "engine": 'python'}
courses = pd.read_csv('courses.csv', **_csv_options)
fam_members = pd.read_csv('familyMembers.csv', **_csv_options)
group_members = pd.read_csv('groupMembers.csv', **_csv_options)
pets = pd.read_csv('pets.csv', **_csv_options)

In [7]:
group_members.columns

Index(['id', 'name', 'birthday', 'birthCity', 'hometown', 'age',
       'siblingCount', 'sibling1', 'sibling2', 'sibling3', 'petCount', 'pet1',
       'pet2', 'course1', 'course2', 'course3', 'course4', 'course5',
       'course6', 'course7', 'major', 'favIceCream', 'inState'],
      dtype='object')

In [8]:
pets

Unnamed: 0,id,name,species,breed,gender,age,ownerGroup,color
0,1,King Louie,Dog,Unknown,M,5.0,1,white
1,2,Taro,Dog,Border Collie mix,M,0.58,3,Brown
2,3,Blacky,Cat,,M,13.0,4,Black
3,4,Jacky,Cat,,M,13.0,4,Brown
4,5,Leo,Dog,German Shepherd,M,2.8,2,Black


In [9]:
def _optional(value):
    """Return None for a missing CSV cell (NaN/NA), otherwise the value unchanged.

    py2neo treats a None-valued property as absent, so missing cells are
    omitted from the node instead of being stored as a NaN float.
    """
    return None if pd.isna(value) else value


# Group Member Node Creation
for _, row in group_members.iterrows():
    person = Node(
        "Person",
        name=_optional(row['name']),
        birthday=_optional(row['birthday']),
        birthCity=_optional(row['birthCity']),
        hometown=_optional(row['hometown']),
        age=int(row['age']),
        siblingCount=int(row['siblingCount']),
        major=_optional(row['major']),
        inState=int(row['inState']),
    )
    graph.create(person)

# Pet Node Creation
for _, row in pets.iterrows():
    pet_age = row['age']
    pet = Node(
        "Pet",
        name=_optional(row['name']),
        species=_optional(row['species']),
        breed=_optional(row['breed']),
        gender=_optional(row['gender']),
        # Pet ages are fractional years (e.g. 0.58, 2.8); int() would
        # truncate them, so keep them as floats.
        age=None if pd.isna(pet_age) else float(pet_age),
        color=_optional(row['color']),
    )
    graph.create(pet)

# Fam Members Node Creation
for _, row in fam_members.iterrows():
    familyMember = Node(
        "familyMember",
        name=_optional(row['name']),
        age=int(row['age']),
        gender=_optional(row['gender']),
        birthday=_optional(row['birthday']),
        petCount=int(row['petCount']),
        # NOTE(review): bool() is only meaningful if this column holds
        # booleans or 0/1; any non-empty string would become True -- confirm
        # against the CSV.
        enrolledAtUVA=bool(row['enrolledAtUVA']),
    )
    graph.create(familyMember)

# Courses Node Creation
for _, row in courses.iterrows():
    course = Node(
        "Course",
        courseMnemonic=_optional(row['course mnemonic']),
        courseNumber=int(row['course number']),
        department=_optional(row['department']),
        classTitle=_optional(row['class title']),
        professor=_optional(row['professor']),
        meetingDays=_optional(row['meeting day(s)']),
        classType=_optional(row['type']),
        classCapacity=int(row['class capacity']),
        courseDescription=_optional(row['course description']),
    )
    graph.create(course)


                  

In [10]:
# NodeMatcher is used by the relationship cells below to look up the nodes
# created earlier (Graph is already imported near the top of the notebook).
from py2neo import Graph, NodeMatcher
# Matcher bound to the `graph` connection opened earlier in the notebook.
matcher = NodeMatcher(graph)

In [11]:
fam_members.columns

Index(['id ', 'relation', 'name', 'age', 'gender', 'birthday', 'favIceCream',
       'petCount', 'pet1Name', 'pet2Name', 'enrolledAtUVA', 'courses',
       'groupMemberRelative'],
      dtype='object')

In [12]:
# Link each group member to the pet referenced in their 'pet1' column.
Owns = Relationship.type("OWNS")
for _, member in group_members.iterrows():
    pet_id = member['pet1']
    if pd.isna(pet_id):
        continue  # this member has no first pet listed
    # Match on properties via keyword arguments instead of concatenating the
    # name into a predicate string, so names containing apostrophes
    # (e.g. "O'Brien") cannot break or alter the generated Cypher.
    owner_node = matcher.match("Person", name=str(member['name'])).first()
    # The pets id column is addressed as 'id ' (with a trailing space) here.
    pet_row = pets.loc[pets['id '] == pet_id].iloc[0]
    pet_node = matcher.match("Pet", name=str(pet_row['name'])).first()
    graph.create(Owns(owner_node, pet_node))

In [13]:
# Link each group member to the pet referenced in their 'pet2' column.
# Relies on the `Owns` relationship type defined in the previous cell.
for _, member in group_members.iterrows():
    pet_id = member['pet2']
    if pd.isna(pet_id):
        continue  # this member has no second pet listed
    # Match on properties via keyword arguments instead of concatenating the
    # name into a predicate string, so names containing apostrophes
    # cannot break or alter the generated Cypher.
    owner_node = matcher.match("Person", name=str(member['name'])).first()
    # The pets id column is addressed as 'id ' (with a trailing space) here.
    pet_row = pets.loc[pets['id '] == pet_id].iloc[0]
    pet_node = matcher.match("Pet", name=str(pet_row['name'])).first()
    graph.create(Owns(owner_node, pet_node))

In [14]:
fam_members.columns

Index(['id ', 'relation', 'name', 'age', 'gender', 'birthday', 'favIceCream',
       'petCount', 'pet1Name', 'pet2Name', 'enrolledAtUVA', 'courses',
       'groupMemberRelative'],
      dtype='object')

In [24]:
# Link every family member to the group member named by their
# 'groupMemberRelative' id.
IsRelatedTo = Relationship.type("IS_RELATED_TO")
for _, relative_row in fam_members.iterrows():
    # Keyword property matching keeps names with apostrophes from breaking
    # the generated Cypher (the previous version concatenated the name into
    # the predicate string).
    family_node = matcher.match("familyMember", name=str(relative_row['name'])).first()
    member_row = group_members.loc[
        group_members['id'] == relative_row['groupMemberRelative']
    ].iloc[0]
    person_node = matcher.match("Person", name=str(member_row['name'])).first()
    graph.create(IsRelatedTo(family_node, person_node))

In [16]:
courses.columns

Index(['id', 'course mnemonic', 'course number', 'department', 'class title',
       'professor', 'meeting day(s)', 'type', 'class capacity',
       'course description'],
      dtype='object')

In [17]:
group_members.head()

Unnamed: 0,id,name,birthday,birthCity,hometown,age,siblingCount,sibling1,sibling2,sibling3,...,course1,course2,course3,course4,course5,course6,course7,major,favIceCream,inState
0,1,Anna Grace Calhoun,2001/06/11,"Boston, MA","Charlottesville, VA",19,3,1.0,2.0,3.0,...,1,3,5.0,8.0,10.0,14.0,15.0,"Computer Science, Public Policy and Leadership",Chocolate,1
1,2,Daniel Tohti,2001/12/28,"Gainesville, FL","Fairfax, VA",19,2,6.0,7.0,,...,0,2,12.0,4.0,6.0,7.0,9.0,Aerospace Engineering,Mint Chocolate Chip,1
2,3,Dare Nguyen,1996/09/03,"Virginia Beach, VA","Virginia Beach, VA",24,3,13.0,14.0,17.0,...,11,16,13.0,17.0,,,,Psychology,Cookies and Cream,1
3,4,David Siamon,2014/05/30,"Reston, VA","Fairfax, VA",7,1,10.0,,,...,18,19,20.0,21.0,22.0,,,"Mathematics, Computer Science",Melted,1
4,5,Danny Devito,1944/11/17,"Neptune Township, NJ","Neptune Township, NJ",76,0,,,,...,14,23,,,,,,Acting,Chocolate,0


In [32]:
# Link every group member to each course listed in their course1..course7
# columns. One loop over the column names replaces seven copy-pasted loops;
# relationships are still created column by column, in the same order.
IsEnrolledIn = Relationship.type("IS_ENROLLED_IN")
course_columns = ['course1', 'course2', 'course3', 'course4', 'course5', 'course6', 'course7']
for course_column in course_columns:
    for _, member in group_members.iterrows():
        course_ref = member[course_column]
        if pd.isna(course_ref):
            continue  # no course in this slot for this member
        # The course reference is used as a row label of `courses`
        # (not matched against its 'id' column).
        # NOTE(review): presumably label and id coincide -- confirm.
        course_row = courses.loc[course_ref]
        # Keyword property matching avoids building the predicate by string
        # concatenation, which fails for titles or names containing an
        # apostrophe.
        course_node = matcher.match("Course", classTitle=str(course_row['class title'])).first()
        # matching courses to group members
        person_node = matcher.match("Person", name=str(member['name'])).first()
        graph.create(IsEnrolledIn(person_node, course_node))

In [19]:
fam_members.groupMemberRelative.value_counts()

1    5
3    5
2    4
4    3
5    3
Name: groupMemberRelative, dtype: int64

In [20]:
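# Reset helper (destructive): uncomment to delete every node and relationship
# in the database before re-running the notebook from the top.
#graph.run("match (n) detach delete n")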