In [1]:
from neo4j import GraphDatabase
from tqdm import tqdm
import pandas as pd

class Neo4j:
    def __init__(self,uri,user,password):
        self.driver = GraphDatabase.driver(uri,auth=(user,password))

    def query(self,query):
        result=None
        with self.driver.session() as session:
            result = list(session.run(query))
            session.close()
        return result
    def close(self):
        self.driver.close()

neo = Neo4j("bolt://localhost:7687","neo4j","1234")

In [2]:
# Geting all genres 
all_genres=[_['name']  for _ in neo.query(f''' 
    MATCH (g:genre) RETURN g.name as name
''')]
all_genres.sort()
all_genres

['Action',
 'Adult',
 'Adventure',
 'Animation',
 'Biography',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'Film-Noir',
 'History',
 'Horror',
 'Music',
 'Musical',
 'Mystery',
 'Reality-TV',
 'Romance',
 'Sci-Fi',
 'Short',
 'Sport',
 'Thriller',
 'War',
 'Western']

# For Writers

In [3]:
all_writers_ids=[_["id"] for _ in neo.query(f'''
    match(w:writer) 
    with w
    return w.id as id
    ''')]
print("Length: " +str(len(all_writers_ids)))
print(all_writers_ids[0])

Length: 78648
2779808


In [4]:
for each_id in tqdm(all_writers_ids):

    query_result=neo.query(f''' match(w:writer) where w.id={each_id}
                    with w
                    match (w)-[r:written]-(m:movie)
                    with w,m
                    return m.genre as genre
                    ''')
    personality_trait=dict()
    for g in all_genres:
        personality_trait[g]=personality_trait.get(g,-1)+1
    for each in query_result:
        for e in each["genre"].split("|"):
            personality_trait[e]=personality_trait.get(e,0)+1
    sum=0
    for i in personality_trait:
        sum=sum+personality_trait[i]
    # print(sum)
    for i in personality_trait:
        personality_trait[i]=int(personality_trait[i]*100/sum)
    # print(personality_trait)
    for each_key in personality_trait:
        # print(each_key)
        field=each_key
        if(each_key=="Film-Noir"):
            field="FilmNoir"
        elif(each_key=="Sci-Fi"):
            field="SciFi"
        elif(each_key=="Reality-TV"):
            field="RealityTV"
        # print(each_id,personality_trait[each_key])
        s= neo.query(f''' 
                match(w:writer) where w.id={each_id} set w.{field+"_trait"}={personality_trait[each_key]} return w
        ''')

100%|██████████| 78648/78648 [4:55:58<00:00,  4.43it/s]


# For Directors

In [None]:
all_directors_ids=[_["id"] for _ in neo.query(f'''
    match(d:director) 
    with d
    return d.id as id
    ''')]
print("Length: " +str(len(all_directors_ids)))
print(all_directors_ids[0])

In [149]:
for each_id in tqdm(all_directors_ids):

    query_result=neo.query(f''' match(d:director) where d.id={each_id}
                    with d 
                    match (d)-[r:directed]-(m:movie)
                    with d,m
                    return m.genre as genre
                    ''')
    personality_trait=dict()
    for g in all_genres:
        personality_trait[g]=personality_trait.get(g,-1)+1
    for each in query_result:
        for e in each["genre"].split("|"):
            personality_trait[e]=personality_trait.get(e,0)+1
    sum=0
    for i in personality_trait:
        sum=sum+personality_trait[i]
    # print(sum)
    for i in personality_trait:
        personality_trait[i]=int(personality_trait[i]*100/sum)
    # print(personality_trait)
    for each_key in personality_trait:
        # print(each_key)
        field=each_key
        if(each_key=="Film-Noir"):
            field="FilmNoir"
        elif(each_key=="Sci-Fi"):
            field="SciFi"
        elif(each_key=="Reality-TV"):
            field="RealityTV"

        s= neo.query(f''' 
                match(d:director) where d.id={each_id} set d.{field+"_trait"}={personality_trait[each_key]} return d
        ''')
    # break

100%|██████████| 36123/36123 [2:20:17<00:00,  4.29it/s]
