In [1]:
import os
from dotenv import load_dotenv

# Populate the process environment (python-dotenv) before the variables are read below.
load_dotenv()

from langchain_neo4j import Neo4jGraph

# Initialise the database connection object using the LangChain Neo4j helper.
# Connection settings are read from the NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD
# environment variables; os.getenv yields None for any variable that is unset.
# NOTE(review): how Neo4jGraph treats a None argument is not visible here — confirm.
graph = Neo4jGraph(
    url = os.getenv("NEO4J_URI"),
    username = os.getenv("NEO4J_USERNAME"),
    password = os.getenv("NEO4J_PASSWORD"),
)

In [2]:
# Run a test query: count the nodes that carry the Movie label.
cypher_query="""
MATCH (n:Movie)
RETURN COUNT(n) AS Movie_count
"""

# The call is the last expression of the cell, so its return value is shown as the output.
graph.query(cypher_query)

[{'Movie_count': 4803}]

### 2.1 기본 분석 쿼리

In [5]:
# Cypher query that retrieves the top 10 movies by rating.
# Movies whose rating property is null are filtered out, rows are sorted by rating
# (highest first), and each row exposes the columns Movie / Released / Rating.
cypher_query="""
MATCH (m:Movie) // Movie 라벨을 가진 모든 노드 매칭
WHERE m.rating IS NOT NULL // 평점 값이 존재하는 영화만 필터링
RETURN  m.title AS Movie,
        m.released AS Released,
        m.rating AS Rating
ORDER BY m.rating DESC
LIMIT 10
"""

# Keep the rows in `result` (the following DataFrame cell reads it), then display them.
result = graph.query(cypher_query)
result

[{'Movie': 'Stiff Upper Lips', 'Released': '1998-06-12', 'Rating': 10.0},
 {'Movie': 'Little Big Top', 'Released': '2006-01-01', 'Rating': 10.0},
 {'Movie': 'Me You and Five Bucks', 'Released': '2015-07-07', 'Rating': 10.0},
 {'Movie': 'Dancer, Texas Pop. 81', 'Released': '1998-05-01', 'Rating': 10.0},
 {'Movie': 'Sardaarji', 'Released': '2015-06-26', 'Rating': 9.5},
 {'Movie': "One Man's Hero", 'Released': '1999-08-02', 'Rating': 9.3},
 {'Movie': 'The Shawshank Redemption',
  'Released': '1994-09-23',
  'Rating': 8.5},
 {'Movie': 'There Goes My Baby', 'Released': '1994-09-02', 'Rating': 8.5},
 {'Movie': 'The Prisoner of Zenda', 'Released': '1937-09-03', 'Rating': 8.4},
 {'Movie': 'The Godfather', 'Released': '1972-03-14', 'Rating': 8.4}]

In [6]:
import pandas as pd
# Display the rows held in `result` (from the previous query cell) as a pandas DataFrame.
pd.DataFrame(result)

Unnamed: 0,Movie,Released,Rating
0,Stiff Upper Lips,1998-06-12,10.0
1,Little Big Top,2006-01-01,10.0
2,Me You and Five Bucks,2015-07-07,10.0
3,"Dancer, Texas Pop. 81",1998-05-01,10.0
4,Sardaarji,2015-06-26,9.5
5,One Man's Hero,1999-08-02,9.3
6,The Shawshank Redemption,1994-09-23,8.5
7,There Goes My Baby,1994-09-02,8.5
8,The Prisoner of Zenda,1937-09-03,8.4
9,The Godfather,1972-03-14,8.4


In [10]:
# Cypher query that finds the 5 people who most often appear in the same movie as a
# given actor (the previous header, "top 10 actors with the most movies", did not
# match what this query does).
# $actor_name is a query parameter supplied through `params`; the actor is excluded from
# their own co-actor list and rows are ranked by the number of shared movies.
cypher_query="""
MATCH (actor:Person {name: $actor_name})-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(co_actor:Person)
WHERE actor <> co_actor
RETURN co_actor.name AS CoActor, count(m) AS MoviesTogether
ORDER BY MoviesTogether DESC
LIMIT 5
"""

# Bind $actor_name to "Tom Hanks" and run the query.
graph.query(cypher_query, params={"actor_name":"Tom Hanks"})

[{'CoActor': 'Tim Allen', 'MoviesTogether': 3},
 {'CoActor': 'Joan Cusack', 'MoviesTogether': 2},
 {'CoActor': 'Gary Sinise', 'MoviesTogether': 2},
 {'CoActor': 'Don Rickles', 'MoviesTogether': 2},
 {'CoActor': 'Martin Sheen', 'MoviesTogether': 2}]

In [None]:
# Analyse actor–director collaborations: for the actor named by $actor_name, list the
# 5 directors who directed the most movies that the actor appeared in.
cypher_query="""
MATCH (a:Person)-[:ACTED_IN]->(m:Movie)<-[:DIRECTED]-(d:Person) //배우 a가 출연한 영화(m)와 그 영화를 감독한 감독(d)을 찾는 패턴
WHERE a.name = $actor_name
RETURN d.name AS Director, count(m) AS CollaborationCount
ORDER BY CollaborationCount DESC
LIMIT 5
"""

# Bind $actor_name to "Leonardo DiCaprio" and run the query.
graph.query(cypher_query, params={"actor_name":"Leonardo DiCaprio"})

[{'Director': 'Martin Scorsese', 'CollaborationCount': 5},
 {'Director': 'Baz Luhrmann', 'CollaborationCount': 2},
 {'Director': 'Jerry Zaks', 'CollaborationCount': 1},
 {'Director': 'James Cameron', 'CollaborationCount': 1},
 {'Director': 'Sam Raimi', 'CollaborationCount': 1}]

### 2.3 그래프 기반 추천 시스템

In [None]:
# Genre-based recommendation: find other movies that share at least one genre with the
# movie titled $movie_title and whose rating is above 7.0.
# Each row lists the recommended title, its rating and the collected shared genre names;
# rows are ordered by rating, then by the number of shared genres, and limited to 5.
cypher_query="""
MATCH (m:Movie{title: $movie_title})-[:IN_GENRE]->(g:Genre)<-[:IN_GENRE]-(rec:Movie)
WHERE m <> rec AND rec.rating > 7.0
RETURN  rec.title AS RecommendMovie,
        rec.rating AS Rating,
        collect(g.name) AS SharedGenres // collect() : 여러 행의 값을 하나의 배열로 수집하는 집계 함수
ORDER BY Rating DESC, size(SharedGenres) DESC
LIMIT 5
"""

# Bind $movie_title to "Apollo 13" and run the query.
graph.query(cypher_query, params={"movie_title": "Apollo 13"})

[{'RecommendMovie': 'Me You and Five Bucks',
  'Rating': 10.0,
  'SharedGenres': ['Drama']},
 {'RecommendMovie': 'Dancer, Texas Pop. 81',
  'Rating': 10.0,
  'SharedGenres': ['Drama']},
 {'RecommendMovie': "One Man's Hero",
  'Rating': 9.3,
  'SharedGenres': ['Drama']},
 {'RecommendMovie': 'The Shawshank Redemption',
  'Rating': 8.5,
  'SharedGenres': ['Drama']},
 {'RecommendMovie': 'There Goes My Baby',
  'Rating': 8.5,
  'SharedGenres': ['Drama']}]

In [15]:
# Composite recommendation (genre + actor weighting).
# For every other movie rated above 7.0, collect the genres and the actors it shares with
# the movie titled $movie_title, then score it as 2 per shared genre + 3 per shared actor.
# Genres are collected before the actor OPTIONAL MATCH runs, so the two lists are built
# independently of each other.
# Candidates with a score of 0 are dropped; the top 5 by score, then rating, are returned.
cypher_query="""
MATCH (m:Movie {title: $movie_title})
MATCH (rec:Movie) WHERE m <> rec AND rec.rating >7.0
OPTIONAL MATCH (m)-[:IN_GENRE]->(g:Genre)<-[:IN_GENRE]-(rec)

WITH m, rec, COLLECT(g.name) AS genres
OPTIONAL MATCH (m)<-[:ACTED_IN]-(a:Person)-[:ACTED_IN]->(rec)

WITH m, rec, genres, COLLECT(a.name) AS actors

WITH rec,
    size(genres) *2 + size(actors)*3 AS score,
    genres,
    actors
WHERE score > 0
RETURN  rec.title AS RecommendedMovie,
        rec.rating AS Rating,
        score AS RecommendationsScore,
        genres AS SharedGenres,
        actors AS SharedActors

ORDER BY score DESC, Rating DESC
LIMIT 5
"""
# Bind $movie_title to "Apollo 13" and run the query.
graph.query(cypher_query, params={"movie_title": "Apollo 13"})

[{'RecommendedMovie': 'Forrest Gump',
  'Rating': 8.2,
  'RecommendationsScore': 8,
  'SharedGenres': ['Drama'],
  'SharedActors': ['Tom Hanks', 'Gary Sinise']},
 {'RecommendedMovie': 'The Green Mile',
  'Rating': 8.2,
  'RecommendationsScore': 5,
  'SharedGenres': ['Drama'],
  'SharedActors': ['Tom Hanks']},
 {'RecommendedMovie': 'Saving Private Ryan',
  'Rating': 7.9,
  'RecommendationsScore': 5,
  'SharedGenres': ['Drama'],
  'SharedActors': ['Tom Hanks']},
 {'RecommendedMovie': 'The Truman Show',
  'Rating': 7.8,
  'RecommendationsScore': 5,
  'SharedGenres': ['Drama'],
  'SharedActors': ['Ed Harris']},
 {'RecommendedMovie': 'A Beautiful Mind',
  'Rating': 7.7,
  'RecommendationsScore': 5,
  'SharedGenres': ['Drama'],
  'SharedActors': ['Ed Harris']}]