# 데이터 탐색

영화 데이터와 카탈로그를 탐색합니다.
- 특정 영화 검색
- 카탈로그 테이블 확인
- 원본 데이터 컬럼 확인

In [2]:
import sys
sys.path.insert(0, "/Users/jisoo/projects/thesis/carte_test")

import pandas as pd
import json
from pathlib import Path
from IPython.display import display, HTML

from config import RAW, PROCESSED

# Notebook-wide pandas display options: no limit on the number of columns
# shown, cell text capped at 100 characters, width set to None, and up to
# 100 rows printed.
for option_name, option_value in (
    ("display.max_columns", None),
    ("display.max_colwidth", 100),
    ("display.width", None),
    ("display.max_rows", 100),
):
    pd.set_option(option_name, option_value)

print("Ready!")

Ready!


---
## 1. 원본 데이터 컬럼 확인

In [None]:
# Raw MovieLens data: preview the first five rows of each CSV that exists.
def _preview_raw_csv(csv_path, label):
    """Read the first 5 rows of ``csv_path``, print its columns, display and return it."""
    preview = pd.read_csv(csv_path, nrows=5)
    print(f"\n[{label}]")
    print(f"컬럼: {list(preview.columns)}")
    display(preview)
    return preview


rule = "=" * 60
print(rule)
print("MovieLens 원본 데이터")
print(rule)

# Each preview frame is only bound when its file is present, so a missing file
# leaves the corresponding name undefined rather than set to None.
if RAW.MOVIES_CSV.exists():
    df_movies_raw = _preview_raw_csv(RAW.MOVIES_CSV, "movies.csv")

if RAW.LINKS_CSV.exists():
    df_links_raw = _preview_raw_csv(RAW.LINKS_CSV, "links.csv")

if RAW.RATINGS_CSV.exists():
    df_ratings_raw = _preview_raw_csv(RAW.RATINGS_CSV, "ratings.csv")

In [None]:
# Raw TMDB data (JSONL): show the top-level keys of the first record.
print("=" * 60)
print("TMDB 원본 데이터 (JSONL)")
print("=" * 60)

if RAW.TMDB_MOVIES_JSONL.exists():
    # Parse only the first record to inspect the structure. The encoding is
    # pinned to UTF-8 so non-ASCII text (names, titles) decodes the same way
    # regardless of the platform's locale encoding.
    with open(RAW.TMDB_MOVIES_JSONL, "r", encoding="utf-8") as f:
        first_line = f.readline()
        sample = json.loads(first_line)

    print("\n[tmdb_movies.jsonl] 최상위 키:")
    for key, val in sample.items():
        val_type = type(val).__name__
        # Shorten long strings and lists so each key fits on one line.
        if isinstance(val, str) and len(val) > 50:
            val = val[:50] + "..."
        elif isinstance(val, list) and len(val) > 3:
            val = val[:3] + ["..."]
        print(f"  - {key} ({val_type}): {val}")
else:
    print("TMDB JSONL 파일이 없습니다.")

In [None]:
# Pretty-print the TMDB sample record (the `sample` object parsed in the
# previous cell), capped at the first 3000 characters of the JSON text.
if RAW.TMDB_MOVIES_JSONL.exists():
    print("[TMDB 샘플 레코드 전체]")
    pretty_sample = json.dumps(sample, ensure_ascii=False, indent=2)
    print(pretty_sample[:3000])

---
## 2. 처리된 데이터 컬럼 확인

In [3]:
# Processed movie catalog: total row count, then dtype and non-null coverage
# for every column.
rule = "=" * 60
print(rule)
print("Movie Catalog (Processed)")
print(rule)

df_catalog = pd.read_parquet(PROCESSED.MOVIE_CATALOG_PARQUET)
total_rows = len(df_catalog)
print(f"\n총 영화 수: {total_rows:,}")
print(f"\n컬럼 ({len(df_catalog.columns)}개):")
for column_name in df_catalog.columns:
    column = df_catalog[column_name]
    filled = column.notna().sum()
    coverage = filled / total_rows * 100
    print(f"  - {column_name:30s} ({column.dtype}) - {filled:,} ({coverage:.1f}%)")

Movie Catalog (Processed)

총 영화 수: 86,272

컬럼 (20개):
  - movieId                        (int64) - 86,272 (100.0%)
  - tmdbId                         (int64) - 86,272 (100.0%)
  - release_year                   (Int64) - 86,237 (100.0%)
  - original_title                 (object) - 86,272 (100.0%)
  - tagline                        (object) - 41,949 (48.6%)
  - overview                       (object) - 85,991 (99.7%)
  - produced_by_company_1          (object) - 75,547 (87.6%)
  - produced_by_company_2          (object) - 42,891 (49.7%)
  - produced_in_country_1          (object) - 81,610 (94.6%)
  - produced_in_country_2          (object) - 15,524 (18.0%)
  - spoken_language_1              (object) - 83,147 (96.4%)
  - spoken_language_2              (object) - 15,329 (17.8%)
  - actor_1                        (object) - 83,077 (96.3%)
  - actor_2                        (object) - 80,250 (93.0%)
  - actor_3                        (object) - 78,568 (91.1%)
  - director_1                 

In [4]:
# Preview the first five rows of the catalog.
print("\n[카탈로그 샘플 (처음 5개)]")
catalog_preview = df_catalog.head(n=5)
display(catalog_preview)


[카탈로그 샘플 (처음 5개)]


Unnamed: 0,movieId,tmdbId,release_year,original_title,tagline,overview,produced_by_company_1,produced_by_company_2,produced_in_country_1,produced_in_country_2,spoken_language_1,spoken_language_2,actor_1,actor_2,actor_3,director_1,writer_1,genre_1,genre_2,genre_3
0,2858,14,1999,American Beauty,... look closer,"Lester Burnham, a depressed suburban father in a mid-life crisis, decides to turn his hectic lif...",DreamWorks Pictures,Jinks/Cohen Company,United States of America,,English,,Kevin Spacey,Annette Bening,Thora Birch,Sam Mendes,Alan Ball,Drama,,
1,923,15,1941,Citizen Kane,Some called him a hero...others called him a heel.,Newspaper magnate Charles Foster Kane is taken from his mother as a boy and made the ward of a r...,Mercury Productions,RKO Radio Pictures,United States of America,,English,,Orson Welles,Joseph Cotten,Dorothy Comingore,Orson Welles,Orson Welles,Mystery,Drama,
2,1266,33,1992,Unforgiven,Some legends will never be forgotten. Some wrongs can never be forgiven.,"William Munny is a retired, once-ruthless killer turned gentle widower and hog farmer. To help s...",Warner Bros. Pictures,Malpaso Productions,United States of America,,English,,Clint Eastwood,Gene Hackman,Morgan Freeman,Clint Eastwood,David Webb Peoples,Western,,
3,3910,16,2000,Dancer in the Dark,"In a world of shadows, she found the light of life.","Selma, a Czech immigrant on the verge of blindness, struggles to make ends meet for herself and ...",Zentropa Entertainments,DR,Denmark,Finland,English,,Björk,Catherine Deneuve,David Morse,Lars von Trier,Lars von Trier,Drama,Crime,
4,43589,17,2005,The Dark,One of the living for one of the dead.,"In an attempt to pull her family together, Adèlle travels with her young daughter Sarah to Wales...",Constantin Film,Impact Pictures,Germany,United Kingdom,Welsh,English,Maria Bello,Sean Bean,Abigail Stone,John Fawcett,Stephen Massicotte,Horror,Thriller,Mystery


In [5]:
# Movie embeddings: row count, column names, and the length of the first
# embedding vector, when the parquet file exists.
rule = "=" * 60
print(rule)
print("Movie Embeddings")
print(rule)

if not PROCESSED.MOVIE_EMBEDDINGS_PARQUET.exists():
    print("임베딩 파일이 없습니다.")
else:
    df_emb = pd.read_parquet(PROCESSED.MOVIE_EMBEDDINGS_PARQUET)
    print(f"총 영화 수: {len(df_emb):,}")
    print(f"컬럼: {list(df_emb.columns)}")
    first_vector = df_emb['embedding'].iloc[0]
    print(f"임베딩 차원: {len(first_vector)}")

Movie Embeddings
총 영화 수: 86,272
컬럼: ['movieId', 'embedding']
임베딩 차원: 300


In [6]:
# Filtered ratings: rating count, distinct users and movies, column names,
# and a five-row preview, when the parquet file exists.
rule = "=" * 60
print(rule)
print("Filtered Ratings")
print(rule)

if not PROCESSED.RATINGS_PARQUET.exists():
    print("평점 파일이 없습니다.")
else:
    df_ratings = pd.read_parquet(PROCESSED.RATINGS_PARQUET)
    print(f"총 평점 수: {len(df_ratings):,}")
    for label, id_column in (("유저 수", "userId"), ("영화 수", "movieId")):
        print(f"{label}: {df_ratings[id_column].nunique():,}")
    print(f"컬럼: {list(df_ratings.columns)}")
    display(df_ratings.head())

Filtered Ratings
총 평점 수: 13,717,662
유저 수: 200,948
영화 수: 54,520
컬럼: ['userId', 'movieId', 'rating', 'timestamp']


Unnamed: 0,userId,movieId,rating,timestamp
0,3,3248,4.0,1084486164
1,3,1957,5.0,1084486061
2,3,534,4.0,1084486058
3,3,2150,4.0,1084486055
4,3,26,4.0,1084486051


---
## 3. 영화 검색

In [7]:
def search_movie(query: str, max_results: int = 20, *, regex: bool = False) -> "pd.DataFrame | None":
    """
    Search the movie catalog by title.

    The query is matched case-insensitively against the title column of the
    notebook-level ``df_catalog`` (``original_title`` when present, otherwise
    ``title``). By default the query is a literal substring, so titles that
    contain regex metacharacters such as ``(``, ``+`` or ``?`` can be searched
    without raising or mis-matching.

    Args:
        query: Search text (partial match).
        max_results: Maximum number of matching rows to show and return.
        regex: Interpret ``query`` as a regular expression instead of a
            literal substring.

    Returns:
        The first ``max_results`` matching catalog rows, or ``None`` when
        nothing matches.
    """
    title_col = 'original_title' if 'original_title' in df_catalog.columns else 'title'

    # na=False makes rows with a missing title count as non-matches.
    mask = df_catalog[title_col].str.contains(query, case=False, na=False, regex=regex)
    results = df_catalog[mask].head(max_results)

    print(f"'{query}' 검색 결과: {int(mask.sum())}개 중 {len(results)}개 표시")
    print("=" * 80)

    if results.empty:
        print("검색 결과가 없습니다.")
        return None

    # Compact listing: only the summary columns that exist in the catalog.
    display_cols = [title_col, 'release_year', 'genre_1', 'director_1']
    display_cols = [c for c in display_cols if c in results.columns]
    display(results[display_cols])

    return results

In [26]:
# Example search; the returned DataFrame is the cell's last expression, so the
# notebook renders the full rows after the compact listing.
search_movie("The Lord of the Rings")

'The Lord of the Rings' 검색 결과: 6개 중 6개 표시


Unnamed: 0,original_title,release_year,genre_1,director_1
86,The Lord of the Rings: The Fellowship of the Ring,2001,Adventure,Peter Jackson
87,The Lord of the Rings: The Two Towers,2002,Adventure,Peter Jackson
88,The Lord of the Rings: The Return of the King,2003,Adventure,Peter Jackson
89,The Lord of the Rings,1978,Adventure,Ralph Bakshi
69829,Passage to Middle-Earth: The Making of 'The Lord of the Rings',2001,Documentary,
79228,"J.R.R. Tolkien and the Birth of ""The Lord of the Rings"" and ""The Hobbit""",2004,Documentary,


Unnamed: 0,movieId,tmdbId,release_year,original_title,tagline,overview,produced_by_company_1,produced_by_company_2,produced_in_country_1,produced_in_country_2,spoken_language_1,spoken_language_2,actor_1,actor_2,actor_3,director_1,writer_1,genre_1,genre_2,genre_3
86,4993,120,2001,The Lord of the Rings: The Fellowship of the Ring,One ring to rule them all.,"Young hobbit Frodo Baggins, after inheriting a mysterious ring from his uncle Bilbo, must leave ...",New Line Cinema,WingNut Films,New Zealand,United States of America,English,,Elijah Wood,Ian McKellen,Viggo Mortensen,Peter Jackson,Philippa Boyens,Adventure,Fantasy,Action
87,5952,121,2002,The Lord of the Rings: The Two Towers,The journey continues.,Frodo Baggins and the other members of the Fellowship continue on their sacred quest to destroy ...,New Line Cinema,WingNut Films,New Zealand,United States of America,English,,Elijah Wood,Ian McKellen,Viggo Mortensen,Peter Jackson,Fran Walsh,Adventure,Fantasy,Action
88,7153,122,2003,The Lord of the Rings: The Return of the King,There can be no triumph without loss. No victory without suffering. No freedom without sacrifice.,"As armies mass for a final battle that will decide the fate of the world--and powerful, ancient ...",New Line Cinema,WingNut Films,New Zealand,United States of America,English,,Elijah Wood,Ian McKellen,Viggo Mortensen,Peter Jackson,Peter Jackson,Adventure,Fantasy,Action
89,2116,123,1978,The Lord of the Rings,Fantasy...beyond your imagination,The Fellowship of the Ring embark on a journey to destroy the One Ring and end Sauron's reign ov...,Fantasy Films,Bakshi Productions,United Kingdom,United States of America,English,,Christopher Guard,William Squire,Michael Scholes,Ralph Bakshi,Peter S. Beagle,Adventure,Animation,Fantasy
69829,231829,453779,2001,Passage to Middle-Earth: The Making of 'The Lord of the Rings',,The making of Peter Jackson's adaptation of Tolkien's classic work.,Highway,New Line Television,United States of America,,English,,Elijah Wood,Peter Jackson,Ian McKellen,,,Documentary,TV Movie,
79228,267392,651342,2004,"J.R.R. Tolkien and the Birth of ""The Lord of the Rings"" and ""The Hobbit""",This program will give you a real insight into the man behind the legends of “THE LORD OF THE RI...,In a house in Oxford lived a remarkable man called J.R.R. Tolkien who told stories that thrilled...,,,,,English,,,,,,,Documentary,,


In [28]:
# Title search for "Hobbit".
search_movie("Hobbit")

'Hobbit' 검색 결과: 5개 중 5개 표시


Unnamed: 0,original_title,release_year,genre_1,director_1
22713,The Hobbit: An Unexpected Journey,2012,Adventure,Peter Jackson
25812,The Hobbit: The Desolation of Smaug,2013,Fantasy,Peter Jackson
37027,J. R. R. Tolkien's The Hobbit,1967,Animation,Gene Deitch
41063,The Hobbit: The Battle of the Five Armies,2014,Action,Peter Jackson
79228,"J.R.R. Tolkien and the Birth of ""The Lord of the Rings"" and ""The Hobbit""",2004,Documentary,


Unnamed: 0,movieId,tmdbId,release_year,original_title,tagline,overview,produced_by_company_1,produced_by_company_2,produced_in_country_1,produced_in_country_2,spoken_language_1,spoken_language_2,actor_1,actor_2,actor_3,director_1,writer_1,genre_1,genre_2,genre_3
22713,98809,49051,2012,The Hobbit: An Unexpected Journey,From the smallest beginnings come the greatest legends.,"Bilbo Baggins, a hobbit enjoying his quiet life, is swept into an epic quest by Gandalf the Grey...",New Line Cinema,Metro-Goldwyn-Mayer,New Zealand,United States of America,English,,Martin Freeman,Ian McKellen,Richard Armitage,Peter Jackson,Fran Walsh,Adventure,Fantasy,Action
25812,106489,57158,2013,The Hobbit: The Desolation of Smaug,Beyond darkness... beyond desolation... lies the greatest danger of all.,"The Dwarves, Bilbo and Gandalf have successfully escaped the Misty Mountains, and Bilbo has gain...",New Line Cinema,Metro-Goldwyn-Mayer,New Zealand,United States of America,English,,Ian McKellen,Martin Freeman,Richard Armitage,Peter Jackson,Philippa Boyens,Fantasy,Adventure,Action
37027,249698,99457,1967,J. R. R. Tolkien's The Hobbit,,The very first movie adaptation of J.R.R Tolkien's The Hobbit. Made in just 30 days so that the ...,Rembrandt Films,,Czechoslovakia,United States of America,English,,Herbert Lass,,,Gene Deitch,Gene Deitch,Animation,Fantasy,Adventure
41063,118696,122917,2014,The Hobbit: The Battle of the Five Armies,Will you follow me... one last time?,"Following Smaug's attack on Laketown, Bilbo and the dwarves try to defend Erebor's mountain of t...",New Line Cinema,Metro-Goldwyn-Mayer,New Zealand,United States of America,English,,Ian McKellen,Martin Freeman,Richard Armitage,Peter Jackson,Peter Jackson,Action,Adventure,Fantasy
79228,267392,651342,2004,"J.R.R. Tolkien and the Birth of ""The Lord of the Rings"" and ""The Hobbit""",This program will give you a real insight into the man behind the legends of “THE LORD OF THE RI...,In a house in Oxford lived a remarkable man called J.R.R. Tolkien who told stories that thrilled...,,,,,English,,,,,,,Documentary,,


In [10]:
# Title search for "Godfather".
search_movie("Godfather")

'Godfather' 검색 결과: 15개 중 15개 표시


Unnamed: 0,original_title,release_year,genre_1,director_1
187,The Godfather,1972,Drama,Francis Ford Coppola
189,The Godfather Part II,1974,Drama,Francis Ford Coppola
191,The Godfather Part III,1990,Crime,Francis Ford Coppola
13526,3 Godfathers,1948,Western,John Ford
25491,The Godfather Family: A Look Inside,1990,Documentary,Jeff Werner
26353,Herschell Gordon Lewis: The Godfather of Gore,2010,Documentary,Frank Henenlotter
31683,Disco Godfather,1979,Action,J. Robert Wagoner
32664,The Black Godfather,1974,Action,John Evans
44807,Three Godfathers,1936,Action,Richard Boleslawski
50663,The Rockford Files: Godfather Knows Best,1996,Crime,Tony Wharmby


Unnamed: 0,movieId,tmdbId,release_year,original_title,tagline,overview,produced_by_company_1,produced_by_company_2,produced_in_country_1,produced_in_country_2,spoken_language_1,spoken_language_2,actor_1,actor_2,actor_3,director_1,writer_1,genre_1,genre_2,genre_3
187,858,238,1972,The Godfather,An offer you can't refuse.,"Spanning the years 1945 to 1955, a chronicle of the fictional Italian-American Corleone crime fa...",Paramount Pictures,Alfran Productions,United States of America,,English,Italian,Marlon Brando,Al Pacino,James Caan,Francis Ford Coppola,Mario Puzo,Drama,Crime,
189,1221,240,1974,The Godfather Part II,The rise and fall of the Corleone empire.,"In the continuing saga of the Corleone crime family, a young Vito Corleone grows up in Sicily an...",Paramount Pictures,The Coppola Company,United States of America,,English,Italian,Al Pacino,Robert Duvall,Diane Keaton,Francis Ford Coppola,Mario Puzo,Drama,Crime,
191,2023,242,1990,The Godfather Part III,All the power on earth can't change destiny.,"In the midst of trying to legitimize his business dealings in 1979 New York and Italy, aging maf...",Paramount Pictures,American Zoetrope,United States of America,,English,Italian,Al Pacino,Diane Keaton,Talia Shire,Francis Ford Coppola,Mario Puzo,Crime,Drama,Thriller
13526,25934,29244,1948,3 Godfathers,John Ford's Legend of the Southwest!,Three outlaws on the run discover a dying woman and her baby. They swear to bring the infant to ...,Argosy Pictures,Metro-Goldwyn-Mayer,United States of America,,Spanish,English,John Wayne,Pedro Armendáriz,"Harry Carey, Jr.",John Ford,Robert Nathan,Western,,
25491,188173,56205,1990,The Godfather Family: A Look Inside,,"A documentary on the making of the three Godfather films, with interviews and recollections from...",Paramount Pictures,American Zoetrope,,,English,Italian,Francis Ford Coppola,Mario Puzo,James Caan,Jeff Werner,David Gilbert,Documentary,TV Movie,
26353,176537,58862,2010,Herschell Gordon Lewis: The Godfather of Gore,,Take an outrageous ride through this wild world of exploitation films with this thoroughly enter...,Something Weird Video (SWV),,United States of America,,English,,Herschell Gordon Lewis,John Waters,Ray Sager,Frank Henenlotter,,Documentary,Horror,
31683,106704,77403,1979,Disco Godfather,Touch him and you're dust!,Retired cop and celebrity DJ Tucker Williams (aka The Disco Godfather) takes to the streets as a...,Generation International,,United States of America,,English,,Rudy Ray Moore,Carol Speed,Jimmy Lynch,J. Robert Wagoner,J. Robert Wagoner,Action,Comedy,Crime
32664,123880,80755,1974,The Black Godfather,There's a new godfather in town...,The heroes in The Black Godfather are members of an African-American criminal organization. Like...,Cougnar Productions,,United States of America,,English,,Rod Perry,Damu King,Don Chastain,John Evans,John Evans,Action,Crime,Thriller
44807,124791,149955,1936,Three Godfathers,On the brink of hell...a baby's smile came into the darkened souls of three bad men!,"In a town called New Jerusalem, three bandits hold up a bank. After a gun battle with the townsp...",Metro-Goldwyn-Mayer,,United States of America,,English,,Chester Morris,Lewis Stone,Walter Brennan,Richard Boleslawski,Ainsworth Morgan,Action,Drama,Western
50663,217963,208391,1996,The Rockford Files: Godfather Knows Best,,"Jim Rockford tries to help his godson, son of Denis Becker, rise out of poverty and homelessness...",,,,,English,,James Garner,Stuart Margolin,Maxwell Caulfield,Tony Wharmby,,Crime,Mystery,TV Movie


---
## 4. 특정 영화 상세 정보

In [11]:
def show_movie_detail(query: str, *, regex: bool = False) -> "pd.Series | None":
    """
    Print every populated catalog field of one movie.

    The query is matched case-insensitively against the title column of the
    notebook-level ``df_catalog`` (``original_title`` when present, otherwise
    ``title``) and the first matching row is used. By default the query is a
    literal substring, so titles containing regex metacharacters such as
    ``(``, ``+`` or ``?`` are handled safely.

    Args:
        query: Movie title (partial match, first result is used).
        regex: Interpret ``query`` as a regular expression instead of a
            literal substring.

    Returns:
        The matching catalog row as a Series, or ``None`` when no title
        matches.
    """
    title_col = 'original_title' if 'original_title' in df_catalog.columns else 'title'

    # na=False makes rows with a missing title count as non-matches.
    mask = df_catalog[title_col].str.contains(query, case=False, na=False, regex=regex)

    if not mask.any():
        print(f"'{query}'에 해당하는 영화를 찾을 수 없습니다.")
        return None

    movie = df_catalog[mask].iloc[0]

    # Fall back to 'N/A' both when the column is absent and when the value is
    # missing; .get()'s default alone only covers the absent-column case.
    year = movie.get('release_year')
    year_label = year if pd.notna(year) else 'N/A'

    print("=" * 80)
    print(f"{movie[title_col]} ({year_label})")
    print("=" * 80)

    # Print every field that has a value, truncating long text to 100 chars.
    for col in df_catalog.columns:
        val = movie[col]
        if pd.isna(val) or val == '':
            continue
        val_str = str(val)
        if len(val_str) > 100:
            val_str = val_str[:100] + "..."
        print(f"  {col:30s}: {val_str}")

    return movie

In [12]:
# Example detail view: prints the populated fields of the first title match;
# the returned row is also rendered as the cell result.
show_movie_detail("Toy Story")

Toy Story (1995)
  movieId                       : 1
  tmdbId                        : 862
  release_year                  : 1995
  original_title                : Toy Story
  tagline                       : Hang on for the comedy that goes to infinity and beyond!
  overview                      : Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto ...
  produced_by_company_1         : Pixar
  produced_in_country_1         : United States of America
  spoken_language_1             : English
  actor_1                       : Tom Hanks
  actor_2                       : Tim Allen
  actor_3                       : Don Rickles
  director_1                    : John Lasseter
  writer_1                      : Joss Whedon
  genre_1                       : Family
  genre_2                       : Comedy
  genre_3                       : Animation


movieId                                                                                                                    1
tmdbId                                                                                                                   862
release_year                                                                                                            1995
original_title                                                                                                     Toy Story
tagline                                                             Hang on for the comedy that goes to infinity and beyond!
overview                 Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear o...
produced_by_company_1                                                                                                  Pixar
produced_by_company_2                                                                                                   None


In [13]:
# Detail view for the first title containing "Inception".
show_movie_detail("Inception")

Inception (2010)
  movieId                       : 79132
  tmdbId                        : 27205
  release_year                  : 2010
  original_title                : Inception
  tagline                       : Your mind is the scene of the crime.
  overview                      : Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his target...
  produced_by_company_1         : Legendary Pictures
  produced_by_company_2         : Syncopy
  produced_in_country_1         : United Kingdom
  produced_in_country_2         : United States of America
  spoken_language_1             : English
  spoken_language_2             : French
  actor_1                       : Leonardo DiCaprio
  actor_2                       : Joseph Gordon-Levitt
  actor_3                       : Ken Watanabe
  director_1                    : Christopher Nolan
  writer_1                      : Christopher Nolan
  genre_1                       : Action
  genre_2                 

movieId                                                                                                                79132
tmdbId                                                                                                                 27205
release_year                                                                                                            2010
original_title                                                                                                     Inception
tagline                                                                                 Your mind is the scene of the crime.
overview                 Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his ta...
produced_by_company_1                                                                                     Legendary Pictures
produced_by_company_2                                                                                                Syncopy


In [14]:
# Detail view for the first title containing "The Dark Knight".
show_movie_detail("The Dark Knight")

The Dark Knight (2008)
  movieId                       : 58559
  tmdbId                        : 155
  release_year                  : 2008
  original_title                : The Dark Knight
  tagline                       : Welcome to a world without rules.
  overview                      : Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney ...
  produced_by_company_1         : Warner Bros. Pictures
  produced_by_company_2         : Legendary Pictures
  produced_in_country_1         : United Kingdom
  produced_in_country_2         : United States of America
  spoken_language_1             : English
  spoken_language_2             : Mandarin
  actor_1                       : Christian Bale
  actor_2                       : Heath Ledger
  actor_3                       : Aaron Eckhart
  director_1                    : Christopher Nolan
  writer_1                      : Christopher Nolan
  genre_1                       : Drama
  genre_2     

movieId                                                                                                                58559
tmdbId                                                                                                                   155
release_year                                                                                                            2008
original_title                                                                                               The Dark Knight
tagline                                                                                    Welcome to a world without rules.
overview                 Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attor...
produced_by_company_1                                                                                  Warner Bros. Pictures
produced_by_company_2                                                                                     Legendary Pictures


---
## 5. 카테고리별 분포 확인

In [15]:
# 장르 분포
print("[장르 분포 (genre_1)]")
print(df_catalog['genre_1'].value_counts().head(20))

[장르 분포 (genre_1)]
genre_1
Drama              22256
Comedy             16611
Documentary         9073
Action              6326
Horror              5755
Animation           3555
Thriller            3522
Crime               3223
Romance             2777
Adventure           2301
Science Fiction     1649
Family              1468
Music               1154
Mystery             1144
Western             1140
Fantasy             1124
TV Movie            1025
War                  777
History              596
Name: count, dtype: int64


In [16]:
# 연도 분포
print("[연도 분포]")
year_counts = df_catalog['release_year'].value_counts().sort_index()
print(f"연도 범위: {year_counts.index.min()} ~ {year_counts.index.max()}")
print(f"\n최근 10년:")
print(year_counts.tail(10))

[연도 분포]
연도 범위: 1874 ~ 2025

최근 10년:
release_year
2016    3134
2017    3256
2018    3181
2019    3114
2020    2497
2021    2362
2022    2118
2023    1040
2024      35
2025      13
Name: count, dtype: Int64


In [17]:
# 제작 국가 분포
if 'produced_in_country_1' in df_catalog.columns:
    print("[제작 국가 분포]")
    print(df_catalog['produced_in_country_1'].value_counts().head(15))

[제작 국가 분포]
produced_in_country_1
United States of America    34722
United Kingdom               6052
France                       5529
Canada                       3900
Japan                        3035
Italy                        2994
Germany                      2731
India                        2361
Spain                        1443
Australia                    1132
Russia                       1080
South Korea                  1031
Hong Kong                    1016
Soviet Union                 1003
Belgium                       974
Name: count, dtype: int64


In [18]:
# 다작 감독
if 'director_1' in df_catalog.columns:
    print("[다작 감독 TOP 20]")
    print(df_catalog['director_1'].value_counts().head(20))

[다작 감독 TOP 20]
director_1
Georges Méliès      118
Chuck Jones         102
Friz Freleng        100
Tex Avery            86
Michael Curtiz       77
John Ford            70
Richard Thorpe       64
Werner Herzog        62
Jean-Luc Godard      62
Charlie Chaplin      62
David DeCoteau       60
Jack Hannah          58
Gordon Douglas       56
William Beaudine     56
Alfred Hitchcock     56
Raoul Walsh          56
Henry Hathaway       55
Lloyd Bacon          55
Mervyn LeRoy         54
D.W. Griffith        53
Name: count, dtype: int64


In [19]:
# 다작 배우
if 'actor_1' in df_catalog.columns:
    print("[다작 배우 TOP 20]")
    print(df_catalog['actor_1'].value_counts().head(20))

[다작 배우 TOP 20]
actor_1
Mel Blanc           232
Clarence Nash       116
John Wayne          106
Nicolas Cage         78
Charlie Chaplin      76
Georges Méliès       72
Jackie Chan          66
Akshay Kumar         65
Pinto Colvig         61
Robert De Niro       59
Stan Laurel          58
Burt Reynolds        57
Isabelle Huppert     57
Barbara Stanwyck     57
Bing Crosby          56
Michael Caine        56
Alberto Sordi        56
Gérard Depardieu     54
Ajay Devgn           54
Leo Gorcey           54
Name: count, dtype: int64


---
## 6. 직접 검색

In [21]:
# Enter any movie title you want to look up.
search_movie("Avengers")

'Avengers' 검색 결과: 11개 중 11개 표시


Unnamed: 0,original_title,release_year,genre_1,director_1
3473,The Avengers,1998,Action,Jeremiah S. Chechik
6899,Ultimate Avengers 2,2006,Adventure,Will Meugniot
7028,Next Avengers: Heroes of Tomorrow,2008,Animation,Jay Oliva
11084,The Avengers,2012,Science Fiction,Joss Whedon
37097,Avengers: Age of Ultron,2015,Action,Joss Whedon
47852,Scavengers,2013,Science Fiction,Travis Zariwny
58944,Avengers: Endgame,2019,Adventure,Anthony Russo
58945,Avengers: Infinity War,2018,Adventure,Joe Russo
60460,Avengers Grimm,2015,Action,Jeremy M. Inman
63877,LEGO Marvel Super Heroes: Avengers Reassembled!,2015,Family,Rob Silvestri


Unnamed: 0,movieId,tmdbId,release_year,original_title,tagline,overview,produced_by_company_1,produced_by_company_2,produced_in_country_1,produced_in_country_2,spoken_language_1,spoken_language_2,actor_1,actor_2,actor_3,director_1,writer_1,genre_1,genre_2,genre_3
3473,2153,9320,1998,The Avengers,Saving the World in Style.,"British Ministry agent John Steed, under direction from ""Mother"", investigates a diabolical plot...",Jerry Weintraub Productions,Warner Bros. Pictures,United States of America,,English,Spanish,Ralph Fiennes,Uma Thurman,Sean Connery,Jeremiah S. Chechik,Don MacPherson,Action,Adventure,Thriller
6899,170297,14611,2006,Ultimate Avengers 2,"To save humanity, the Earth's mightiest heroes must reunite for a rematch of heroic proportions.","Mysterious Wakanda lies in the darkest heart of Africa, unknown to most of the world. An isolate...",MLG Productions 2,Lionsgate,United States of America,,English,,Justin Gross,Jeffrey D. Sams,Grey DeLisle,Will Meugniot,Greg Johnson,Adventure,Animation,Action
7028,135979,14613,2008,Next Avengers: Heroes of Tomorrow,The children of heroes past are our only hope for the future!,The children of the Avengers hone their powers and go head to head with the very enemy responsib...,MLG Productions 5,Lionsgate,United States of America,,English,,Noah Crawford,Brenna O'Brien,Aidan Drummond,Jay Oliva,Christopher L. Yost,Animation,Family,Action
11084,89745,24428,2012,The Avengers,Some assembly required.,"When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director o...",Marvel Studios,,United States of America,,English,Hindi,Robert Downey Jr.,Chris Evans,Mark Ruffalo,Joss Whedon,Joss Whedon,Science Fiction,Action,Adventure
37097,122892,99861,2015,Avengers: Age of Ultron,A new age has come.,"When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mi...",Marvel Studios,,United States of America,,English,,Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo,Joss Whedon,Joss Whedon,Action,Adventure,Science Fiction
47852,169616,179105,2013,Scavengers,Survive the void.,A team of space scavengers discovers superior alien technology that threatens the balance of the...,California Pictures,,United States of America,,English,,John Lee Ames,Roark Critchlow,Brett Curtze,Travis Zariwny,Travis Zariwny,Science Fiction,Action,
58944,122914,299534,2019,Avengers: Endgame,Avenge the fallen.,"After the devastating events of Avengers: Infinity War, the universe is in ruins due to the effo...",Marvel Studios,,United States of America,,English,Japanese,Robert Downey Jr.,Chris Evans,Mark Ruffalo,Anthony Russo,Stephen McFeely,Adventure,Science Fiction,Action
58945,122912,299536,2018,Avengers: Infinity War,Destiny arrives all the same.,As the Avengers and their allies have continued to protect the world from threats too large for ...,Marvel Studios,,United States of America,,English,Xhosa,Robert Downey Jr.,Chris Evans,Chris Hemsworth,Joe Russo,Stephen McFeely,Adventure,Action,Science Fiction
60460,136257,323660,2015,Avengers Grimm,A battle of legendary proportions.,"When Rumpelstiltskin destroys the Magic Mirror and escapes to the modern world, the four princes...",The Asylum,,United States of America,,English,,Casper Van Dien,Lou Ferrigno,Kimo Leopoldo,Jeremy M. Inman,Jeremy M. Inman,Action,Fantasy,
63877,187221,368304,2015,LEGO Marvel Super Heroes: Avengers Reassembled!,,The Avengers are forced to “party” with Ultron when he seeks to disassemble the team by taking c...,Arc Productions,Marvel Entertainment,United States of America,,English,,Laura Bailey,Troy Baker,Eric Bauza,Rob Silvestri,Mark Hoffmeier,Family,Animation,


In [22]:
# Show the detail view for one specific title.
show_movie_detail("Avengers: Endgame")

Avengers: Endgame (2019)
  movieId                       : 122914
  tmdbId                        : 299534
  release_year                  : 2019
  original_title                : Avengers: Endgame
  tagline                       : Avenge the fallen.
  overview                      : After the devastating events of Avengers: Infinity War, the universe is in ruins due to the efforts ...
  produced_by_company_1         : Marvel Studios
  produced_in_country_1         : United States of America
  spoken_language_1             : English
  spoken_language_2             : Japanese
  actor_1                       : Robert Downey Jr.
  actor_2                       : Chris Evans
  actor_3                       : Mark Ruffalo
  director_1                    : Anthony Russo
  writer_1                      : Stephen McFeely
  genre_1                       : Adventure
  genre_2                       : Science Fiction
  genre_3                       : Action


movieId                                                                                                               122914
tmdbId                                                                                                                299534
release_year                                                                                                            2019
original_title                                                                                             Avengers: Endgame
tagline                                                                                                   Avenge the fallen.
overview                 After the devastating events of Avengers: Infinity War, the universe is in ruins due to the effo...
produced_by_company_1                                                                                         Marvel Studios
produced_by_company_2                                                                                                   None


In [23]:
# Final cell: prints a completion message.
print("Done!")

Done!
