In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import json
from collections import Counter, defaultdict
import os

# Check if files exist


if not (os.path.exists(movies_file) and os.path.exists(credits_file)):
    raise FileNotFoundError("Ensure 'tmdb_5000_movies.csv' and 'tmdb_5000_credits.csv' are in the working directory.")

# Load and merge datasets
df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), on='id')

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# Question 1: Total movies in top 3 genres
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# Question 2: Revenue of fifth most prolific production company
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# Question 3: Top actor's most frequent production company
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# Question 4: Total movies by artistic pairs (≥7 shared movies)
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# Write output
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

NameError: name 'movies_file' is not defined

In [None]:
import pandas as pd
import json
from collections import Counter, defaultdict
import os

# Check if files exist
movies_file = '/content/drive/MyDrive/imdb.csv'
credits_file ='/content/drive/MyDrive/tmdb_5000_credits.csv'

if not (os.path.exists(movies_file) and os.path.exists(credits_file)):
    raise FileNotFoundError("Ensure 'tmdb_5000_movies.csv' and 'tmdb_5000_credits.csv' are in the working directory.")

# Load and merge datasets
df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), on='id')

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# Question 1: Total movies in top 3 genres
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# Question 2: Revenue of fifth most prolific production company
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# Question 3: Top actor's most frequent production company
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# Question 4: Total movies by artistic pairs (≥7 shared movies)
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# Write output
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

In [None]:
import pandas as pd
import json
from collections import Counter, defaultdict
import os

# Define file paths
movies_file = 'tmdb_5000_movies.csv'  # Corrected from imdb.csv
credits_file = 'tmdb_5000_credits.csv'

# Check if files exist
if not os.path.exists(movies_file):
    raise FileNotFoundError(f"File not found: {movies_file}. Ensure it is in the working directory or mount Google Drive.")
if not os.path.exists(credits_file):
    raise FileNotFoundError(f"File not found: {credits_file}. Ensure it is in the working directory or mount Google Drive.")

# Load and merge datasets
df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), on='id')

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# Question 1: Total movies in top 3 genres
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# Question 2: Revenue of fifth most prolific production company
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# Question 3: Top actor's most frequent production company
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# Question 4: Total movies by artistic pairs (≥7 shared movies)
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# Write output
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

In [None]:
import pandas as pd
import json
from collections import Counter, defaultdict
import os

# Check if files exist
movies_file = '/content/drive/MyDrive/imdb.csv'
credits_file ='/content/drive/MyDrive/tmdb_5000_credits.csv'

if not (os.path.exists(movies_file) and os.path.exists(credits_file)):
    raise FileNotFoundError("Ensure 'tmdb_5000_movies.csv' and 'tmdb_5000_credits.csv' are in the working directory.")

# Load and merge datasets
df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), on='id')

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# Question 1: Total movies in top 3 genres
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# Question 2: Revenue of fifth most prolific production company
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# Question 3: Top actor's most frequent production company
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# Question 4: Total movies by artistic pairs (≥7 shared movies)
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# Write output
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

In [None]:
import pandas as pd
import json
from collections import Counter, defaultdict
import os

# تعریف مسیر فایل‌ها
movies_file = 'tmdb_5000_movies.csv'  # اصلاح از imdb.csv
credits_file = 'tmdb_5000_credits.csv'

# بررسی وجود فایل‌ها
if not os.path.exists(movies_file):
    raise FileNotFoundError(f"فایل {movies_file} پیدا نشد. مطمئن شوید در دایرکتوری کاری موجود است یا Google Drive را متصل کنید.")
if not os.path.exists(credits_file):
    raise FileNotFoundError(f"فایل {credits_file} پیدا نشد. مطمئن شوید در دایرکتوری کاری موجود است یا Google Drive را متصل کنید.")

# بارگذاری و ادغام داده‌ها
df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), on='id')

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# سؤال ۱: تعداد فیلم‌ها در سه ژانر برتر
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# سؤال ۲: درآمد شرکت پنجم در تولید فیلم
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# سؤال ۳: شرکت پرتکرار بازیگر برتر
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# سؤال ۴: مجموع فیلم‌های زوج‌های هنری (حداقل ۷ فیلم مشترک)
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# نوشتن خروجی
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

In [None]:
# گام ۱: نصب و تنظیم Kaggle API (فرض: Drive متصل و kaggle.json آپلود شده)
from google.colab import drive
drive.mount('/content/drive')
!pip install kaggle
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/kaggle.json ~/.kaggle/  # مسیر kaggle.json را تنظیم کنید
!chmod 600 ~/.kaggle/kaggle.json

# گام ۲: دانلود و استخراج دیتاست
!kaggle datasets download -d tmdb/tmdb-movie-metadata -p /content/
import zipfile
with zipfile.ZipFile('/content/tmdb-movie-metadata.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

# گام ۳: بارگذاری و تحلیل (کد اصلی شما)
import pandas as pd
import json
from collections import Counter, defaultdict
import os

movies_file = '/content/tmdb_5000_movies.csv'
credits_file = '/content/tmdb_5000_credits.csv'

# بررسی وجود فایل‌ها (حالا باید موجود باشند)
if not os.path.exists(movies_file):
    raise FileNotFoundError(f"فایل {movies_file} پیدا نشد.")
if not os.path.exists(credits_file):
    raise FileNotFoundError(f"فایل {credits_file} پیدا نشد.")

df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), on='id', suffixes=('', '_credits'))

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# سؤال ۱
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# سؤال ۲
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# سؤال ۳
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# سؤال ۴
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# خروجی
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

print("تحلیل کامل شد. فایل output.txt ایجاد شد.")

In [None]:
# گام ۱: دانلود مستقیم ZIP دیتاست از OSF.io (بدون نیاز به Kaggle)
!wget -O /content/tmdb-movie-metadata.zip https://osf.io/download/4wx2q/

# گام ۲: استخراج فایل‌ها
import zipfile
with zipfile.ZipFile('/content/tmdb-movie-metadata.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

# گام ۳: بررسی فایل‌ها (اختیاری، برای تأیید)
!ls /content/ | grep tmdb

# گام ۴: بارگذاری و تحلیل (کد اصلی شما، با بهبودهای کوچک)
import pandas as pd
import json
from collections import Counter, defaultdict
import os

movies_file = '/content/tmdb_5000_movies.csv'
credits_file = '/content/tmdb_5000_credits.csv'

# بررسی وجود فایل‌ها
if not os.path.exists(movies_file):
    raise FileNotFoundError(f"فایل {movies_file} پیدا نشد. دانلود را چک کنید.")
if not os.path.exists(credits_file):
    raise FileNotFoundError(f"فایل {credits_file} پیدا نشد. دانلود را چک کنید.")

# بارگذاری و ادغام (توجه: در credits، ستون id به movie_id تغییر نام داده شده)
df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), left_on='id', right_on='movie_id', suffixes=('', '_credits'))

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# سؤال ۱: تعداد فیلم‌ها در سه ژانر برتر
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# سؤال ۲: درآمد شرکت پنجم در تولید فیلم
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# سؤال ۳: شرکت پرتکرار بازیگر برتر
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# سؤال ۴: مجموع فیلم‌های زوج‌های هنری (حداقل ۷ فیلم مشترک)
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# خروجی
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

print("تحلیل کامل شد! فایل output.txt ایجاد شد.")
print(f"خروجی:\n{num1}\n{num2}\n'{top_comp}', {num3}\n{total4}")

In [None]:
# Step 1: Download ZIP dataset directly from OSF.io (no Kaggle required)
!wget -O /content/tmdb-movie-metadata.zip https://osf.io/download/4wx2q/

# Step 2: Extract files
import zipfile
with zipfile.ZipFile('/content/tmdb-movie-metadata.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

# Step 3: Check files (optional, for verification)
!ls /content/ | grep tmdb

# Step 4: Load and analyze (original code with minor improvements)
import pandas as pd
import json
from collections import Counter, defaultdict
import os

movies_file = '/content/tmdb_5000_movies.csv'
credits_file = '/content/tmdb_5000_credits.csv'

# Check if files exist
if not os.path.exists(movies_file):
    raise FileNotFoundError(f"File {movies_file} not found. Check download.")
if not os.path.exists(credits_file):
    raise FileNotFoundError(f"File {credits_file} not found. Check download.")

# Load and merge (note: credits uses movie_id instead of id)
df = pd.read_csv(movies_file).merge(pd.read_csv(credits_file), left_on='id', right_on='movie_id', suffixes=('', '_credits'))

def get_list(row):
    return [d['name'] for d in json.loads(row)] if pd.notna(row) else []

# Question 1: Count movies in top 3 genres
genres_flat = [g for row in df['genres'] for g in get_list(row)]
genre_cnt = Counter(genres_flat)
top3 = [g for g, _ in genre_cnt.most_common(3)]
num1 = sum(any(g in get_list(row['genres']) for g in top3) for _, row in df.iterrows())

# Question 2: Revenue of fifth most prolific production company
comp_flat = [c for row in df['production_companies'] for c in get_list(row)]
comp_cnt = Counter(comp_flat)
sorted_comp = [c for c, _ in comp_cnt.most_common()]
fifth = sorted_comp[4] if len(sorted_comp) >= 5 else None
if fifth:
    total_rev = sum(row['revenue'] / len(get_list(row['production_companies']))
                    for _, row in df.iterrows() if fifth in get_list(row['production_companies']))
    num2 = int(total_rev)
else:
    num2 = 0

# Question 3: Most frequent company for top actor
actor_flat = [a for row in df['cast'] for a in get_list(row)]
actor_cnt = Counter(actor_flat)
sorted_actors = sorted(actor_cnt, key=lambda x: (-actor_cnt[x], x))
first_actor = sorted_actors[0] if sorted_actors else None
if first_actor:
    actor_movies = df[df['cast'].apply(lambda x: first_actor in get_list(x))]
    comp_actor = Counter(c for _, row in actor_movies.iterrows() for c in get_list(row['production_companies']))
    top_comp, num3 = comp_actor.most_common(1)[0] if comp_actor else ('Unknown', 0)
else:
    top_comp, num3 = 'Unknown', 0

# Question 4: Total movies by artistic pairs (≥7 shared movies)
pair_cnt = defaultdict(int)
for _, row in df.iterrows():
    cast_l = get_list(row['cast'])
    for i in range(len(cast_l)):
        for j in range(i+1, len(cast_l)):
            pair = tuple(sorted([cast_l[i], cast_l[j]]))
            pair_cnt[pair] += 1
total4 = sum(v for v in pair_cnt.values() if v >= 7)

# Output
with open('output.txt', 'w') as f:
    print(num1, num2, f"'{top_comp}', {num3}", total4, sep='\n', file=f)

print("Analysis completed! output.txt created.")
print(f"Output:\n{num1}\n{num2}\n'{top_comp}', {num3}\n{total4}")