In [31]:
# ÉTAPE 1 — Conversion TSV -> CSV (par chunks, mémoire safe)

import os
import pandas as pd

# === À ADAPTER ===
DATA_DIR   = "data/imdb"    # <-- mets ton chemin
OUTPUT_DIR = os.path.join(DATA_DIR, "csv_out")
CHUNK_SIZE = 500_000                   # nb de lignes par chunk (ajuste si besoin)

os.makedirs(OUTPUT_DIR, exist_ok=True)

# Fichiers attendus (convertira seulement ceux trouvés)
FILES = [
    "title.basics.tsv",
    "title.akas.tsv",
    "title.crew.tsv",
    "title.episode.tsv",
    "title.principals.tsv",
    "name.basics.tsv",
    "title.ratings.tsv"
]

NA_TOKEN = r"\N"

def tsv_to_csv(tsv_path: str, csv_path: str, chunk_size: int = CHUNK_SIZE):
    print(f"\n-> Conversion: {os.path.basename(tsv_path)}")
    first = True
    # lecture par chunks pour limiter la RAM
    for i, chunk in enumerate(pd.read_csv(
        tsv_path,
        sep="\t",
        na_values=[NA_TOKEN, ""],
        keep_default_na=False,
        low_memory=False,
        chunksize=chunk_size,
        encoding="utf-8",
        on_bad_lines="skip"
    )):
        mode = "w" if first else "a"
        header = first
        chunk.to_csv(csv_path, index=False, mode=mode, header=header, encoding="utf-8")
        first = False
        print(f"  - chunk {i+1} écrit")

    print(f"OK -> {csv_path}")

for fname in FILES:
    in_path = os.path.join(DATA_DIR, fname)
    if not os.path.exists(in_path):
        print(f"(!) Introuvable, je passe: {fname}")
        continue
    out_path = os.path.join(OUTPUT_DIR, fname.replace(".tsv", ".csv"))
    tsv_to_csv(in_path, out_path)



-> Conversion: title.basics.tsv
  - chunk 1 écrit
  - chunk 2 écrit
  - chunk 3 écrit
  - chunk 4 écrit
  - chunk 5 écrit
  - chunk 6 écrit
  - chunk 7 écrit
  - chunk 8 écrit
  - chunk 9 écrit
  - chunk 10 écrit
  - chunk 11 écrit
  - chunk 12 écrit
  - chunk 13 écrit
  - chunk 14 écrit
  - chunk 15 écrit
  - chunk 16 écrit
  - chunk 17 écrit
  - chunk 18 écrit
  - chunk 19 écrit
  - chunk 20 écrit
  - chunk 21 écrit
  - chunk 22 écrit
  - chunk 23 écrit
  - chunk 24 écrit
OK -> data/imdb\csv_out\title.basics.csv

-> Conversion: title.akas.tsv
  - chunk 1 écrit
  - chunk 2 écrit
  - chunk 3 écrit
  - chunk 4 écrit
  - chunk 5 écrit
  - chunk 6 écrit
  - chunk 7 écrit
  - chunk 8 écrit
  - chunk 9 écrit
  - chunk 10 écrit
  - chunk 11 écrit
  - chunk 12 écrit
  - chunk 13 écrit
  - chunk 14 écrit
  - chunk 15 écrit
  - chunk 16 écrit
  - chunk 17 écrit
  - chunk 18 écrit
  - chunk 19 écrit
  - chunk 20 écrit
  - chunk 21 écrit
  - chunk 22 écrit
  - chunk 23 écrit
  - chunk 24 écrit
 

In [33]:
# ÉTAPE 2 — Aperçus rapides des CSV (échantillon léger)

import os
import pandas as pd

# === À ADAPTER ===
DATA_DIR   = "data/imdb/csv_out"
SAMPLE_N   = 200_000                         # lignes lues par fichier pour l'aperçu

pd.options.display.max_columns = 120
pd.options.display.width = 140

FILES = {
    "title.basics.csv":     {"extra_counts": {"titleType": 15, "isAdult": 5}},
    "title.akas.csv":       {"extra_counts": {"region": 15, "language": 15}},
    "name.basics.csv":      {"extra_counts": {"primaryProfession": 15}},
    "title.principals.csv": {"extra_counts": {"category": 15}},
    "title.crew.csv":       {"extra_counts": {}},
    "title.episode.csv":    {"extra_counts": {}},
    "title.ratings.csv":    {"extra_counts": {}},     # nouveau fichier
}

def quick_preview_csv(path: str, sample_n: int = SAMPLE_N, extra_counts: dict | None = None):
    name = os.path.basename(path)
    if not os.path.exists(path):
        print(f"(!) Introuvable, je passe : {name}")
        return None

    print(f"\n=== {name} ===")
    print(f"Taille fichier: {os.path.getsize(path)/1e6:.1f} Mo")

    # lecture d'un échantillon (rapide)
    df = pd.read_csv(path, nrows=sample_n, low_memory=False)
    print(f"Échantillon lu: {len(df):,} lignes  |  Colonnes: {len(df.columns)}")

    # aperçu head
    print("\nHead:")
    display(df.head(10))

    # % de valeurs manquantes (sur l'échantillon)
    na_pct = (df.isna().mean() * 100).sort_values(ascending=False)
    print("\nTaux de valeurs manquantes (échantillon) — top 20 :")
    display(na_pct.head(20).to_frame("na_%").round(2))

    # stats numériques simples
    num_cols = df.select_dtypes(include=["number"]).columns
    if len(num_cols):
        print("\nRésumé numérique (échantillon):")
        display(df[num_cols].describe().T[["count", "mean", "std", "min", "50%", "max"]])

    # value_counts utiles
    extra_counts = extra_counts or {}
    for col, topk in extra_counts.items():
        if col in df.columns:
            print(f"\nValue counts — {col} (top {topk}) :")
            display(df[col].value_counts(dropna=False).head(topk))

    return df


# ----- Lancer les aperçus -----
samples = {}
for fname, opts in FILES.items():
    path = os.path.join(DATA_DIR, fname)
    samples[fname] = quick_preview_csv(path, SAMPLE_N, opts.get("extra_counts", {}))


# ----- Petit bonus utile si ratings dispo : top des notes (échantillon) -----
if samples.get("title.ratings.csv") is not None and samples.get("title.basics.csv") is not None:
    rat = samples["title.ratings.csv"][["tconst", "averageRating", "numVotes"]].dropna()
    bas = samples["title.basics.csv"][["tconst", "primaryTitle", "startYear", "titleType"]]
    tmp = (rat.merge(bas, on="tconst", how="left")
              .query("numVotes >= 1000")  # seuil simple
              .sort_values(["averageRating","numVotes"], ascending=[False, False])
              .head(20))
    print("\nTop titres (échantillon) — note moyenne (>=1000 votes) :")
    display(tmp[["primaryTitle", "startYear", "titleType", "averageRating", "numVotes"]])



=== title.basics.csv ===
Taille fichier: 1031.0 Mo
Échantillon lu: 200,000 lignes  |  Colonnes: 9

Head:


Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,Carmencita,0,1894.0,,1.0,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,Le clown et ses chiens,0,1892.0,,5.0,"Animation,Short"
2,tt0000003,short,Poor Pierrot,Pauvre Pierrot,0,1892.0,,5.0,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,Un bon bock,0,1892.0,,12.0,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,Blacksmith Scene,0,1893.0,,1.0,Short
5,tt0000006,short,Chinese Opium Den,Chinese Opium Den,0,1894.0,,1.0,Short
6,tt0000007,short,Corbett and Courtney Before the Kinetograph,Corbett and Courtney Before the Kinetograph,0,1894.0,,1.0,"Short,Sport"
7,tt0000008,short,Edison Kinetoscopic Record of a Sneeze,Edison Kinetoscopic Record of a Sneeze,0,1894.0,,1.0,"Documentary,Short"
8,tt0000009,movie,Miss Jerry,Miss Jerry,0,1894.0,,45.0,Romance
9,tt0000010,short,Leaving the Factory,La sortie de l'usine Lumière à Lyon,0,1895.0,,1.0,"Documentary,Short"



Taux de valeurs manquantes (échantillon) — top 20 :


Unnamed: 0,na_%
endYear,95.64
runtimeMinutes,19.52
genres,8.6
startYear,0.12
tconst,0.0
isAdult,0.0
originalTitle,0.0
primaryTitle,0.0
titleType,0.0



Résumé numérique (échantillon):


Unnamed: 0,count,mean,std,min,50%,max
isAdult,200000.0,0.06548,0.247372,0.0,0.0,1.0
startYear,199762.0,1964.936059,26.893977,1892.0,1971.0,2025.0
endYear,8721.0,1984.521385,14.437767,1945.0,1987.0,2025.0
runtimeMinutes,160968.0,75.13612,42.293484,1.0,83.0,1620.0



Value counts — titleType (top 15) :


titleType
movie           124375
short            34772
tvMovie          12741
tvSeries         11151
video            10184
tvEpisode         3798
tvMiniSeries      1944
tvShort            446
videoGame          400
tvSpecial          189
Name: count, dtype: int64


Value counts — isAdult (top 5) :


isAdult
0    186904
1     13096
Name: count, dtype: int64


=== title.akas.csv ===
Taille fichier: 2476.2 Mo
Échantillon lu: 200,000 lignes  |  Colonnes: 8

Head:


Unnamed: 0,titleId,ordering,title,region,language,types,attributes,isOriginalTitle
0,tt0000001,1,Carmencita,,,original,,1
1,tt0000001,2,Carmencita,DE,,,literal title,0
2,tt0000001,3,Carmencita,US,,imdbDisplay,,0
3,tt0000001,4,Carmencita - spanyol tánc,HU,,imdbDisplay,,0
4,tt0000001,5,Καρμενσίτα,GR,,imdbDisplay,,0
5,tt0000001,6,Карменсита,RU,,imdbDisplay,,0
6,tt0000001,7,Карменсіта,UA,,imdbDisplay,,0
7,tt0000001,8,カルメンチータ,JP,ja,imdbDisplay,,0
8,tt0000002,1,Le clown et ses chiens,,,original,,1
9,tt0000002,2,A bohóc és kutyái,HU,,imdbDisplay,,0



Taux de valeurs manquantes (échantillon) — top 20 :


Unnamed: 0,na_%
attributes,93.54
language,86.81
region,17.75
types,15.21
title,0.0
ordering,0.0
titleId,0.0
isOriginalTitle,0.0



Résumé numérique (échantillon):


Unnamed: 0,count,mean,std,min,50%,max
ordering,200000.0,6.911085,7.703975,1.0,4.0,92.0
isOriginalTitle,200000.0,0.16552,0.37165,0.0,0.0,1.0



Value counts — region (top 15) :


region
NaN    35508
US     34076
GB     11249
BR      9338
FR      8614
DK      7470
ES      7441
JP      6609
DE      6577
SE      6275
PT      5456
IT      5005
HU      4881
GR      4627
CA      4014
Name: count, dtype: int64


Value counts — language (top 15) :


language
NaN    173619
en       6792
ja       5951
ru       3084
fr       2596
sr       2494
qbn      1486
tr        767
bg        549
sv        471
hr        340
sl        336
es        329
cmn       236
ca        193
Name: count, dtype: int64


=== name.basics.csv ===
Taille fichier: 865.7 Mo
Échantillon lu: 200,000 lignes  |  Colonnes: 6

Head:


Unnamed: 0,nconst,primaryName,birthYear,deathYear,primaryProfession,knownForTitles
0,nm0000001,Fred Astaire,1899.0,1987.0,"actor,miscellaneous,producer","tt0072308,tt0050419,tt0027125,tt0025164"
1,nm0000002,Lauren Bacall,1924.0,2014.0,"actress,soundtrack,archive_footage","tt0037382,tt0075213,tt0038355,tt0117057"
2,nm0000003,Brigitte Bardot,1934.0,,"actress,music_department,producer","tt0057345,tt0049189,tt0056404,tt0054452"
3,nm0000004,John Belushi,1949.0,1982.0,"actor,writer,music_department","tt0072562,tt0077975,tt0080455,tt0078723"
4,nm0000005,Ingmar Bergman,1918.0,2007.0,"writer,director,actor","tt0050986,tt0069467,tt0050976,tt0083922"
5,nm0000006,Ingrid Bergman,1915.0,1982.0,"actress,producer,soundtrack","tt0034583,tt0038109,tt0036855,tt0038787"
6,nm0000007,Humphrey Bogart,1899.0,1957.0,"actor,producer,miscellaneous","tt0034583,tt0043265,tt0033870,tt0037382"
7,nm0000008,Marlon Brando,1924.0,2004.0,"actor,director,writer","tt0078788,tt0068646,tt0047296,tt0070849"
8,nm0000009,Richard Burton,1925.0,1984.0,"actor,producer,director","tt0061184,tt0087803,tt0059749,tt0057877"
9,nm0000010,James Cagney,1899.0,1986.0,"actor,director,producer","tt0029870,tt0031867,tt0042041,tt0034236"



Taux de valeurs manquantes (échantillon) — top 20 :


Unnamed: 0,na_%
deathYear,83.74
birthYear,70.54
knownForTitles,2.34
primaryProfession,2.0
primaryName,0.0
nconst,0.0



Résumé numérique (échantillon):


Unnamed: 0,count,mean,std,min,50%,max
birthYear,58927.0,1936.126377,32.479109,450.0,1940.0,1999.0
deathYear,32514.0,1990.223996,33.723725,388.0,1996.0,2025.0



Value counts — primaryProfession (top 15) :


primaryProfession
actor                      40828
actress                    29362
miscellaneous               6502
writer                      4521
NaN                         4010
camera_department           3882
art_department              3103
actor,archive_footage       3020
actress,archive_footage     2571
sound_department            2314
producer                    2252
make_up_department          1889
actor,miscellaneous         1518
actor,soundtrack            1041
actor,director,writer        989
Name: count, dtype: int64


=== title.principals.csv ===
Taille fichier: 4220.3 Mo
Échantillon lu: 200,000 lignes  |  Colonnes: 6

Head:


Unnamed: 0,tconst,ordering,nconst,category,job,characters
0,tt0000001,1,nm1588970,self,,"[""Self""]"
1,tt0000001,2,nm0005690,director,,
2,tt0000001,3,nm0005690,producer,producer,
3,tt0000001,4,nm0374658,cinematographer,director of photography,
4,tt0000002,1,nm0721526,director,,
5,tt0000002,2,nm1335271,composer,,
6,tt0000003,1,nm0721526,director,,
7,tt0000003,2,nm0721526,writer,,
8,tt0000003,3,nm1770680,producer,producer,
9,tt0000003,4,nm0721526,producer,producer,



Taux de valeurs manquantes (échantillon) — top 20 :


Unnamed: 0,na_%
job,81.71
characters,44.02
ordering,0.0
tconst,0.0
category,0.0
nconst,0.0



Résumé numérique (échantillon):


Unnamed: 0,count,mean,std,min,50%,max
ordering,200000.0,6.780865,4.247426,1.0,6.0,35.0



Value counts — category (top 15) :


category
actor                  83754
actress                43832
writer                 27970
director               19641
cinematographer        13002
producer                7357
editor                  2302
composer                1226
self                     419
production_designer      409
archive_footage           47
casting_director          41
Name: count, dtype: int64


=== title.crew.csv ===
Taille fichier: 389.1 Mo
Échantillon lu: 200,000 lignes  |  Colonnes: 3

Head:


Unnamed: 0,tconst,directors,writers
0,tt0000001,nm0005690,
1,tt0000002,nm0721526,
2,tt0000003,nm0721526,nm0721526
3,tt0000004,nm0721526,
4,tt0000005,nm0005690,
5,tt0000006,nm0005690,
6,tt0000007,"nm0005690,nm0374658",
7,tt0000008,nm0005690,
8,tt0000009,nm0085156,nm0085156
9,tt0000010,nm0525910,



Taux de valeurs manquantes (échantillon) — top 20 :


Unnamed: 0,na_%
writers,19.84
directors,5.53
tconst,0.0



=== title.episode.csv ===
Taille fichier: 269.5 Mo
Échantillon lu: 200,000 lignes  |  Colonnes: 4

Head:


Unnamed: 0,tconst,parentTconst,seasonNumber,episodeNumber
0,tt0031458,tt32857063,,
1,tt0041951,tt0041038,1.0,9.0
2,tt0042816,tt0989125,1.0,17.0
3,tt0042889,tt0989125,,
4,tt0043426,tt0040051,3.0,42.0
5,tt0043631,tt0989125,2.0,16.0
6,tt0043693,tt0989125,2.0,8.0
7,tt0043710,tt0989125,3.0,3.0
8,tt0044093,tt0959862,1.0,6.0
9,tt0044668,tt0044243,2.0,16.0



Taux de valeurs manquantes (échantillon) — top 20 :


Unnamed: 0,na_%
episodeNumber,16.94
seasonNumber,16.94
parentTconst,0.0
tconst,0.0



Résumé numérique (échantillon):


Unnamed: 0,count,mean,std,min,50%,max
seasonNumber,166129.0,4.039698,24.849055,1.0,2.0,2003.0
episodeNumber,166129.0,155.769787,971.621792,0.0,10.0,14802.0



=== title.ratings.csv ===
Taille fichier: 29.5 Mo
Échantillon lu: 200,000 lignes  |  Colonnes: 3

Head:


Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,2168
1,tt0000002,5.5,298
2,tt0000003,6.5,2229
3,tt0000004,5.2,192
4,tt0000005,6.2,2969
5,tt0000006,5.0,216
6,tt0000007,5.3,917
7,tt0000008,5.4,2317
8,tt0000009,5.3,228
9,tt0000010,6.8,8041



Taux de valeurs manquantes (échantillon) — top 20 :


Unnamed: 0,na_%
tconst,0.0
averageRating,0.0
numVotes,0.0



Résumé numérique (échantillon):


Unnamed: 0,count,mean,std,min,50%,max
averageRating,200000.0,6.204634,1.219757,1.0,6.3,10.0
numVotes,200000.0,2531.92006,32132.421957,5.0,65.0,3082234.0



Top titres (échantillon) — note moyenne (>=1000 votes) :


Unnamed: 0,primaryTitle,startYear,titleType,averageRating,numVotes
127283,The Legend of Zelda: Ocarina of Time,1998.0,videoGame,9.6,11431
125077,Metal Gear Solid,1998.0,videoGame,9.5,13774
139578,Final Fantasy VII,1997.0,videoGame,9.5,11818
151243,,,,9.5,1649
127999,Band of Brothers,2001.0,tvMiniSeries,9.4,569210
176668,,,,9.4,8615
126377,Fallout 2: A Post-Nuclear Role-Playing Game,1998.0,videoGame,9.4,4518
157042,,,,9.4,4439
140774,,,,9.4,3500
147635,,,,9.4,2440


In [37]:
# ÉTAPE 3 — Stats globales sur tous les fichiers CSV
import pandas as pd
import os

DATA_DIR = "data/imdb/csv_out"  # dossier CSV

files = [
    "title.basics.csv",
    "title.akas.csv",
    "name.basics.csv",
    "title.principals.csv",
    "title.crew.csv",
    "title.episode.csv",
    "title.ratings.csv",
]

summary_stats = []

def get_file_stats(fname):
    path = os.path.join(DATA_DIR, fname)
    if not os.path.exists(path):
        return None
    # Compte des lignes rapides
    total_rows = sum(1 for _ in open(path, "r", encoding="utf-8")) - 1
    # Lecture d'un mini échantillon juste pour colonnes
    cols = pd.read_csv(path, nrows=5).columns.tolist()
    return {"fichier": fname, "lignes": total_rows, "colonnes": len(cols), "noms_colonnes": cols}

for f in files:
    stats = get_file_stats(f)
    if stats:
        summary_stats.append(stats)

df_summary = pd.DataFrame(summary_stats)
print("\n=== Récap taille fichiers ===")
display(df_summary)

# --- Quelques agrégations globales ---
# Top genres (title.basics)
tb_path = os.path.join(DATA_DIR, "title.basics.csv")
tb = pd.read_csv(tb_path, usecols=["genres"])
genres_counts = tb["genres"].dropna().str.split(",").explode().value_counts().head(20)
print("\nTop genres (global) :")
display(genres_counts)

# Top régions (title.akas)
ta_path = os.path.join(DATA_DIR, "title.akas.csv")
ta = pd.read_csv(ta_path, usecols=["region"])
region_counts = ta["region"].value_counts().head(20)
print("\nTop régions (global) :")
display(region_counts)

# Top professions (name.basics)
nb_path = os.path.join(DATA_DIR, "name.basics.csv")
nb = pd.read_csv(nb_path, usecols=["primaryProfession"])
prof_counts = nb["primaryProfession"].dropna().str.split(",").explode().value_counts().head(20)
print("\nTop professions (global) :")
display(prof_counts)

# Top catégories (title.principals)
tp_path = os.path.join(DATA_DIR, "title.principals.csv")
tp = pd.read_csv(tp_path, usecols=["category"])
cat_counts = tp["category"].value_counts().head(20)
print("\nTop catégories (global) :")
display(cat_counts)

# Meilleurs films/series par note moyenne (>= 50 000 votes)
tr_path = os.path.join(DATA_DIR, "title.ratings.csv")
tr = pd.read_csv(tr_path)
tb_small = pd.read_csv(tb_path, usecols=["tconst", "primaryTitle", "startYear", "titleType"])
ratings_merge = tr.merge(tb_small, on="tconst", how="left")
top_rated = (
    ratings_merge.query("numVotes >= 50000")
    .sort_values(["averageRating", "numVotes"], ascending=[False, False])
    .head(20)
)
print("\nTop titres (>= 50k votes) :")
display(top_rated)



=== Récap taille fichiers ===


Unnamed: 0,fichier,lignes,colonnes,noms_colonnes
0,title.basics.csv,11836175,9,"[tconst, titleType, primaryTitle, originalTitle, isAdult, startYear, endYear, runtimeMinutes, genres]"
1,title.akas.csv,52816959,8,"[titleId, ordering, title, region, language, types, attributes, isOriginalTitle]"
2,name.basics.csv,14628926,6,"[nconst, primaryName, birthYear, deathYear, primaryProfession, knownForTitles]"
3,title.principals.csv,94059043,6,"[tconst, ordering, nconst, category, job, characters]"
4,title.crew.csv,11834005,3,"[tconst, directors, writers]"
5,title.episode.csv,9111019,4,"[tconst, parentTconst, seasonNumber, episodeNumber]"
6,title.ratings.csv,1601448,3,"[tconst, averageRating, numVotes]"



Top genres (global) :


genres
Drama          3335718
Comedy         2298638
Talk-Show      1475278
Short          1259277
Documentary    1130418
News           1117888
Romance        1099258
Family          865014
Reality-TV      662679
Animation       591599
Crime           504176
Action          493995
Adventure       453584
Game-Show       447056
Music           439355
Adult           380570
Sport           300258
Fantasy         258734
Mystery         247536
Horror          236903
Name: count, dtype: int64


Top régions (global) :


region
DE     5174155
JP     5159877
FR     5139316
IN     5095363
ES     5054126
IT     5030374
PT     4941176
US     1653540
GB      527964
CA      302025
XWW     203948
AU      192833
BR      137415
RU      123898
MX      119746
PL      101185
GR       98554
FI       93322
SE       87053
HU       82928
Name: count, dtype: int64


Top professions (global) :


primaryProfession
actor                   3368319
actress                 2036720
miscellaneous           1512943
producer                1294218
writer                   968765
camera_department        860445
director                 792406
art_department           515523
cinematographer          423695
sound_department         420496
editor                   390011
composer                 368139
music_department         304587
assistant_director       281364
visual_effects           265137
make_up_department       244683
animation_department     237870
production_manager       225897
archive_footage          217699
editorial_department     205278
Name: count, dtype: int64


Top catégories (global) :


category
actor                  22303385
actress                16848366
self                   13879625
writer                 11266540
director                8088856
producer                7061085
editor                  4966314
cinematographer         3782838
composer                3044674
production_designer     1118950
casting_director        1096504
archive_footage          591117
archive_sound             10789
Name: count, dtype: int64


Top titres (>= 50k votes) :


Unnamed: 0,tconst,averageRating,numVotes,titleType,primaryTitle,startYear
987038,tt2301451,10.0,256946,tvEpisode,Ozymandias,2013.0
1293980,tt4283088,9.9,242555,tvEpisode,Battle of the Bastards,2016.0
1293982,tt4283094,9.9,174426,tvEpisode,The Winds of Winter,2016.0
987040,tt2301455,9.9,163723,tvEpisode,Felina,2013.0
955247,tt2178784,9.9,133055,tvEpisode,The Rains of Castamere,2013.0
830014,tt1683088,9.9,89006,tvEpisode,Face Off,2011.0
1214399,tt34952922,9.9,78427,tvEpisode,I Thought You'd Never Shut Up,2025.0
604227,tt12187040,9.9,67358,tvEpisode,Plan and Execution,2022.0
1600954,tt9906260,9.8,150120,tvEpisode,Hero,2019.0
1267243,tt3866850,9.8,116946,tvEpisode,Hardhome,2015.0
