In [31]:
import pandas as pd

# Load basics and ratings
basics = pd.read_csv(r"C:\imdb\title.basics.tsv", sep="\t", na_values="\\N")
ratings = pd.read_csv(r"C:\imdb\title.ratings.tsv", sep="\t", na_values="\\N")

# Filter only TV shows
tv_shows = basics[basics['titleType'].isin(['tvSeries', 'tvMiniSeries'])]

# Merge with ratings
tv_data = pd.merge(tv_shows, ratings, on="tconst")

  basics = pd.read_csv(r"C:\imdb\title.basics.tsv", sep="\t", na_values="\\N")


In [32]:
# Sort by popularity and keep top 500
tv_data = tv_data.sort_values(by="numVotes", ascending=False).head(500)

# Keep useful columns
tv_data = tv_data[['primaryTitle', 'startYear', 'genres', 'averageRating', 'numVotes']]

tv_data.to_csv("top500_tv_shows.csv", index=False)
tv_data = pd.read_csv("top500_tv_shows.csv")

tv_data.head()

Unnamed: 0,primaryTitle,startYear,genres,averageRating,numVotes
0,Game of Thrones,2011.0,"Action,Adventure,Drama",9.2,2530154
1,Breaking Bad,2008.0,"Crime,Drama,Thriller",9.5,2468242
2,Stranger Things,2016.0,"Drama,Fantasy,Horror",8.6,1650794
3,Friends,1994.0,"Comedy,Romance",8.9,1179466
4,The Walking Dead,2010.0,"Drama,Horror,Thriller",8.1,1179419


In [33]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Convert genres into a text format suitable for vectorization
tv_data['genres'] = tv_data['genres'].fillna('')

# Use TF-IDF to encode genres
vectorizer = TfidfVectorizer(token_pattern='[a-zA-Z]+')
genre_matrix = vectorizer.fit_transform(tv_data['genres'])


from sklearn.metrics.pairwise import cosine_similarity

# Compute similarity matrix
similarity_matrix = cosine_similarity(genre_matrix)


def recommend_show(title, n=10):
    # Find index of the show
    idx = tv_data[tv_data['primaryTitle'] == title].index[0]

    # Get similarity scores
    scores = list(enumerate(similarity_matrix[idx]))

    # Sort by similarity (highest first)
    scores = sorted(scores, key=lambda x: x[1], reverse=True)

    # Get top n recommendations (skip the first one because it's the show itself)
    top_indices = [i[0] for i in scores[1:n+1]]

    return tv_data.iloc[top_indices][['primaryTitle', 'genres', 'averageRating']]

print(recommend_show("Breaking Bad", n=5))

       primaryTitle                genres  averageRating
43            Fargo  Crime,Drama,Thriller            8.8
44        Mr. Robot  Crime,Drama,Thriller            8.5
51         The Wire  Crime,Drama,Thriller            9.3
57            Ozark  Crime,Drama,Thriller            8.4
68  Sons of Anarchy  Crime,Drama,Thriller            8.5


In [34]:
# Filter shows with startYear == 2025
shows_2025 = tv_data[tv_data['startYear'] == 2025]

# Display them
print(shows_2025)


              primaryTitle  startYear                  genres  averageRating  \
97             Adolescence     2025.0    Crime,Drama,Thriller            8.1   
268   Dexter: Resurrection     2025.0    Crime,Drama,Thriller            9.0   
313               Pluribus     2025.0            Drama,Sci-Fi            8.0   
326                Dept. Q     2025.0     Crime,Drama,Mystery            8.1   
328                MobLand     2025.0             Crime,Drama            8.3   
339               The Pitt     2025.0                   Drama            8.9   
359           Alien: Earth     2025.0  Horror,Sci-Fi,Thriller            7.1   
402   IT: Welcome to Derry     2025.0          Fantasy,Horror            8.0   
412  Daredevil: Born Again     2025.0      Action,Crime,Drama            8.1   
426      American Primeval     2025.0   Action,Drama,Thriller            8.0   
470         Heated Rivalry     2025.0     Drama,Romance,Sport            9.0   

     numVotes  
97     280778  
268    

In [37]:
print(recommend_show("The Pitt", n=20))

          primaryTitle        genres  averageRating
28               House         Drama            8.7
76           Aspirants         Drama            9.1
93            Euphoria         Drama            8.2
94             Mad Men         Drama            8.7
218   The Morning Show         Drama            8.1
252       The Newsroom         Drama            8.6
262    The Good Doctor         Drama            8.0
290               Maid         Drama            8.3
294           Billions         Drama            8.3
339           The Pitt         Drama            8.9
349              Skins         Drama            8.2
351         Unorthodox         Drama            8.0
353      The West Wing         Drama            8.9
380            Landman         Drama            8.2
385           Dopesick         Drama            8.6
391     Inventing Anna         Drama            6.9
453     Sandeep Bhaiya         Drama            9.1
461  Sapne Vs Everyone         Drama            9.2
482       Ch