# Movie Recommendation System
Preprocessing dan Penanganan Nilai NaN

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie dataset from the CSV file
df = pd.read_csv('9000plus.csv')

# Report the number of missing values in each column
print("Informasi NaN dalam dataset:", df.isnull().sum(), sep="\n")

# Preprocessing and NaN handling
def clean_dataframe(df):
    """Return a cleaned copy of ``df`` ready for text vectorization.

    Missing values in ``Title``, ``Overview`` and ``Genre`` are replaced with
    empty strings, rows in which all three columns are empty are dropped, and
    a ``Kombinasi`` column joining the three with single spaces is added.

    The input frame is not modified. The result carries a fresh 0..n-1 index
    so that row labels coincide with row positions; code that looks a row up
    by label and then indexes a matrix built from ``Kombinasi`` by position
    therefore stays aligned even after rows were dropped.

    Args:
        df: DataFrame containing ``Title``, ``Overview`` and ``Genre`` columns.

    Returns:
        A new DataFrame with the cleaned text columns plus ``Kombinasi``.

    Raises:
        KeyError: If one of the three text columns is missing.
    """
    text_columns = ['Title', 'Overview', 'Genre']

    # Work on a copy so the caller's frame is not altered as a side effect.
    cleaned = df.copy()
    for column in text_columns:
        # astype(str) keeps the later string concatenation valid even when a
        # column was parsed as numeric.
        cleaned[column] = cleaned[column].fillna('').astype(str)

    # Once NaN are filled, dropna(how='all') can never match a row, so test
    # for empty text instead to drop rows that carry no information at all.
    has_text = (cleaned[text_columns] != '').any(axis=1)
    cleaned = cleaned.loc[has_text].reset_index(drop=True)

    # Combined text used as the vectorizer input
    cleaned['Kombinasi'] = (
        cleaned['Title'] + ' ' + cleaned['Overview'] + ' ' + cleaned['Genre']
    )

    return cleaned

# Build the cleaned frame used by the steps below
df_clean = clean_dataframe(df)

# Show the per-column NaN counts after cleaning and the row totals
# before and after cleaning
print(
    "\nInformasi dataset setelah cleaning:",
    df_clean.isnull().sum(),
    f"Jumlah baris awal: {len(df)}, Jumlah baris setelah cleaning: {len(df_clean)}",
    sep="\n",
)

# Fit a TF-IDF model (English stop words removed) on the combined text column
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df_clean['Kombinasi'])

# Movie recommendation function
def rekomendasi_film(judul, df, tfidf_matrix, top_n=5):
    """Recommend the films most similar to the first title matching ``judul``.

    Args:
        judul: Text searched for in ``df['Title']``. It is matched literally
            (not as a regular expression), case-insensitively, as a substring.
        df: DataFrame with ``Title``, ``Overview``, ``Poster_Url``,
            ``Release_Date`` and ``Genre`` columns. Row ``i`` of ``df`` must
            correspond to row ``i`` of ``tfidf_matrix``.
        tfidf_matrix: 2-D feature matrix with one row per row of ``df``.
        top_n: Maximum number of recommendations; values below 1 yield an
            empty result.

    Returns:
        A DataFrame with the columns listed above, most similar film first and
        never containing the matched film itself, or ``None`` when no title
        matches or the inputs are unusable (a message is printed in that case).
    """
    try:
        # regex=False: titles often contain characters such as "(", "?" or "."
        # that would otherwise be interpreted as regex syntax.
        cocok = df['Title'].str.contains(judul, case=False, na=False, regex=False)

        # Row *positions* of the matches; index labels are not safe to use
        # against tfidf_matrix or .iloc when df has a non-default index.
        posisi_cocok = np.flatnonzero(cocok.to_numpy(dtype=bool))

        if posisi_cocok.size == 0:
            print("Tidak ada film yang ditemukan.")
            return None

        if tfidf_matrix.shape[0] != len(df):
            print("Terjadi kesalahan: jumlah baris df dan tfidf_matrix tidak sama.")
            return None

        # Use the first matching film as the query
        idx = int(posisi_cocok[0])

        # A one-row slice keeps the query 2-D for sparse and dense matrices alike
        similarity_scores = cosine_similarity(
            tfidf_matrix[idx:idx + 1], tfidf_matrix
        ).flatten()

        # Rank by descending score (stable, so ties keep row order) and drop
        # the query film explicitly: with duplicated or tied rows it is not
        # guaranteed to be ranked first.
        urutan = np.argsort(-similarity_scores, kind='stable')
        similar_indices = urutan[urutan != idx][:max(top_n, 0)]

        rekomendasi = df.iloc[similar_indices]
        return rekomendasi[['Title', 'Overview', 'Poster_Url', 'Release_Date', 'Genre']]

    except (KeyError, IndexError, ValueError, TypeError, AttributeError) as e:
        # Missing columns, a non-text Title column or unusable arguments end up here
        print(f"Terjadi kesalahan: {e}")
        return None