In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Load the movie data
# file_path is relative, so it is resolved against the notebook's working directory.
file_path = 'indonesian_movies.csv'  # Replace with the path to your CSV file
data = pd.read_csv(file_path)

In [3]:
# Step 2: Data preprocessing
# Missing descriptions become empty strings (so they can be vectorised later)
# and genre labels are lower-cased (so genre matching can ignore case).
data = data.assign(
    description=data['description'].fillna(''),
    genre=data['genre'].str.lower(),
)

In [4]:
# Step 3: Vectorise the descriptions with TF-IDF
# English stop words are dropped before weighting. Each row of tfidf_matrix
# corresponds to the movie at the same position in data['description'].
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['description'])

In [5]:
# Step 4: Compute cosine similarity
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# between rows already equals their cosine similarity. With no second argument,
# linear_kernel compares the matrix against itself.
cosine_sim = linear_kernel(tfidf_matrix)

In [6]:
# Step 5: Build the recommendation function
def get_recommendations(genre_filter=None, age_rating=None, min_rating=None, year=None, cosine_sim=cosine_sim):
    """Filter the movie catalogue and rank the matches by user rating.

    The function works on a copy of the module-level ``data`` frame, so the
    catalogue itself is never modified.

    Parameters
    ----------
    genre_filter : str, optional
        Text to look for in the ``genre`` column. It is lower-cased and handed to
        ``Series.str.contains``, which interprets it as a regular expression.
        A falsy value (``None``, ``''``) disables the filter.
    age_rating : str, optional
        Audience group: ``'remaja'`` keeps movies labelled ``'13+'`` and
        ``'dewasa'`` keeps movies labelled ``'17+'``. Matching ignores case and
        surrounding whitespace. Any other value leaves the selection unfiltered.
    min_rating : float, optional
        Lowest acceptable ``users_rating`` (inclusive). A falsy value
        (``None``, ``0``) disables the filter.
    year : int, optional
        Exact value to match in the ``year`` column. A falsy value
        (``None``, ``0``) disables the filter.
    cosine_sim : optional
        Not read by this implementation; the parameter is kept only so that
        existing callers keep working.

    Returns
    -------
    tuple
        ``(top_recommendation, other_recommendations)``. The first item is a
        dict for the highest-rated match, or ``None`` when nothing matches. The
        second is a list of dicts for the remaining matches in descending rating
        order (empty when there are fewer than two matches). Remaining missing
        values are rendered as ``'-'``.
    """
    # Audience group -> label stored in the 'age_rating' column.
    age_groups = {'remaja': '13+', 'dewasa': '17+'}

    recommendations = data.copy()

    # Fill the filter columns first so the comparisons below never see NaN.
    # Movies without a rating get 0, so they rank last and are dropped by any
    # positive min_rating.
    recommendations['genre'] = recommendations['genre'].fillna('')
    recommendations['age_rating'] = recommendations['age_rating'].fillna('')
    recommendations['users_rating'] = recommendations['users_rating'].fillna(0)
    recommendations['year'] = recommendations['year'].fillna(0)

    if genre_filter:
        recommendations = recommendations[recommendations['genre'].str.contains(genre_filter.lower(), na=False)]

    if age_rating is not None:
        # Normalise the group name so 'Remaja' or ' dewasa ' behave like the
        # lower-case spellings instead of silently skipping the filter.
        rating_label = age_groups.get(str(age_rating).strip().lower())
        if rating_label is not None:
            recommendations = recommendations[recommendations['age_rating'] == rating_label]

    if min_rating:
        recommendations = recommendations[recommendations['users_rating'] >= min_rating]

    if year:
        recommendations = recommendations[recommendations['year'] == year]

    recommendations = recommendations.fillna('-')
    # A stable sort keeps equally rated movies in catalogue order, so the top
    # pick is reproducible between runs.
    recommendations = recommendations.sort_values(by='users_rating', ascending=False, kind='mergesort')

    if recommendations.empty:
        return None, []

    top_recommendation = recommendations.iloc[0].to_dict()
    # Slicing past the end yields an empty frame, hence [] for a single match.
    other_recommendations = recommendations.iloc[1:].to_dict(orient='records')

    return top_recommendation, other_recommendations

In [7]:
# Step 6: Try out the recommendation function
# Example: drama movies for a teenage audience ('remaja'), rated at least 7.0,
# released in 2020.
genre, age_rating, min_rating, year = 'drama', 'remaja', 7.0, 2020

top_recommendation, other_recommendations = get_recommendations(
    genre_filter=genre,
    age_rating=age_rating,
    min_rating=min_rating,
    year=year,
)

# Header and value are printed on separate lines.
print("Top Recommendation:", top_recommendation, sep="\n")

print("\nOther Recommendations:")
for rec in other_recommendations:
    print(rec)

Top Recommendation:
{'movie_id': 100010, 'title': 'Mariposa', 'year': 2020, 'description': 'Iqbal (Angga Yunanda) is like a Mariposa butterfly to Acha (Adhisty Zara). Each time someone approach, he always runs away. Acha is determined to win Iqbal, a man known to be handsome, smart, yet cold.', 'genre': 'drama', 'age_rating': '13+', 'users_rating': 8.5, 'votes': '54', 'languages': 'Indonesian', 'directors': 'Fajar Bustomi', 'actors': "['Angga Yunanda', 'Adhisty Zara', 'Dannia Salsabila', 'Abun Sungkar', 'Junior Roberts', 'Syakir Daulay', 'Ariyo Wahab', 'Irgi Fahrezi', 'Ersa Mayori', 'Baim', 'Iszur Muchtar', 'Ruth Permatasari', 'Yudha Keling', 'Diaz Danar', 'Aris Nugraha']", 'runtime': '117 min'}

Other Recommendations:
{'movie_id': 100013, 'title': 'Nanti Kita Cerita Tentang Hari Ini', 'year': 2020, 'description': 'Three siblings live in happy-looking families, but one of them had changed and got warned by his parents. This prompted the rebellion of the three siblings which led to the 