# **IMPORT LIBRARY**

In [1]:
import requests
import pandas as pd

In [2]:
# Mount Google Drive inside the Colab runtime; the dataset is later read
# from a path below this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# **DATASET**



*Dataset* dapat diakses pada tautan berikut ini.

*   https://drive.google.com/file/d/1VUIeb380DGdK9CIY9rpI1zXt7eVvA5vP/view?usp=sharing


In [3]:
# Load the song dataset from the mounted Drive folder into a DataFrame.
DATASET_PATH = "/content/drive/MyDrive/MK SISTEM REKOMENDASI/DatasetLagu.csv"
df = pd.read_csv(DATASET_PATH)

# Last expression of the cell: render the frame as the cell output.
df

Unnamed: 0,Artis,Judul,Lirik
0,Niki,Ocean and Engines,Saturday sunset\nWe're lying on my bed with fi...
1,Taylor Swift,August,"Salt air, and the rust on your door\nI never n..."
2,One Direction,Steal My Girl,She's been my queen since we were sixteen\nWe ...
3,The 1975,Robbers,She had a face straight outta magazine\nGod on...
4,Rex Orange County,Pluto Projector,[Verse 1]\nThe great protector\nIs that what I...
...,...,...,...
73,Red Hot Chili Peppers,Soul To Squeeze,I got a bad disease\nOut from my brain is wher...
74,Post Malone,Chemical,Oxytocin makin' it all okay\nWhen I come back ...
75,Michael Learns to Rock,The Actor,He takes you out and he takes you up\n'Cause h...
76,Bon Jovi,It's my life,This ain't a song for the broken-hearted\r\nNo...


# **TF-IDF**

In [4]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [5]:
# Build one text document per song: the title followed by the lyrics,
# with missing values replaced by empty strings.
song_titles_text = df['Judul'].fillna('')
song_lyrics_text = df['Lirik'].fillna('')
song_documents = song_titles_text + ' ' + song_lyrics_text

# TF-IDF vectorizer that turns the text into numeric vectors, using
# scikit-learn's built-in English stop-word list.
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary and vectorize every song document in one pass.
tfidf_matrix = tfidf.fit_transform(song_documents)

In [6]:
# Vocabulary terms (one per matrix column) and the dense TF-IDF matrix.
feature_names = tfidf.get_feature_names_out()
tfidf_values = tfidf_matrix.toarray()

# Mean TF-IDF weight of each word, averaged over all songs.
avg_tfidf_values = tfidf_values.mean(axis=0)

# np.argsort orders ascending, so the last 100 positions are the 100 highest
# means, listed from the smallest of them up to the largest.
top_100_indices = np.argsort(avg_tfidf_values)[-100:]
top_100_words = feature_names[top_100_indices].tolist()

# Report each selected word together with its mean TF-IDF weight.
for term_index in top_100_indices:
    print(f'Kata "{feature_names[term_index]}" memiliki nilai rata-rata TF-IDF {avg_tfidf_values[term_index]}')

Kata "whoa" memiliki nilai rata-rata TF-IDF 0.010185981933434108
Kata "place" memiliki nilai rata-rata TF-IDF 0.010234842071486675
Kata "better" memiliki nilai rata-rata TF-IDF 0.010280692386593735
Kata "sky" memiliki nilai rata-rata TF-IDF 0.010438419896461982
Kata "sing" memiliki nilai rata-rata TF-IDF 0.01050196396918566
Kata "forget" memiliki nilai rata-rata TF-IDF 0.010543071876727891
Kata "chorus" memiliki nilai rata-rata TF-IDF 0.010640208737729842
Kata "loved" memiliki nilai rata-rata TF-IDF 0.011037686660093074
Kata "song" memiliki nilai rata-rata TF-IDF 0.011057521498265119
Kata "door" memiliki nilai rata-rata TF-IDF 0.01106190191573084
Kata "pretty" memiliki nilai rata-rata TF-IDF 0.011077513277280682
Kata "good" memiliki nilai rata-rata TF-IDF 0.011140112250954163
Kata "line" memiliki nilai rata-rata TF-IDF 0.011171856619989212
Kata "hard" memiliki nilai rata-rata TF-IDF 0.011233265382245691
Kata "bothering" memiliki nilai rata-rata TF-IDF 0.011248192812568333
Kata "perfect

***Item Similarity***

In [7]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise similarity between songs, computed from their TF-IDF vectors.
# linear_kernel is a plain dot product; passing a single matrix compares it
# with itself. NOTE(review): this equals cosine similarity only while the
# TF-IDF rows are L2-normalised (TfidfVectorizer's documented default).
cosine_sim = linear_kernel(tfidf_matrix)

# Label both axes with the song titles so similarities can be looked up by title.
song_titles = df['Judul']
item_sim = pd.DataFrame(data=cosine_sim, index=song_titles, columns=song_titles)
item_sim

Judul,Ocean and Engines,August,Steal My Girl,Robbers,Pluto Projector,AMAZING,SHADE,Uno,Untitled,Can i Open a window,...,Favorite Girl,Sleep Well,August,SkyFall,Starman,Soul To Squeeze,Chemical,The Actor,It's my life,Danger line
Judul,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ocean and Engines,1.000000,0.070576,0.021430,0.092945,0.114891,0.069963,0.054329,0.118124,0.090686,0.031113,...,0.032505,0.049966,0.070576,0.021204,0.032777,0.104342,0.035892,0.073934,0.055841,0.060989
August,0.070576,1.000000,0.015160,0.020093,0.082059,0.079251,0.028249,0.059977,0.078740,0.039582,...,0.014955,0.005543,1.000000,0.008314,0.044914,0.039712,0.041380,0.047405,0.062620,0.025022
Steal My Girl,0.021430,0.015160,1.000000,0.013943,0.006716,0.010663,0.003238,0.020711,0.011220,0.007090,...,0.025882,0.012974,0.015160,0.004637,0.008021,0.016244,0.009409,0.009522,0.008549,0.014197
Robbers,0.092945,0.020093,0.013943,1.000000,0.121046,0.145493,0.008376,0.058740,0.040717,0.017431,...,0.012191,0.030071,0.020093,0.019480,0.027662,0.101360,0.024935,0.179137,0.024744,0.086079
Pluto Projector,0.114891,0.082059,0.006716,0.121046,1.000000,0.189864,0.049402,0.129286,0.099778,0.072140,...,0.011204,0.094549,0.082059,0.013264,0.028355,0.099509,0.050024,0.071098,0.077277,0.087000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Soul To Squeeze,0.104342,0.039712,0.016244,0.101360,0.099509,0.079438,0.054280,0.101692,0.093060,0.027456,...,0.014830,0.044881,0.039712,0.034258,0.044903,1.000000,0.111393,0.071170,0.072656,0.046723
Chemical,0.035892,0.041380,0.009409,0.024935,0.050024,0.044974,0.002455,0.068275,0.050836,0.028559,...,0.007588,0.013125,0.041380,0.053005,0.062885,0.111393,1.000000,0.047896,0.024666,0.031205
The Actor,0.073934,0.047405,0.009522,0.179137,0.071098,0.089960,0.074775,0.073242,0.061227,0.036501,...,0.032146,0.034150,0.047405,0.005127,0.019330,0.071170,0.047896,1.000000,0.021442,0.052304
It's my life,0.055841,0.062620,0.008549,0.024744,0.077277,0.034319,0.062813,0.183821,0.104282,0.107779,...,0.082352,0.039029,0.062620,0.050455,0.028064,0.072656,0.024666,0.021442,1.000000,0.054660


***Top N Recommendations***

In [8]:
# Song recommended by Berlian Muhammad G. A
judul_lagu = "Stand Out Fit In"
top_n = 10

# Similarity of every song to the query title. Song titles label both axes of
# item_sim, so a title that occurs more than once selects several columns
# (a DataFrame instead of a Series); in that case use the first match.
similarity_to_query = item_sim[judul_lagu]
if isinstance(similarity_to_query, pd.DataFrame):
    similarity_to_query = similarity_to_query.iloc[:, 0]

# Exclude the query song by its label rather than by skipping the first sorted
# row: positional skipping is only correct when the query ranks first and its
# title is unique, otherwise the query itself can be recommended back.
recom = (
    similarity_to_query[similarity_to_query.index != judul_lagu]
    .sort_values(ascending=False)
    .head(top_n)  # Top 10
)
print(f"Rekomendasi untuk judul '{judul_lagu}':")
print(recom)

Rekomendasi untuk judul 'Stand Out Fit In':
Judul
SkyFall                  0.193074
Robbers                  0.112705
Piledriver Waltz         0.077532
It's my life             0.075267
Sit Still Look Pretty    0.073721
You & I                  0.072678
AMAZING                  0.063241
Right There              0.063106
Sleep Well               0.062564
you & I                  0.061482
Name: Stand Out Fit In, dtype: float64


In [9]:
# Song recommended by Bayu Surya Dharma S
judul_lagu = "Fix You"
top_n = 10

# Similarity of every song to the query title. Song titles label both axes of
# item_sim, so a title that occurs more than once selects several columns
# (a DataFrame instead of a Series); in that case use the first match.
similarity_to_query = item_sim[judul_lagu]
if isinstance(similarity_to_query, pd.DataFrame):
    similarity_to_query = similarity_to_query.iloc[:, 0]

# Exclude the query song by its label rather than by skipping the first sorted
# row: positional skipping is only correct when the query ranks first and its
# title is unique, otherwise the query itself can be recommended back.
recom = (
    similarity_to_query[similarity_to_query.index != judul_lagu]
    .sort_values(ascending=False)
    .head(top_n)  # Top 10
)
print(f"Rekomendasi untuk judul '{judul_lagu}':")
print(recom)

Rekomendasi untuk judul 'Fix You':
Judul
You are the reason            0.117132
Bones                         0.113252
it will rain                  0.091938
You & I                       0.091593
Untitled                      0.071021
A Place for my head           0.070899
On Bended Knee                0.069638
Payphone                      0.069315
Ocean and Engines             0.068019
Saving all my love for you    0.065099
Name: Fix You, dtype: float64
