In [2]:
# Import necessary libraries
import pandas as pd
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import random

In [3]:
df = pd.read_csv("cleaned_songs.csv")

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1032 entries, 0 to 1031
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   1032 non-null   object
 1   album   1032 non-null   object
 2   year    1032 non-null   int64 
 3   artist  1027 non-null   object
 4   genre   1032 non-null   object
 5   text    1027 non-null   object
dtypes: int64(1), object(5)
memory usage: 48.5+ KB


In [5]:
# Drop unnecessary columns
columns_to_drop = ['id', 'image', 'url', 'duration']
columns_to_drop = [col for col in columns_to_drop if col in df.columns]
df = df.drop(columns_to_drop, axis=1)

In [6]:
# Add genre column
genres = ['Pop', 'Rock', 'Classical', 'Jazz', 'Hip-hop', 'Country', 'Electronic', 'Blues']
df['genre'] = [random.choice(genres) for _ in range(len(df))]  # Random genres for all songs


In [7]:
# Ensure the dataset includes the genre column
if 'genre' not in df.columns:
    raise ValueError("The dataset must include a 'genre' column.")

In [8]:
# Tokenization and text preprocessing
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

[nltk_data] Downloading package punkt to C:\Users\RAM
[nltk_data]     KISHOR\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [9]:
def tokenization(txt):
    txt = str(txt).lower()  # Convert text to lowercase
    tokens = nltk.word_tokenize(txt)  # Tokenize the text
    stemming = [stemmer.stem(w) for w in tokens]  # Apply stemming
    return " ".join(stemming)

In [10]:
# Combine 'title', 'album', and 'artist' into one text column
df['text'] = df['title'] + ' ' + df['album'] + ' ' + df['artist']
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [11]:

# Initialize TF-IDF Vectorizer and transform the text column
tfidvector = TfidfVectorizer(analyzer='word', stop_words='english')
matrix = tfidvector.fit_transform(df['text'])

In [12]:
# Calculate the cosine similarity matrix
similarity = cosine_similarity(matrix)

In [13]:
# Recommendation function for genre-based filtering
def genre_based_recommendation(selected_genre):
    # Filter the DataFrame based on the user's selected genre
    genre_df = df[df['genre'] == selected_genre]

    if genre_df.empty:
        return f"No songs found for the genre: {selected_genre}"

    # Recompute TF-IDF and similarity matrix for the filtered dataset
    genre_tfidvector = TfidfVectorizer(analyzer='word', stop_words='english')
    genre_matrix = genre_tfidvector.fit_transform(genre_df['text'])
    genre_similarity = cosine_similarity(genre_matrix)

    # Recommend the top songs in the selected genre
    top_recommendations = genre_df.head(10)['title'].tolist()  # Get top 10 songs
    return top_recommendations


In [14]:
def song_based_recommendation(selected_song):
    # Get the genre of the selected song
    selected_song_data = df[df['title'] == selected_song]
    if selected_song_data.empty:
        return [], []

    selected_genre = selected_song_data.iloc[0]['genre']
    # Recommend songs in the same genre
    genre_recommendations = genre_based_recommendation(selected_genre)

    return genre_recommendations

In [15]:
# Example usage:
selected_song = "Ola Olaala Ala"  # Replace with the user's input
recommendations = song_based_recommendation(selected_song)
print(f"Recommendations based on the song {selected_song}:")
print(recommendations)

Recommendations based on the song Ola Olaala Ala:
['Ola Olaala Ala', 'Maha Muddu', 'Kalalo Kooda', 'Adhento Gaani Vunnapaatuga', 'Kollagottey', 'Bhairava Anthem (From &quot;Kalki 2898 Ad&quot;) (Telugu)', 'Tella Tellani Cheera', 'Nee Yadalo Naaku', 'Sokuladi Sittammi', 'Tharagathi Gadhi']


In [16]:
student_info = df[df['title'] == 'Nalupu']

In [17]:
student_info

Unnamed: 0,title,album,year,artist,genre,text
37,Nalupu,Kanchana,2011,Thaman SRita ThyagarajanSuchith Santoshan,Country,nalupu kanchana thaman srita thyagarajansuchit...


In [18]:
# Save the similarity matrix and the dataframe for later use
pickle.dump(similarity, open('similarity.pkl', 'wb'))
pickle.dump(df, open('df.pkl', 'wb'))

In [19]:
df.head()

Unnamed: 0,title,album,year,artist,genre,text
0,Anuvanuvuu,Om Bheem Bush,2024,Sunny M.R.Arijit Singh,Classical,anuvanuvuu om bheem bush sunni m.r.arijit singh
1,Suttamla Soosi,Gangs Of Godavari,2024,Anurag KulkarniYuvan Shankar Raja,Pop,suttamla soosi gang of godavari anurag kulkarn...
2,Ola Olaala Ala,Orange,2010,Ranina ReddyN.C. Karunya,Hip-hop,ola olaala ala orang ranina reddyn.c . karunya
3,Radhika,Tillu Square,2023,Ram Miriyala,Pop,radhika tillu squar ram miriyala
4,Nijame Ne Chebutunna,Ooru Peru Bhairavakona,2023,Sid Sriram,Pop,nijam ne chebutunna ooru peru bhairavakona sid...
