In [75]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from inltk.inltk import setup
from inltk.inltk import tokenize
from collections import Counter
import pandas as pd
import re
from langdetect import detect
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from collections import Counter
import seaborn as sns
import os


class HindiRecommender:
    def __init__(self):
        sns.set(rc={'figure.figsize':(10,7)})
        pd.set_option('max_colwidth', 400)
        self.df = pd.read_csv(os.path.join(os.getcwd(), 'data/transliterated_data.csv'))
        self.df.drop(['Unnamed: 0'] , axis = 1, inplace = True)
        self.df.drop(['level_0'] , axis = 1, inplace = True)
        print(self.df.columns)
        self.df = self.df.drop_duplicates(subset='artist', keep='first')
        self.df = self.df.reset_index(level=0)
        self.df.rename(columns = {'Song name'  : 'song_name'}, inplace = True)

        self.train()

    # Calculating cosine similarities from lyrics and storing similar song results in results dict

    def train(self):
        self.tf = TfidfVectorizer(analyzer='word', min_df=0, max_features= 100 , lowercase=True)
        self.tfidf_matrix = self.tf.fit_transform(self.df['lyrics'])

        self.cosine_similarities = linear_kernel(self.tfidf_matrix, self.tfidf_matrix)
        self.results = {}

        for idx, row in self.df.iterrows():
            similar_indices = self.cosine_similarities[idx].argsort()[:-100:-1]
            similar_items = [(self.cosine_similarities[idx][i], self.df['id'][i]) for i in similar_indices]
            self.results[row['id']] = similar_items[1:]



    def item(self,id):
        return self.df.loc[self.df['id'] == id]['song_name']

    def recommend(self, artist, song_name, num):
        # filter df to find id of the song with name and artist
        song = self.df.query('song_name == @song_name and artist == @artist')
        # if doesnt exist, return False for now TODO : change this..
        # if exists, return the recommendations
        if(not song.empty):
            id = song['id']
            id = id.item()
            recs = self.results[id][:num]
            i=0
            recommendations = []
            for rec in recs:
                recommendation = {}
                track = self.df.query('id==@rec[1]')
                recommendation["artist"] = str(track["artist"].values[0]) 
                recommendation["name"] =str(track["song_name"].values[0])
                
                recommendation["spotify_id"] = ""
                recommendation["image_url"] = ""
                recommendation["preview_url"] = ""
                recommendations.append(recommendation)
            return(recommendations)
        else:
            # get song data from api
            # if data not found, return error
            # make sure lyrics is in hindi script
            
            # add to the data 
            #self.addRow( songData )
            
            # call the same function with same arguments and return
            #return(self.recommend(artist, song_name, num))
            print("Song not found in the dataset")

    
    def addRow(self, song_name, type, artist, lyrics):
        id = len(self.df['id'])
        newRow = [0,song_name, type, artist, lyrics, id]
        print(len(self.df.loc[len(self.df['id']) - 1]))
        # add the new song data to the df
        self.df.loc[len(self.df['id'])] = newRow
        #train again
        self.train()
        

In [76]:
t = HindiRecommender()
t.addRow(song_name = 'kesariya', type = 'romantic', artist =  'arijit singh', lyrics =  '''मुझको... इतना बताये कोई कैसे तुझसे दिल ना लगाए कोई रब्बा ने तुझको बनाने में कर दी है हुस्न की खाली तिजोरियाँ काजल की सिहाई से लिखी है तूने जाने कितनो की लव स्टोरियाँ केसरिया तेरा इश्क़ है पिया रंग जाऊं जो मैं हाथ लगाऊँ दिन बीते सारा तेरी फ़िक्र में रैन सारी तेरी खैर मनाऊँ शिवा... ईशा का मतलब जानते हो तुम ? पार्वती अब शिवा का साथ पार्वती नही देगी तो कौन देगा पतझड़ के मौसम में भी रंगी चनारो जैसी झनके सन्नाटो में तू वीना के तारो जैसी... हम्म... सदियों से भी लम्बी ये मन की अमावसे है और तू फुलझड़ियों वाले त्योहारों जैसी चंदा भी दीवाना है तेरा जलती है तुझसे सारी चकोरियाँ... काजल की सिहाई से लिखी है तूने जाने कितनो की लव स्टोरियाँ... लव स्टोरियाँ... केसरिया तेरा इश्क़ है पिया रंग जाऊं जो मैं हाथ लगाऊँ दिन बीते सारा तेरी फ़िक्र में रैन सारी तेरी खैर मनाऊँ केसरिया तेरा इश्क़ है पिया इश्क़ है पिया केसरिया तेरा इश्क़ है पिया इश्क़ है पिया केसरिया तेरा''')


Index(['song_name', 'type', 'artist', 'lyrics', 'id'], dtype='object')
6


In [77]:
test = t.recommend('arijit singh', 'kesariya', 3)

In [78]:
test

[{'artist': 'ar rahman', 'name': 'ishk bina'},
 {'artist': 'jubin nouttyal s', 'name': 'isk da mara'},
 {'artist': 'sukhbir',
  'name': 'tara gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin gin)'}]

6


In [79]:
import requests
import json
import spotipy

from spotipy.oauth2 import SpotifyClientCredentials # Recommendation Generator

def get_musixmatch_api_url(url):
        return 'http://api.musixmatch.com/ws/1.1/{}&format=json&apikey={}'.format(url, \
        os.getenv("MUSIX_API_KEY"))
def find_track_info(artist, title):
    url = 'matcher.track.get?q_track={}&q_artist={}'.format(title,artist)
    matched_res = requests.get(get_musixmatch_api_url(url))
    matched_data = json.loads(matched_res.text)

    if matched_data["message"]["header"]["status_code"] == 200:
        #Get initial Musixmatch information
        artist = matched_data["message"]["body"]["track"]["artist_name"]
        title = matched_data["message"]["body"]["track"]["track_name"]
        track_id = matched_data["message"]["body"]["track"]["track_id"]

        #Make another API call for the lyrics
        url = 'track.lyrics.get?track_id={}'.format(track_id)
        lyrical_res = requests.get(get_musixmatch_api_url(url))
        lyrical_data = json.loads(lyrical_res.text)
        lyrics =  lyrical_data["message"]["body"]["lyrics"]["lyrics_body"].split("...")[0]

        #Access Spotify API
        client_credentials_manager = SpotifyClientCredentials(\
        client_id=os.getenv("SPOTIFY_CLIENT_ID"), \
        client_secret=os.getenv("SPOTIFY_CLIENT_SECRET"))
        spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

        #Get album art and a preview url from Spotify
        results = spotify.search(q='track:' + title + \
        ' artist:' + artist, type='track')
        print(results)
        out = {"artist" : artist, "name" : title}
        try:
            track = results['tracks']['items'][0]
            image_url = track["album"]["images"][1]["url"]
            preview_url = track["preview_url"]
            spotify_url = track['external_urls']['spotify']

            return{}
        except:
            return False

In [38]:
find_track_info("arijit sing", "kesariya")

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=track%3AKesariya+artist%3APritam+feat.+Arijit+Singh+%26+Amitabh+Bhattacharya&type=track&offset=0&limit=10', 'items': [], 'limit': 10, 'next': None, 'offset': 0, 'previous': None, 'total': 0}}
