In [5]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from inltk.inltk import setup
from inltk.inltk import tokenize
from collections import Counter
import pandas as pd
import re


In [9]:
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from collections import Counter

import seaborn as sns
import os
import requests
import json
import spotipy

from spotipy.oauth2 import SpotifyClientCredentials # Recommendation Generator


class HindiRecommender:
    def __init__(self):
        sns.set(rc={'figure.figsize':(10,7)})
        pd.set_option('max_colwidth', 400)
        self.df = pd.read_csv(os.path.join(os.getcwd(), 'data/transliterated_data.csv'))
        self.df.drop(['Unnamed: 0'] , axis = 1, inplace = True)
        self.df.drop(['level_0'] , axis = 1, inplace = True)
        print(self.df.columns)
        self.df = self.df.drop_duplicates(subset='artist', keep='first')
        self.df = self.df.reset_index(level=0)
        self.df.rename(columns = {'Song name'  : 'song_name'}, inplace = True)

        self.train()

    # Calculating cosine similarities from lyrics and storing similar song results in results dict

    def train(self):
        self.tf = TfidfVectorizer(analyzer='word', min_df=0, max_features= 100 , lowercase=True)
        self.tfidf_matrix = self.tf.fit_transform(self.df['lyrics'])

        self.cosine_similarities = linear_kernel(self.tfidf_matrix, self.tfidf_matrix)
        self.results = {}

        for idx, row in self.df.iterrows():
            similar_indices = self.cosine_similarities[idx].argsort()[:-100:-1]
            similar_items = [(self.cosine_similarities[idx][i], self.df['id'][i]) for i in similar_indices]
            self.results[row['id']] = similar_items[1:]



    def item(self,id):
        return self.df.loc[self.df['id'] == id]['song_name']
        
    def get_musixmatch_api_url(self, url):
            return 'http://api.musixmatch.com/ws/1.1/{}&format=json&apikey={}'.format(url, \
            os.getenv("MUSIX_API_KEY"))
    def find_track_info(self, artist, title):
        url = 'matcher.track.get?q_track={}&q_artist={}'.format(title,artist)
        matched_res = requests.get(self.get_musixmatch_api_url(url))
        matched_data = json.loads(matched_res.text)

        if matched_data["message"]["header"]["status_code"] == 200:
            #Get initial Musixmatch information
            artist = matched_data["message"]["body"]["track"]["artist_name"]
            title = matched_data["message"]["body"]["track"]["track_name"]
            track_id = matched_data["message"]["body"]["track"]["track_id"]

            #Access Spotify API
            client_credentials_manager = SpotifyClientCredentials(\
            client_id=os.getenv("SPOTIFY_CLIENT_ID"), \
            client_secret=os.getenv("SPOTIFY_CLIENT_SECRET"))
            spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

            #Get album art and a preview url from Spotify
            results = spotify.search(q='track:' + title + \
            ' artist:' + artist, type='track')
            out = {"artist" : artist, "name" : title}
            try:
                track = results['tracks']['items'][0]
                out["image_url"] = track["album"]["images"][1]["url"]
                out["preview_url"] = track["preview_url"]
                out["spotify_url"] = track['external_urls']['spotify']
                
                return out
            except:
                return False

    def recommend(self, artist, song_name, num):
        # filter df to find id of the song with name and artist
        song = self.df.query('song_name == @song_name and artist == @artist')
        # if doesnt exist, return False for now TODO : change this..
        # if exists, return the recommendations
        if(not song.empty):
            id = song['id']
            id = id.item()
            recs = self.results[id][:num]
            i=0
            recommendations = []
            for rec in recs:
                track = self.df.query('id==@rec[1]')
                artist = str(track["artist"].values[0]) 
                track =str(track["song_name"].values[0])
                newRecommendation = self.find_track_info(artist, track)
                if(newRecommendation):
                    recommendations.append(newRecommendation)
                else:
                    pass
            return(recommendations)
        else:
            False

    
    def addRow(self, song_name, type, artist, lyrics):
        id = len(self.df['id'])
        newRow = [0,song_name, type, artist, lyrics, id]
        print(len(self.df.loc[len(self.df['id']) - 1]))
        # add the new song data to the df
        self.df.loc[len(self.df['id'])] = newRow
        #train again
        self.train()
        

In [10]:
t = HindiRecommender()
t.addRow(song_name = 'kesariya', type = 'romantic', artist =  'arijit singh', lyrics =  '''मुझको... इतना बताये कोई कैसे तुझसे दिल ना लगाए कोई रब्बा ने तुझको बनाने में कर दी है हुस्न की खाली तिजोरियाँ काजल की सिहाई से लिखी है तूने जाने कितनो की लव स्टोरियाँ केसरिया तेरा इश्क़ है पिया रंग जाऊं जो मैं हाथ लगाऊँ दिन बीते सारा तेरी फ़िक्र में रैन सारी तेरी खैर मनाऊँ शिवा... ईशा का मतलब जानते हो तुम ? पार्वती अब शिवा का साथ पार्वती नही देगी तो कौन देगा पतझड़ के मौसम में भी रंगी चनारो जैसी झनके सन्नाटो में तू वीना के तारो जैसी... हम्म... सदियों से भी लम्बी ये मन की अमावसे है और तू फुलझड़ियों वाले त्योहारों जैसी चंदा भी दीवाना है तेरा जलती है तुझसे सारी चकोरियाँ... काजल की सिहाई से लिखी है तूने जाने कितनो की लव स्टोरियाँ... लव स्टोरियाँ... केसरिया तेरा इश्क़ है पिया रंग जाऊं जो मैं हाथ लगाऊँ दिन बीते सारा तेरी फ़िक्र में रैन सारी तेरी खैर मनाऊँ केसरिया तेरा इश्क़ है पिया इश्क़ है पिया केसरिया तेरा इश्क़ है पिया इश्क़ है पिया केसरिया तेरा''')


Index(['song_name', 'type', 'artist', 'lyrics', 'id'], dtype='object')
6


In [11]:
test = t.recommend('kishor kumar','neelanillaiyambar', 3)

In [12]:
test

[{'artist': 'Akhil & Dhvani Bhanushali',
  'name': 'Duniyaa (From "Luka Chuppi")',
  'image_url': 'https://i.scdn.co/image/ab67616d00001e02c3b01eb89e34e1f2838e425a',
  'preview_url': 'https://p.scdn.co/mp3-preview/653f3afac1a294560f5a20ce1dce71fa5f186da1?cid=ded2ad68754742df8ef897de2e835c0e',
  'spotify_url': 'https://open.spotify.com/track/3s3J70eRpRH2hcYq4mEXTJ'},
 {'artist': 'Shiraz Uppal',
  'name': 'Mere Khuda',
  'image_url': 'https://i.scdn.co/image/ab67616d00001e02cd8e501bd6887a5482d11296',
  'preview_url': 'https://p.scdn.co/mp3-preview/f712bc659288ba899689ebcd0941336d279a3baf?cid=ded2ad68754742df8ef897de2e835c0e',
  'spotify_url': 'https://open.spotify.com/track/46FAFrZrQHLte6NEN4yNbB'}]

In [13]:
import requests
def get_musixmatch_api_url(url):
        return 'http://api.musixmatch.com/ws/1.1/{}&format=json&apikey={}'.format(url, \
        os.getenv("MUSIX_API_KEY"))
def find_track_info(artist, title):
    url = 'matcher.track.get?q_track={}&q_artist={}'.format(title,artist)
    matched_res = requests.get(get_musixmatch_api_url(url))
    matched_data = json.loads(matched_res.text)

    if matched_data["message"]["header"]["status_code"] == 200:
        #Get initial Musixmatch information
        artist = matched_data["message"]["body"]["track"]["artist_name"]
        title = matched_data["message"]["body"]["track"]["track_name"]
        track_id = matched_data["message"]["body"]["track"]["track_id"]

        #Access Spotify API
        client_credentials_manager = SpotifyClientCredentials(\
        client_id=os.getenv("SPOTIFY_CLIENT_ID"), \
        client_secret=os.getenv("SPOTIFY_CLIENT_SECRET"))
        spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

        #Get album art and a preview url from Spotify
        results = spotify.search(q='id:'+str(track_id), type='track', limit=1)
        out = {"artist" : artist, "name" : title}
        print(results)
        try:
            track = results['tracks']['items'][0]
            out["image_url"] = track["album"]["images"][1]["url"]
            out["preview_url"] = track["preview_url"]
            out["spotify_url"] = track['external_urls']['spotify']
            
            return out
        except:
            return False


In [26]:
find_track_info("o chaki saki","latha mangeshkeshkar")

In [32]:
df = pd.read_csv("data/hindi_data.csv")

In [35]:
df = df.drop_duplicates(subset='artist', keep='first')
df = df.reset_index(level=0)
df.rename(columns = {'Song name'  : 'song_name'}, inplace = True)


In [36]:
df.columns

Index(['level_0', 'index', 'song_name', 'type', 'artist', 'lyrics'], dtype='object')

In [37]:
temp = df[["index", "song_name", "artist"]]

In [38]:
temp.to_csv("data/song_name_artist_name.csv")