* Input: influencer name (profile URL)
* Output 1: list of top 10 songs
    1. ranked so that songs used by celebrities with more followers (and, for equal followers, better engagement) come first
* Output 2: list of top 10 songs
    1. celebrity: an influencer with more than 80k followers
    2. returns the top 10 songs that the largest number of celebrities have used in their reels

## Import

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

## DataFrame

In [54]:
# Load the dataset from the Excel workbook; head(2) previews the first two rows.
df = pd.read_excel('Dummy Data2.xlsx')
df.head(2)

Unnamed: 0,Profile URL,Likes,Comments,Time,Description,Song Name,Followers
0,https://www.instagram.com/abhishekothari/,37230,30470,5d,Bombae💋 Loved by Millions🙌🏻🙈 #teamhk Managed b...,SOAAADD12AB018A9DD,69310.0
1,https://www.instagram.com/an_anandrk/,37139,18263,2d,#PyaarMangdi official video👇🏻👇🏻,SOAAADE12A6D4F80CC,120502.0


## Operate on Time

In [55]:
def convert_week_to_days(s):
    """Convert a relative age label such as ``'5d'`` or ``'2w'`` into days.

    The label is a non-negative integer followed by a unit suffix. The whole
    numeric prefix is parsed (not just its first character), so multi-digit
    labels such as ``'12d'`` or ``'10w'`` are handled correctly.

    Args:
        s: Age label, e.g. ``'5d'`` (five days) or ``'2w'`` (two weeks).

    Returns:
        The age in days as an ``int``. A ``'w'`` suffix multiplies the number
        by 7; any other suffix leaves the number unscaled.

    Raises:
        ValueError: If the label does not start with a digit.
    """
    # Find where the leading run of digits ends.
    end = 0
    while end < len(s) and s[end].isdecimal():
        end += 1
    if end == 0:
        raise ValueError(f"age label must start with a number: {s!r}")

    num = int(s[:end])
    # First non-blank character after the number is the unit.
    unit = s[end:].strip().lower()[:1]
    if unit == 'w':
        num *= 7
    # NOTE(review): every non-week unit is treated as days, so a label like
    # '3h' would count as 3 days -- confirm the data only contains d/w labels.
    return num
# Replace each 'Time' label with the number of days returned by convert_week_to_days.
df['Time'] = df['Time'].apply(convert_week_to_days)
df.head(2)

Unnamed: 0,Profile URL,Likes,Comments,Time,Description,Song Name,Followers
0,https://www.instagram.com/abhishekothari/,37230,30470,5,Bombae💋 Loved by Millions🙌🏻🙈 #teamhk Managed b...,SOAAADD12AB018A9DD,69310.0
1,https://www.instagram.com/an_anandrk/,37139,18263,2,#PyaarMangdi official video👇🏻👇🏻,SOAAADE12A6D4F80CC,120502.0


In [56]:
# Turn the raw totals into per-day rates by dividing by the post age in days.
df['Likes'] = df['Likes']/df['Time']  # likes per day
df['Comments'] = df['Comments']/df['Time']  # comments per day
df.drop('Time', axis=1, inplace=True)  # 'Time' is not referenced after this point
df.head(2)

Unnamed: 0,Profile URL,Likes,Comments,Description,Song Name,Followers
0,https://www.instagram.com/abhishekothari/,7446.0,6094.0,Bombae💋 Loved by Millions🙌🏻🙈 #teamhk Managed b...,SOAAADD12AB018A9DD,69310.0
1,https://www.instagram.com/an_anandrk/,18569.5,9131.5,#PyaarMangdi official video👇🏻👇🏻,SOAAADE12A6D4F80CC,120502.0


## Engagement Rate

In [63]:
# Engagement rate (ER), in percent: likes plus comments relative to the
# poster's follower count. When the cells run in order, 'Likes' and
# 'Comments' already hold per-day rates at this point.
df['ER'] = 100*(df['Likes'] + df['Comments'])/df['Followers']
# Drop the columns that are not referenced by the ranking code below.
df.drop(['Likes', 'Comments', 'Description'], axis=1, inplace = True)
df.head(2)

Unnamed: 0,Profile URL,Song Name,Followers,ER
0,https://www.instagram.com/abhishekothari/,SOAAADD12AB018A9DD,69310.0,19.535421
1,https://www.instagram.com/an_anandrk/,SOAAADE12A6D4F80CC,120502.0,22.988


## Sort by followers, then ER

In [68]:
# Order rows by follower count (largest first), breaking ties by ER (largest
# first). recommend1 walks df in row order, so this sort defines its ranking.
df = df.sort_values(["Followers", "ER"], ascending = (False, False) )
df.head(5)

Unnamed: 0,Profile URL,Song Name,Followers,ER
38,https://www.instagram.com/iamkasturi/,SOAAALJ12AB01828B4,150984.0,8.960022
14,https://www.instagram.com/iamkasturi/,SOAAADE12A6D4F80CC,150984.0,2.628566
40,https://www.instagram.com/sahilart/,SOAAADD12AB018A9DD,139664.0,1.465132
16,https://www.instagram.com/sahilart/,SOAAADZ12A8C1334FB,139664.0,0.91582
1,https://www.instagram.com/an_anandrk/,SOAAADE12A6D4F80CC,120502.0,22.988


## Implement

* create map from user to songs

In [75]:
# Map each profile URL to the set of distinct songs that profile has used.
# The two columns are iterated in lockstep instead of indexing df.iloc[i]
# twice per row, which builds a full row object on every lookup.
user_to_songs = {}
for user, song in zip(df['Profile URL'], df['Song Name']):
    # setdefault creates the empty set the first time a profile is seen.
    user_to_songs.setdefault(user, set()).add(song)

In [76]:
user_to_songs['https://www.instagram.com/abhishekothari/']

{'SOAAADD12AB018A9DD', 'SOAAALJ12AB01828B4'}

### Method 1:

* based solely on the popularity of the influencer
* Assumptions:
    1. A celebrity with more followers is more influential
    2. For equal follower counts, compare ER (engagement rate)
    3. Even a single high-performing celebrity can trigger a trend

In [92]:
def recommend1(user, top_n=10, min_followers=80000):
    """Recommend songs used by the highest-ranked celebrities.

    Walks the module-level ``df`` in its current row order and collects the
    first ``top_n`` distinct songs, posted by accounts with at least
    ``min_followers`` followers, that ``user`` has not already used according
    to the module-level ``user_to_songs`` map.

    The ranking is purely the row order of ``df``, so ``df`` is expected to be
    sorted beforehand (the notebook sorts by ``Followers`` then ``ER``,
    both descending).

    Args:
        user: Profile URL of the influencer to recommend songs for.
        top_n: Maximum number of songs to return (default 10).
        min_followers: Inclusive follower threshold that qualifies a row as a
            celebrity row (default 80000).

    Returns:
        A list of at most ``top_n`` song identifiers, best-ranked first.
    """
    # A user with no recorded history gets the unfiltered top songs instead of
    # raising KeyError.
    already_used = user_to_songs.get(user, set())
    celebrity_songs = df.loc[df['Followers'] >= min_followers, 'Song Name']

    recommended_songs = []
    seen = set()  # O(1) duplicate check; the list keeps the ranking order
    for song in celebrity_songs:
        if len(recommended_songs) >= top_n:
            break
        if song in already_used or song in seen:
            continue
        seen.add(song)
        recommended_songs.append(song)

    return recommended_songs

In [80]:
print(recommend1('https://www.instagram.com/an_anandrk/') )

['SOAAALJ12AB01828B4', 'SOAAADD12AB018A9DD', 'SOAAADZ12A8C1334FB', 'SOAAAGQ12A8C1420C8', 'SOAAAGO12A67AE0A0E', 'SOAAAKE12A8C1397E9', 'SOAABYG12AB01876F4', 'SOAAAGN12AB017D672', 'SOAAAGK12AB0189572', 'SOAAADF12A8C13DF62']


In [83]:
print(recommend1('https://www.instagram.com/abhishekothari/') )

['SOAAADE12A6D4F80CC', 'SOAAADZ12A8C1334FB', 'SOAAAFI12A6D4F9C66', 'SOAAAGQ12A8C1420C8', 'SOAAAGO12A67AE0A0E', 'SOAAAKE12A8C1397E9', 'SOAABYG12AB01876F4', 'SOAAAGN12AB017D672', 'SOAAAGK12AB0189572', 'SOAAADF12A8C13DF62']


In [93]:
print(recommend1('https://www.instagram.com/abhishekothari/') )

['SOAAADE12A6D4F80CC', 'SOAAADZ12A8C1334FB', 'SOAAAFI12A6D4F9C66', 'SOAAAGQ12A8C1420C8', 'SOAAAGO12A67AE0A0E', 'SOAAAKE12A8C1397E9', 'SOAABYG12AB01876F4', 'SOAAAGN12AB017D672', 'SOAAAGK12AB0189572', 'SOAAADF12A8C13DF62']


### Method 2:

* based on the number of celebrities that have used the song in a reel
* Assumptions:
    1. A celebrity is an influencer with a follower count > 80k
    2. Celebrities tend to follow trends
    3. The more celebrities use a song, the more likely it is to be trending
    4. The more a song is trending, the more useful it is for the influencer

In [95]:
# Count, for every song, how many celebrity rows (Followers >= 80000) use it.
# Counter keeps keys in first-seen order, the same order the manual loop
# produced, and the result is converted back to a plain dict.
# NOTE(review): this counts rows, so a celebrity with several rows for the same
# song is counted once per row -- de-duplicate on (Profile URL, Song Name)
# first if distinct celebrities are intended.
from collections import Counter

dic = dict(Counter(df.loc[df['Followers'] >= 80000, 'Song Name']))

In [97]:
# Order songs by usage count, highest first. sorted() is stable, so songs with
# equal counts keep the order in which they were inserted into dic.
songs_ordered = dict(sorted(dic.items(), key=lambda item: item[1], reverse=True))
songs_ordered

{'SOAAAKE12A8C1397E9': 6,
 'SOAAADE12A6D4F80CC': 5,
 'SOAAALJ12AB01828B4': 4,
 'SOAAADD12AB018A9DD': 3,
 'SOAAAGQ12A8C1420C8': 3,
 'SOAAAGO12A67AE0A0E': 3,
 'SOAAAGK12AB0189572': 2,
 'SOAAAGP12A6D4F7D1C': 2,
 'SOAAADZ12A8C1334FB': 1,
 'SOAAAFI12A6D4F9C66': 1,
 'SOAABYG12AB01876F4': 1,
 'SOAAAGN12AB017D672': 1,
 'SOAAADF12A8C13DF62': 1}

In [98]:
def recommend2(user, top_n=10):
    """Recommend the songs used in the most celebrity rows.

    Iterates the module-level ``songs_ordered`` mapping (song -> usage count,
    already ordered from most to least used) and collects the first ``top_n``
    songs that ``user`` has not already used according to the module-level
    ``user_to_songs`` map.

    Args:
        user: Profile URL of the influencer to recommend songs for.
        top_n: Maximum number of songs to return (default 10).

    Returns:
        A list of at most ``top_n`` song identifiers, most-used first.
    """
    # A user with no recorded history gets the unfiltered top songs instead of
    # raising KeyError.
    already_used = user_to_songs.get(user, set())

    recommended_songs = []
    # Dict keys are unique, so no duplicate check is needed here.
    for song in songs_ordered:
        if len(recommended_songs) >= top_n:
            break
        if song not in already_used:
            recommended_songs.append(song)

    return recommended_songs

In [99]:
print(recommend2('https://www.instagram.com/an_anandrk/') )

['SOAAAKE12A8C1397E9', 'SOAAALJ12AB01828B4', 'SOAAADD12AB018A9DD', 'SOAAAGQ12A8C1420C8', 'SOAAAGO12A67AE0A0E', 'SOAAAGK12AB0189572', 'SOAAAGP12A6D4F7D1C', 'SOAAADZ12A8C1334FB', 'SOAABYG12AB01876F4', 'SOAAAGN12AB017D672']


In [100]:
print(recommend2('https://www.instagram.com/abhishekothari/') )

['SOAAAKE12A8C1397E9', 'SOAAADE12A6D4F80CC', 'SOAAAGQ12A8C1420C8', 'SOAAAGO12A67AE0A0E', 'SOAAAGK12AB0189572', 'SOAAAGP12A6D4F7D1C', 'SOAAADZ12A8C1334FB', 'SOAAAFI12A6D4F9C66', 'SOAABYG12AB01876F4', 'SOAAAGN12AB017D672']
