# Project Functionality

In [11]:
import pickle
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from bokeh.plotting import figure, output_notebook, show

output_notebook()

from ipywidgets import interact

In [2]:
# Maps each supported genre name to its pre-trained model, loaded from the
# local model cache.
supported_genres = {}

# NOTE(review): pickle.load can execute arbitrary code embedded in the file.
# Only load model files that were produced by this project / a trusted source.
for genre_name in ('pop', 'country', 'other'):
    # The context manager closes each file handle as soon as the model is
    # read, instead of leaving it open until garbage collection.
    with open(f'model_cache/{genre_name}_model.p', 'rb') as model_file:
        supported_genres[genre_name] = pickle.load(model_file)

In [3]:
# Ask for the genre until the answer matches one of the loaded models.
# Input is stripped and lower-cased because the keys of supported_genres
# are lower-case genre names.
genre = input("What genre is your music? ").strip().lower()
while genre not in supported_genres:
    # A dict cannot be sliced or indexed by position, so materialise the
    # genre names as a list before building the "a, b, or c" hint.
    genre_names = list(supported_genres)
    print(f"That genre is not currently supported. Please choose from {', '.join(genre_names[:-1])}, or {genre_names[-1]}.")
    genre = input("What genre is your music? ").strip().lower()

What genre is your music? other


In [4]:
# Keep prompting until the answer parses as MM-DD-YYYY, so a typo does not
# abort the cell with a ValueError (mirrors the re-prompting genre cell).
while True:
    date_string = input("Please enter the date that your song was released (MM-DD-YYYY): ")
    try:
        release_date = datetime.strptime(date_string.strip(), '%m-%d-%Y')
    except ValueError:
        print("That date could not be read. Please use the format MM-DD-YYYY (for example 05-31-2020).")
    else:
        break

Please enter the date that your song was released (MM-DD-YYYY): 05-31-2020


In [5]:
def predictions(release_date, num_tweets=10, avg_char=40, t_rt_ratio=0.5, avg_freq=2):
    """Return the selected genre model's prediction for one set of tweet parameters.

    Uses the module-level ``genre`` to pick a model out of ``supported_genres``.

    Args:
        release_date: datetime of the song's release; converted to whole
            weeks elapsed up to the current time.
        num_tweets: number of original tweets.
        avg_char: average number of characters per original tweet.
        t_rt_ratio: original tweets divided by (tweets + retweets); must be
            non-zero because it is used as a divisor.
        avg_freq: average tweet frequency value passed straight to the model.

    Returns:
        The first element of the model's ``predict`` output for the single
        feature row built from the arguments.
    """
    weeks_elapsed = (datetime.now() - release_date).days // 7

    # Recover the retweet count from the ratio: tweets / ratio is the total
    # number of posts, and whatever is not an original tweet is a retweet.
    all_posts = num_tweets / t_rt_ratio
    retweet_count = all_posts - num_tweets

    feature_row = [
        weeks_elapsed,
        num_tweets,
        num_tweets * avg_char,  # total characters across all original tweets
        avg_char,
        retweet_count,
        t_rt_ratio,
        avg_freq,
    ]
    model = supported_genres[genre]
    return model.predict([feature_row])[0]

In [17]:
# Candidate values for each tunable parameter of the grid search.
tweets_range = range(10, 110, 10)
char_range = range(20, 81, 10)
ratio_range = [0.1 * i for i in range(1, 10)]
freq_range = [0.5 * i for i in range(1, 7)]

# Maps (tweets, char, ratio, freq) -> prediction for every grid point, so the
# interactive plot can look values up instead of re-running the model.
pred_cache = {}

# Start from negative infinity rather than an arbitrary floor such as -10000:
# any real prediction, however low, is then guaranteed to become the first
# maximum and the target_* values are always taken from the grid.
max_so_far = float('-inf')
target_tweets, target_char, target_ratio, target_freq = 0, 0, 0, 0
for tweets in tweets_range:
    for char in char_range:
        for ratio in ratio_range:
            # 0.1 * i carries floating-point noise (e.g. 0.30000000000000004);
            # round so that the cache keys are clean one-decimal values.
            ratio = round(ratio, 1)
            for freq in freq_range:
                curr = predictions(release_date, tweets, char, ratio, freq)
                pred_cache[(tweets, char, ratio, freq)] = curr
                # Strict '>' keeps the first grid point reaching the maximum.
                if curr > max_so_far:
                    max_so_far = curr
                    target_tweets, target_char, target_ratio, target_freq = tweets, char, ratio, freq

def make_plot(char = target_char, ratio = target_ratio, freq = target_freq):
    """Plot the cached prediction against number of tweets for fixed other parameters.

    Args:
        char: characters per tweet; must be one of the grid values in pred_cache.
        ratio: tweet/(tweet + retweet) ratio; rounded to one decimal before lookup.
        freq: average tweet frequency; must be one of the grid values in pred_cache.

    Raises:
        KeyError: if the (char, ratio, freq) combination is not in pred_cache.
    """
    # The cache keys were stored with round(ratio, 1). Apply the same rounding
    # here so a value carrying float noise (e.g. 0.30000000000000004, which a
    # float slider may presumably deliver) still finds its cache entry.
    ratio = round(ratio, 1)

    x = list(range(10, 110, 10))
    y = [pred_cache[(num_tweets, char, ratio, freq)] for num_tweets in x]

    p = figure(plot_width = 550, plot_height = 350)
    p.xaxis[0].axis_label = 'Number of Tweets'
    p.yaxis[0].axis_label = 'Popularity'
    # Hollow markers on top of a connecting line in the same colour.
    p.circle(x, y, color = 'olivedrab', fill_color = 'white', size = 6)
    p.line(x, y, line_color = 'olivedrab', line_width = 2)
    show(p)
    
# Summarise the best-scoring grid point found by the search.
optimal_report = [
    "Optimal Parameters:",
    f"\tNumber of tweets (per week): {target_tweets}",
    f"\tCharacters per tweet: {target_char}",
    f"\tRatio of tweets to retweets: {target_ratio}",
    f"\tAverage frequency (tweets per day): {target_freq}",
]
print("\n".join(optimal_report))

# Sliders for the three parameters held fixed in each plot; the tuples are
# (min, max[, step]) abbreviations understood by interact.
f = interact(
    make_plot,
    char = (20, 80, 10),
    ratio = (0.1, 0.9),
    freq = (0.5, 3, 0.5),
)

Optimal Parameters:
	Number of tweets (per week): 70
	Characters per tweet: 40
	Ratio of tweets to retweets: 0.2
	Average frequency (tweets per day): 1.5


interactive(children=(IntSlider(value=40, description='char', max=80, min=20, step=10), FloatSlider(value=0.2,…

# Project Description

### Data Ingestion Sources:
#### Rolling Stone Trending 25 Charts (built my own web scraper)

<p> These charts list the week-over-week percentage growth in song popularity for the 25 artists with the largest growth. For any week in which an artist did not appear on the chart, I assigned a growth value of 1 less than the minimum growth listed that week. </p>
         
#### Spotify API

<p>  </p>

#### Twitter API

### Model Description

Features (for a single week data point):
- Number of original tweets by the user
- Number of retweets by the user
- Share of the user's posts that are original tweets (tweets / (tweets + retweets))
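- Average number of characters per original tweet
- Total number of characters across all original tweets (number of tweets × average characters per tweet)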
- Number of weeks since release of song
- Average frequency of original tweets by the user in units of tweets per day

In [77]:
# TODO: Complete project description

Genre selection: A word cloud representing the frequency of Spotify-provided genres across all artists in my dataset

![title](images/wordcloud.png)

In [None]:
# TODO: Improve image (remove duplicates)