In [379]:
import os
import requests
import pandas as pd
import csv
import time
import datetime
import dateutil
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta, FR
import json
from pathlib import Path
from difflib import SequenceMatcher
import operator
import re
from functools import reduce

import plotly 
import plotly.plotly as py
import plotly.graph_objs as go

from numpy import arange,array,ones
from scipy import stats
import spacy
from spacy_langdetect import LanguageDetector
from langdetect import detect_langs
# Load the spaCy pipeline and append the language-detector component as its last step.
nlp = spacy.load('en')
nlp.add_pipe(LanguageDetector(), name='language_detector', last=True)
# API credentials are read from a local keys.json file instead of being hard-coded here.
with open('keys.json', 'r') as fp:
    our_keys = json.load(fp)
# Musixmatch API key, sent as 'apikey' by get_track.
key = our_keys["musixmatch_key"]
plotly_key = our_keys["plotly_key"]
# Store the Plotly username and API key in the Plotly credentials file.
plotly.tools.set_credentials_file(username = 'jagluck',
                                  api_key = plotly_key)

In [412]:
#save songs
def save_songs(songs):
    """Write the song archive to data/songs.json.

    Nothing is written when ``songs`` equals an empty dict, so an empty
    archive never replaces the file on disk.
    """
    if songs == {}:
        return
    with open('data/songs.json', 'w') as archive_file:
        json.dump(songs, archive_file)
        
def load_songs():
    """Load the song archive from data/songs.json.

    Returns:
        dict: the parsed JSON archive, or an empty dict when the archive
        file does not exist yet (for example on the first run).
    """
    try:
        with open('data/songs.json', 'r') as fp:
            return json.load(fp)
    except FileNotFoundError:
        # No archive yet: start from an empty one instead of failing the lookup.
        return {}
       
#compare similarity of two strings
def similar(a, b):
    """Return the SequenceMatcher ratio of two strings, ignoring "feat" suffixes.

    Each string is cut at its first occurrence of "feat"; only the text
    before it takes part in the comparison.
    """
    head_a, head_b = (text.partition("feat")[0] for text in (a, b))
    matcher = SequenceMatcher(None, head_a, head_b)
    return matcher.ratio()

def get_lang(text):
    """Return the language code of ``text``, or 'X' when detection is not confident.

    A code is returned only when the spaCy pipeline's document-level guess
    scores above 0.9 and the top result of ``detect_langs`` names the same
    language with a probability above 0.9.
    """
    # Document-level detection result attached by the pipeline.
    spacy_guess = nlp(text)._.language
    if not (spacy_guess['score'] > .9):
        return 'X'

    # Cross-check against a direct langdetect call on the same text.
    top_candidate = detect_langs(text)[0]
    if top_candidate.lang == spacy_guess['language'] and top_candidate.prob > .9:
        return spacy_guess['language']

    return 'X'

#get language/lyric information for a single track
def get_track(song, artist):
    """Fetch lyrics and language for one track from Musixmatch and archive them.

    Searches Musixmatch by track title, picks the result whose artist name is
    most similar to ``artist`` (the similarity must exceed 0.6), downloads its
    lyrics and detects their language. The result is stored in the song
    archive under the key "<song> by <artist>". When the search keeps failing,
    no artist matches, or no lyrics come back, a placeholder entry
    ("no lyrics", language "X") is stored instead.

    Args:
        song: track title to search for.
        artist: artist name used to choose among the search results.

    Returns:
        None. The updated archive is written with save_songs.
    """
    max_retries = 7  # upper bound on repeated search requests
    songs = load_songs()
    search_url = 'http://api.musixmatch.com/ws/1.1/track.search'
    params = {'q_track' : song, 'page_size' : '30', 'page' : '1', 's_track_rating' : 'desc', 'apikey': key}
    resp = requests.get(search_url, params=params)
    message = json.loads(resp.text)['message']

    # Retry while the status code in the message header is not 200.
    retries = 0
    while message["header"]["status_code"] != 200 and retries < max_retries:
        time.sleep(.5)
        resp = requests.get(search_url, params=params)
        message = json.loads(resp.text)['message']
        retries += 1

    # Decide on the final status rather than on the retry counter, so a search
    # that succeeds on one of the last retries is still used.
    entry = None
    if message["header"]["status_code"] == 200:
        entry = _fetch_best_match(song, artist, message['body']['track_list'])
    if entry is None:
        entry = _no_lyrics_entry(song, artist)

    songs[song + " by " + artist] = entry
    save_songs(songs)


def _no_lyrics_entry(song, artist):
    """Build the placeholder archive entry for a track without usable lyrics."""
    return {
        'song': song,
        'artist': artist,
        'lyrics_body': "no lyrics",
        'lyrics_language': "X",
    }


def _fetch_best_match(song, artist, tracks):
    """Return an archive entry for the best artist match in ``tracks``, or None."""
    scores = [similar(track['track']['artist_name'], artist) for track in tracks]
    if not scores:
        return None

    # First index holding the highest similarity score.
    best = max(range(len(scores)), key=scores.__getitem__)
    if not (scores[best] > .6):
        return None

    winner = tracks[best]['track']
    print(winner)
    print(winner['track_id'])

    lyrics_url = 'http://api.musixmatch.com/ws/1.1/track.lyrics.get'
    params = {'track_id' : winner['track_id'], "commontrack_id" : winner['commontrack_id'], 'apikey': key}
    resp = requests.get(lyrics_url, params=params)

    body = json.loads(resp.text)['message']['body']
    if 'lyrics' not in body:
        print("empty resp")
        return None

    # Newlines become spaces and the last 59 characters are dropped
    # (presumably a notice appended by the API -- TODO confirm).
    lyrics_text = body['lyrics']['lyrics_body'].replace('\n', ' ')[:-59]
    return {
        'song': song,
        'artist': artist,
        'lyrics_body': lyrics_text,
        'lyrics_language': get_lang(lyrics_text),
    }
       
#go through every song in a file
def get_songs(df, fileName):
    """Add 'language' and 'lyrics' columns to a chart frame and write it to CSV.

    Every row's track is looked up in the song archive under the key
    "<Track Name> by <Artist>"; tracks missing from the archive are fetched
    with get_track first.

    Args:
        df: DataFrame with 'Track Name' and 'Artist' columns; it is modified
            in place.
        fileName: path the updated frame is written to.
    """
    songs = load_songs()
    languages = []
    lyrics = []
    for songTitle, artist in zip(df['Track Name'], df['Artist']):
        songTitle = str(songTitle)
        song_key = songTitle + ' by ' + artist
        # If we do not already have the song's language and info archived, query for it.
        if song_key not in songs:
            get_track(songTitle, artist)
            # get_track saves its result to disk, so the archive only needs
            # to be re-read after a fetch, not on every row.
            songs = load_songs()
        songInfo = songs[song_key]
        lyrics.append(songInfo['lyrics_body'])
        languages.append(songInfo['lyrics_language'])

    # Add the new info to the dataframe.
    df['language'] = languages
    df['lyrics'] = lyrics

    df.to_csv(fileName)
    
#create urls and download files for a country
def download_charts(country):
    """Build the weekly chart URLs for a country and download any missing files.

    Weeks are enumerated backwards, seven days at a time, starting from the
    most recent Friday (today included) for as long as the older end of the
    week is later than 2016-12-23 00:00. Each week maps to
    data/<country>/<country>_<older>--<newer>.csv; files that already exist
    are not downloaded again.

    Args:
        country: region code used in the spotifycharts.com URL and in the
            local file paths.

    Returns:
        list of str: the local path for every enumerated week, newest first,
        whether or not its download succeeded.
    """
    # Most recent Friday, today included (Friday is weekday() == 4). The time
    # of day of "now" is kept.
    # NOTE(review): when today is a Friday the week ending today is included;
    # confirm whether that chart is already available at that point.
    now = datetime.now()
    last_friday = now - timedelta(days=(now.weekday() - 4) % 7)

    files = []
    urls = []
    newer_date = last_friday
    older_date = newer_date - timedelta(days=7)
    oldest_date = datetime(2016, 12, 23)
    # Create urls by counting back a Friday at a time.
    while older_date > oldest_date:
        newer_str = newer_date.strftime('%Y-%m-%d')
        older_str = older_date.strftime('%Y-%m-%d')
        url = 'https://spotifycharts.com/regional/' + country + '/weekly/' + older_str + '--' + newer_str + '/download'
        file_name = "data/" + country + "/" + country + "_" + older_str + '--' + newer_str + '.csv'
        urls.append(url)
        files.append(file_name)
        newer_date = older_date
        older_date = newer_date - timedelta(days=7)

    # If we do not already have the file, download it.
    for file, url in zip(files, urls):
        my_file = Path(file)
        if my_file.is_file():
            continue

        resp = requests.get(url)
        if resp.status_code != 200:
            # Do not store an error response as chart data: the file would
            # then exist and never be downloaded again.
            print("could not download " + url + " (HTTP " + str(resp.status_code) + ")")
            continue

        # If the directory does not exist, create it.
        my_file.parent.mkdir(parents=True, exist_ok=True)

        # newline='' leaves line endings to the csv writer, as the csv module requires.
        with open(file, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(csv.reader(resp.text.splitlines()))

    return files

#take file and add language/lyric info if needed
def add_language(files):
    """Add language/lyrics columns to every chart file that lacks them.

    Each file is read, and when it has no 'language' column it is passed to
    get_songs, which adds the columns and rewrites the file. A file that
    cannot be processed is reported and skipped so the remaining files are
    still handled.

    Args:
        files: iterable of CSV file paths.
    """
    for file in files:
        try:
            # First try with the first line skipped; if the expected chart
            # header is not found that way, read the file from its first line.
            df = pd.read_csv(file, skiprows=[0])
            if list(df) != ['Position', 'Track Name', 'Artist', 'Streams', 'URL']:
                df = pd.read_csv(file)
                if 'Unnamed: 0' in list(df):
                    df = df.drop('Unnamed: 0', axis=1)

            df.Artist = df.Artist.astype(str)

            # Use the API and add language/lyrics only if they are not there yet.
            if 'language' not in list(df):
                get_songs(df, file)
                songs = load_songs()
                print(len(songs))
        except Exception as exc:
            # Still best-effort, but no longer silent; KeyboardInterrupt and
            # SystemExit are not caught here.
            print("could not add language info to " + str(file) + ": " + repr(exc))
        
            
        
def get_data(country):
    """Download a country's weekly charts and summarise song languages per week.

    Args:
        country: region code passed to download_charts.

    Returns:
        pandas.DataFrame with the columns country, week, english_percent,
        spanish_percent, german_percent and total_songs_found, one row per
        weekly file that could be analysed. The percent columns are fractions
        (rounded to two decimals) of the songs whose language is not "X".
        Files that cannot be analysed are printed and skipped.
    """
    # Download chart files.
    files = download_charts(country)

    # Add language information to files.
    add_language(files)

    columns = ['country', 'week', 'english_percent', 'spanish_percent',
               'german_percent', 'total_songs_found']

    # Analyze language for every week. Each week is collected as one record,
    # so a failure part-way through a file cannot leave the columns misaligned.
    rows = []
    for file in files:
        try:
            df = pd.read_csv(file, skiprows=[0])
            if list(df) != ['Position', 'Track Name', 'Artist', 'Streams', 'URL']:
                df = pd.read_csv(file)
                if 'Unnamed: 0' in list(df):
                    df = df.drop('Unnamed: 0', axis=1)

            df.Artist = df.Artist.astype(str)

            languages = list(df['language'])
            # Compare by value: identity checks on strings only work by accident.
            total = sum(1 for code in languages if code != "X")
            eng = languages.count('en')
            esp = languages.count('es')
            ger = languages.count('de')

            rows.append({
                # Two characters following "data/" in the path.
                'country': file[5:7],
                # Second (newer) date in the file name.
                'week': file[-14:][0:10],
                # A week with no detected language raises ZeroDivisionError
                # here and is reported below.
                'english_percent': round((eng / total), 2),
                'spanish_percent': round((esp / total), 2),
                'german_percent': round((ger / total), 2),
                'total_songs_found': total,
            })
        except Exception:
            print(file)

    return pd.DataFrame(rows, columns=columns)

In [413]:
us = get_data('us')
# sort_values returns a new frame, so the result has to be kept.
us = us.sort_values(by=['week'], ascending=False)
us.head(200)

Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,us,2019-07-19,0.95,0.05,0.0,149
1,us,2019-07-12,0.95,0.05,0.0,148
2,us,2019-07-05,0.91,0.09,0.0,162
3,us,2019-06-28,0.96,0.04,0.0,163
4,us,2019-06-21,0.96,0.04,0.0,165
5,us,2019-06-14,0.96,0.04,0.0,166
6,us,2019-06-07,0.96,0.04,0.0,166
7,us,2019-05-31,0.97,0.03,0.0,165
8,us,2019-05-24,0.97,0.03,0.0,161
9,us,2019-05-17,0.97,0.03,0.0,163


In [None]:
do = get_data('do')
# sort_values returns a new frame, so the result has to be kept.
do = do.sort_values(by=['week'], ascending=False)
do.head(200)

In [415]:
mx = get_data('mx')
# sort_values returns a new frame, so the result has to be kept.
mx = mx.sort_values(by=['week'], ascending=False)
mx.head(200)

Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,mx,2019-07-19,0.18,0.82,0.0,175
1,mx,2019-07-12,0.17,0.83,0.0,176
2,mx,2019-07-05,0.18,0.82,0.0,179
3,mx,2019-06-28,0.21,0.79,0.0,177
4,mx,2019-06-21,0.20,0.80,0.0,177
5,mx,2019-06-14,0.22,0.78,0.0,176
6,mx,2019-06-07,0.21,0.79,0.0,178
7,mx,2019-05-31,0.19,0.81,0.0,176
8,mx,2019-05-24,0.22,0.78,0.0,174
9,mx,2019-05-17,0.21,0.79,0.0,175


In [416]:
cr = get_data('cr')
# sort_values returns a new frame, so the result has to be kept.
cr = cr.sort_values(by=['week'], ascending=False)
cr.head(200)

Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,cr,2019-07-19,0.29,0.71,0.00,165
1,cr,2019-07-12,0.31,0.69,0.00,170
2,cr,2019-07-05,0.30,0.70,0.00,171
3,cr,2019-06-28,0.31,0.69,0.00,172
4,cr,2019-06-21,0.30,0.70,0.00,171
5,cr,2019-06-14,0.30,0.70,0.00,171
6,cr,2019-06-07,0.30,0.70,0.00,172
7,cr,2019-05-31,0.29,0.71,0.00,171
8,cr,2019-05-24,0.30,0.70,0.00,174
9,cr,2019-05-17,0.33,0.67,0.00,172


In [417]:
ar = get_data('ar')
# sort_values returns a new frame, so the result has to be kept.
ar = ar.sort_values(by=['week'], ascending=False)
ar.head(200)

Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,ar,2019-07-19,0.14,0.86,0.0,162
1,ar,2019-07-12,0.14,0.85,0.0,162
2,ar,2019-07-05,0.14,0.86,0.0,166
3,ar,2019-06-28,0.17,0.83,0.0,167
4,ar,2019-06-21,0.15,0.85,0.0,169
5,ar,2019-06-14,0.14,0.86,0.0,168
6,ar,2019-06-07,0.16,0.84,0.0,168
7,ar,2019-05-31,0.15,0.85,0.0,168
8,ar,2019-05-24,0.16,0.84,0.0,167
9,ar,2019-05-17,0.17,0.83,0.0,167


In [418]:
gt = get_data('gt')
# sort_values returns a new frame, so the result has to be kept.
gt = gt.sort_values(by=['week'], ascending=False)
gt.head(200)

Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,gt,2019-07-19,0.15,0.85,0.0,175
1,gt,2019-07-12,0.15,0.85,0.0,176
2,gt,2019-07-05,0.15,0.85,0.0,177
3,gt,2019-06-28,0.18,0.82,0.0,177
4,gt,2019-06-21,0.18,0.82,0.0,174
5,gt,2019-06-14,0.20,0.80,0.0,172
6,gt,2019-06-07,0.17,0.83,0.0,173
7,gt,2019-05-31,0.19,0.81,0.0,172
8,gt,2019-05-24,0.19,0.81,0.0,175
9,gt,2019-05-17,0.20,0.80,0.0,175


In [419]:
co = get_data('co')
# sort_values returns a new frame, so the result has to be kept.
co = co.sort_values(by=['week'], ascending=False)
co.head(200)

data/co/co_2017-02-24--2017-03-03.csv


Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,co,2019-07-19,0.17,0.83,0.0,168
1,co,2019-07-12,0.15,0.85,0.0,168
2,co,2019-07-05,0.12,0.88,0.0,168
3,co,2019-06-28,0.14,0.86,0.0,167
4,co,2019-06-21,0.15,0.85,0.0,169
5,co,2019-06-14,0.15,0.85,0.0,170
6,co,2019-06-07,0.13,0.87,0.0,168
7,co,2019-05-31,0.16,0.84,0.0,167
8,co,2019-05-24,0.17,0.83,0.0,165
9,co,2019-05-17,0.18,0.82,0.0,169


In [420]:
es = get_data('es')
# sort_values returns a new frame, so the result has to be kept.
es = es.sort_values(by=['week'], ascending=False)
es.head(200)

Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,es,2019-07-19,0.13,0.86,0.0,170
1,es,2019-07-12,0.13,0.86,0.0,173
2,es,2019-07-05,0.14,0.85,0.0,177
3,es,2019-06-28,0.14,0.85,0.0,172
4,es,2019-06-21,0.15,0.84,0.0,174
5,es,2019-06-14,0.16,0.84,0.0,172
6,es,2019-06-07,0.18,0.81,0.0,172
7,es,2019-05-31,0.18,0.81,0.0,173
8,es,2019-05-24,0.21,0.79,0.0,170
9,es,2019-05-17,0.20,0.80,0.0,171


In [421]:
de = get_data('de')
# sort_values returns a new frame, so the result has to be kept.
de = de.sort_values(by=['week'], ascending=False)
de.head(200)

Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,de,2019-07-19,0.42,0.03,0.54,117
1,de,2019-07-12,0.48,0.05,0.47,126
2,de,2019-07-05,0.45,0.05,0.49,130
3,de,2019-06-28,0.44,0.04,0.51,138
4,de,2019-06-21,0.43,0.04,0.52,136
5,de,2019-06-14,0.48,0.03,0.49,134
6,de,2019-06-07,0.49,0.03,0.47,138
7,de,2019-05-31,0.46,0.02,0.51,139
8,de,2019-05-24,0.49,0.02,0.48,138
9,de,2019-05-17,0.55,0.02,0.43,133


In [422]:
glbl = get_data('global')
# sort_values returns a new frame, so the result has to be kept.
glbl = glbl.sort_values(by=['week'], ascending=False)
glbl.head(200)

data/global/global_2017-06-02--2017-06-09.csv
data/global/global_2017-05-26--2017-06-02.csv


Unnamed: 0,country,week,english_percent,spanish_percent,german_percent,total_songs_found
0,gl,2019-07-19,0.79,0.18,0.01,157
1,gl,2019-07-12,0.79,0.18,0.01,163
2,gl,2019-07-05,0.76,0.23,0.01,164
3,gl,2019-06-28,0.81,0.18,0.01,168
4,gl,2019-06-21,0.80,0.19,0.01,165
5,gl,2019-06-14,0.78,0.18,0.02,166
6,gl,2019-06-07,0.81,0.18,0.01,167
7,gl,2019-05-31,0.80,0.18,0.01,168
8,gl,2019-05-24,0.82,0.15,0.02,168
9,gl,2019-05-17,0.82,0.18,0.00,170


In [423]:
# takes a list of results for different countries and averages the percents for each week

def get_all_data(countries):
    """Average the weekly language percentages across several country frames.

    Weeks are enumerated backwards, seven days at a time, from the most
    recent Friday (today included) while the week's date is later than
    2016-12-23 00:00. For each week, every frame that has exactly one row
    for that week contributes its percentages to a plain mean.

    Args:
        countries: sequence of DataFrames with the columns 'week' (dates as
            'YYYY-MM-DD' strings), 'english_percent', 'spanish_percent' and
            'german_percent'. It is iterated once per week, so it must be
            re-iterable (a list, not a generator).

    Returns:
        pandas.DataFrame with the columns week, english_percent,
        spanish_percent and german_percent; one row, newest first, for every
        week in which at least one frame contributed.
    """
    percent_columns = ['english_percent', 'spanish_percent', 'german_percent']

    # Most recent Friday, today included (Friday is weekday() == 4).
    now = datetime.now()
    week_date = now - timedelta(days=(now.weekday() - 4) % 7)
    oldest_date = datetime(2016, 12, 23)

    rows = []
    # Go back through every week until we reach the end of available data.
    while week_date > oldest_date:
        week_str = week_date.strftime('%Y-%m-%d')
        week_date = week_date - timedelta(days=7)

        # Collect this week's percentages from each country that has the week.
        week_values = {column: [] for column in percent_columns}
        for c in countries:
            cur_week_row = c.loc[c['week'] == week_str]
            if len(cur_week_row) == 1:
                for column in percent_columns:
                    week_values[column].append(cur_week_row[column].values[0])

        # Average over the contributing countries; skip weeks nobody has.
        if week_values['english_percent']:
            row = {'week': week_str}
            for column in percent_columns:
                row[column] = sum(week_values[column]) / len(week_values[column])
            rows.append(row)

    # Create and return our final data structure.
    return pd.DataFrame(rows, columns=['week'] + percent_columns)

In [424]:
# Average the weekly percentages over the mx, gt, ar, do, co and es frames
# (us, cr, de and glbl are not part of this average), save the result and show it.
all_countries = get_all_data([mx,gt,ar,do,co,es])
all_countries.to_csv("data/all_countries.csv")
all_countries.head(200)

Unnamed: 0,week,english_percent,spanish_percent,german_percent
0,2019-07-19,0.145000,0.853333,0.0
1,2019-07-12,0.140000,0.855000,0.0
2,2019-07-05,0.140000,0.858333,0.0
3,2019-06-28,0.160000,0.838333,0.0
4,2019-06-21,0.155000,0.843333,0.0
5,2019-06-14,0.161667,0.838333,0.0
6,2019-06-07,0.160000,0.838333,0.0
7,2019-05-31,0.166667,0.831667,0.0
8,2019-05-24,0.178333,0.821667,0.0
9,2019-05-17,0.185000,0.815000,0.0


In [394]:
# Weekly share of Spanish songs in the us frame.
trace = go.Scatter(
    x=us['week'],
    y=us['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(us))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, us['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=us['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spotifys US weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='us-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [395]:
# Weekly share of Spanish songs in the de frame.
trace = go.Scatter(
    x=de['week'],
    y=de['spanish_percent'],
    name='Spanish Percent',
)

# Weekly share of German songs in the de frame.
trace2 = go.Scatter(
    x=de['week'],
    y=de['german_percent'],
    name='German Percent',
    line=dict(color='green', width=2),
)

# Linear fit of the German share over the row positions.
xi = arange(0, len(de))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, de['german_percent'])
line = slope * xi + intercept

trace3 = go.Scatter(
    x=de['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='German Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2, trace3]

layout = dict(
    title='Percent of Songs in Spotifys Germanys weekly top 200 Chart',
    xaxis=dict(title='Week'),
    # The chart shows Spanish and German shares, so the axis is not Spanish-only.
    yaxis=dict(title='Percent of songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='de-language-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [396]:
# Weekly share of Spanish songs in the do frame.
trace = go.Scatter(
    x=do['week'],
    y=do['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(do))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, do['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=do['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spotifys Dominican Republic weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='do-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [397]:
# Weekly share of Spanish songs in the co frame.
trace = go.Scatter(
    x=co['week'],
    y=co['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(co))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, co['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=co['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spotifys Colombia weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='co-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [398]:
# Weekly share of Spanish songs in the mx frame.
trace = go.Scatter(
    x=mx['week'],
    y=mx['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(mx))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, mx['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=mx['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spotifys Mexico weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='mx-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [399]:
# Weekly share of Spanish songs in the ar frame.
trace = go.Scatter(
    x=ar['week'],
    y=ar['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(ar))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, ar['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=ar['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spotifys Argentina weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='ar-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [400]:
# Weekly share of Spanish songs in the gt frame.
trace = go.Scatter(
    x=gt['week'],
    y=gt['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(gt))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, gt['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=gt['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spotifys Guatemala weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='gt-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [401]:
# Weekly share of Spanish songs in the es frame.
trace = go.Scatter(
    x=es['week'],
    y=es['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(es))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, es['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=es['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spains weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='es-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [402]:
# Weekly share of Spanish songs in the glbl frame.
trace = go.Scatter(
    x=glbl['week'],
    y=glbl['spanish_percent'],
    name='Spanish Percent',
)

# Weekly share of German songs in the glbl frame.
trace2 = go.Scatter(
    x=glbl['week'],
    y=glbl['german_percent'],
    name='German Percent',
)

# Linear fit of the Spanish share over the row positions.
xi = arange(0, len(glbl))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, glbl['spanish_percent'])
line = slope * xi + intercept

trace3 = go.Scatter(
    x=glbl['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Spanish Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2, trace3]

layout = dict(
    title='Percent of Spanish songs in Spotifys Global weekly top 200 Chart',
    xaxis=dict(title='Week'),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='global-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'


In [425]:
# Weekly share of Spanish songs averaged over the combined countries.
trace = go.Scatter(
    x=all_countries['week'],
    y=all_countries['spanish_percent'],
    name='Spanish Percent',
)

# Linear fit over the row positions, drawn against the same weeks.
xi = arange(0, len(all_countries))
slope, intercept, r_value, p_value, std_err = stats.linregress(xi, all_countries['spanish_percent'])
line = slope * xi + intercept

trace2 = go.Scatter(
    x=all_countries['week'],
    y=line,
    mode='lines',
    # Plain dict instead of the deprecated go.Marker class.
    marker=dict(color='rgb(31, 119, 180)'),
    name='Fit',
    line=dict(color='rgb(205, 12, 24)', width=2),
)

data = [trace, trace2]

layout = dict(
    title='Percent of Spanish songs in Spotifys Combined Spanish Speaking Countries weekly top 200 Chart',
    xaxis=dict(title=''),
    yaxis=dict(title='Percent of Spanish songs', tickformat='.0%'),
)

fig = dict(data=data, layout=layout)
# Chart-specific filename: a shared name such as 'basic-line' makes every
# upload overwrite the previously uploaded chart.
py.iplot(figure_or_data=fig, filename='all-countries-spanish-percent')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jagluck/0 or inside your plot.ly account where it is named 'basic-line'
