In [2]:
#import spotipy
#from spotipy.oauth2 import SpotifyOAuth
#pip install spotipy --upgrade
#spotify = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id='a6204bb2c7d44e98af9ab2c2d6518a09', client_secret='df3f58e414574e79b104f6e2ce25fd1c', redirect_uri='http://localhost/', scope = "user-read-recently-played"))

In [1]:
import pandas as pd
from matplotlib import pyplot as plt

In [30]:
def get_history(filename):
    """
    filename: (str) the path to the .json file containing the spotify listening history
    loads in the listening history as a pandas dataframe
    """
    df = pd.read_json(filename)
    return df

def date_formatting(df):
    """
    df = (dataframe) the name of the dataframe with the listening history
    adds columns to the dataframe based on the information in the 'endTime' column:
    'Date' contains the year-month-day date that the song was played
    'Time' contains the hour-minute-second time that the song finished playing
    'Month-Year' contains the year-month date that the song was played
    """
    df['Date'] = pd.to_datetime(df['endTime']).dt.date
    df['Time'] = pd.to_datetime(df['endTime']).dt.time
    df['Month-Year'] = pd.to_datetime(df['endTime']).dt.to_period('m')
    return df

def song_play_frequencies(df, dates, date_column_name):
    """
    df = (dataframe) the name of the dataframe with the listening history
    dates = (list) the range of periods to use (days or months)
    date_column_name = (str) the name of the time period column to use ('Date' or 'Month-Year')
    returns a new dataframe with the number of times each song was played within the time period specified (each day or month)
    """
    song_freqs_df = pd.DataFrame()
    for date in dates:
        value_counts_df = df[df[date_column_name]==date][['trackName','artistName']].value_counts().to_frame().reset_index().rename(columns={0:'Number of Plays'})
        value_counts_df[date_column_name] = date
        song_freqs_df = song_freqs_df.append(value_counts_df)
    song_freqs_df.reset_index(drop=True)
    return song_freqs_df

def merge_songs(song_to_merge, original_song):
    """
    song_to_merge = (str) the alternate name for the song (will be replaced)
    original song = (str) the name of the song that you want to display (what to replace with)
    Use if the same song exists in two different versions on Spotify and is being counted separately but you want them to be counted as one
    e.g. the original version and extended version of a song
    """
    df.replace(song_to_merge,original_song,inplace=True)
    return df

def print_top_songs_per_month(df, months, n=5):
    """
    df = (dataframe) the name of the dataframe with the listening history
    months = (list) the range of months to use
    n (optional) = (int) the number of top songs to display for each month. default is set to 5.
    Prints out the top n songs for each month with the name of the song, name of the artist, and the number of times the song was played in that month.
    """
    for month in months:
        months_songs = df[df["Month-Year"] == month]
        freq = months_songs[['trackName','artistName']].value_counts()
        print(month)
        print(freq.head(n))
        print()
        
def top_songs(df, months, n=5):
    """
    df = (dataframe) the name of the dataframe with the listening history
    months = (list) the range of months to use
    n (optional) = (int) the number of top songs to display for each month. default is set to 5.
    Returns a dataframe containing the top n songs for each month with the name of the song, name of the artist, number of times the song was played in the month, and month.
    """
    top_songs_df = pd.DataFrame()
    for month in months:
        months_songs = df[df["Month-Year"] == month]
        freq = months_songs[['trackName','artistName']].value_counts().to_frame().reset_index().rename(columns={0:'Number of Plays'})
        top_n = freq.head(n)
        top_n['Month-Year'] = month
        top_songs_df = top_songs_df.append(top_n)
    return top_songs_df
    

In [3]:
df = get_history('MyData/StreamingHistory0.json')
df = date_formatting(df)
df = merge_songs("MONTERO (Call Me By Your Name) - SATAN'S EXTENDED VERSION",'MONTERO (Call Me By Your Name)')
df

Unnamed: 0,endTime,artistName,trackName,msPlayed,Date,Time,Month-Year
0,2021-01-07 00:57,Fall Out Boy,Thnks fr th Mmrs,73066,2021-01-07,00:57:00,2021-01
1,2021-01-11 16:40,Maccabeats,Mi Yemallel,15978,2021-01-11,16:40:00,2021-01
2,2021-01-14 00:58,Best Instrumentals,Hey There Delilah - Karaoke,232355,2021-01-14,00:58:00,2021-01
3,2021-01-14 01:01,Original Broadway Cast of Hamilton,That Would Be Enough - Instrumental,179141,2021-01-14,01:01:00,2021-01
4,2021-01-14 01:04,ProSound Karaoke Band,A Little Fall of Rain (Karaoke Instrumental Tr...,202867,2021-01-14,01:04:00,2021-01
...,...,...,...,...,...,...,...
3633,2022-01-10 19:09,Panic! At The Disco,Don't Threaten Me with a Good Time,213093,2022-01-10,19:09:00,2022-01
3634,2022-01-10 19:14,Nirvana,Smells Like Teen Spirit,301920,2022-01-10,19:14:00,2022-01
3635,2022-01-10 19:18,Twenty One Pilots,Car Radio,267720,2022-01-10,19:18:00,2022-01
3636,2022-01-10 19:21,Panic! At The Disco,Emperor's New Clothes,158666,2022-01-10,19:21:00,2022-01


In [4]:
top_10_songs = df['trackName'].value_counts()
top_10_artists = df['artistName'].value_counts()
top_10_songs.head(10), top_10_artists.head(10)

(MONTERO (Call Me By Your Name)    102
 good 4 u                           70
 drivers license                    67
 deja vu                            46
 SUN GOES DOWN                      45
 The Night We Met                   43
 Happier Than Ever                  43
 traitor                            42
 Angel With A Shotgun               40
 brutal                             39
 Name: trackName, dtype: int64,
 Lil Nas X              417
 Olivia Rodrigo         393
 Billie Eilish          349
 Twenty One Pilots      129
 Panic! At The Disco     95
 Fall Out Boy            77
 Maccabeats              69
 Cardi B                 65
 Lil Dicky               48
 Andrew Garfield         47
 Name: artistName, dtype: int64)

In [9]:
days = df['Date'].unique()
months = df['Month-Year'].unique()

daily_song_freqs_df = song_play_frequencies(df, days, 'Date')
monthly_song_freqs_df = song_play_frequencies(df, months, 'Month-Year')

In [10]:
songs = daily_song_freqs_df['trackName'].unique()

In [6]:
print_top_songs_per_month(df, months)

2021-01
Angel With A Shotgun      4
The Night We Met          4
drivers license           4
Blueberry Faygo           2
Mood (feat. iann dior)    2
Name: trackName, dtype: int64

2021-02
Prologue                                                    6
That Would Be Enough - Instrumental                         6
Total Eclipse of the Heart - Karaoke                        6
Bohemian Rhapsody (Piano Verison) [Made Famous By Queen]    5
I'll Cover You (In the Style of Rent) [Karaoke Version]     5
Name: trackName, dtype: int64

2021-03
Chad Gadya              7
Yedid Nefesh            7
One Is Hashem           5
Echad Mi Yodea          4
Chad Gadya (One Kid)    4
Name: trackName, dtype: int64

2021-04
MONTERO (Call Me By Your Name)     33
Up                                 10
WAP (feat. Megan Thee Stallion)     9
The Night We Met                    9
SICKO MODE                          8
Name: trackName, dtype: int64

2021-05
good 4 u                          21
MONTERO (Call Me By Your Name

In [32]:
top_songs_df = top_songs(df, months, n=5)
top_songs_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_n['Month-Year'] = month


Unnamed: 0,trackName,artistName,Number of Plays,Month-Year
0,The Night We Met,Lord Huron,4,2021-01
1,Angel With A Shotgun,The Cab,4,2021-01
2,drivers license,Olivia Rodrigo,4,2021-01
3,WITHOUT YOU,The Kid LAROI,2,2021-01
4,YEET,YEET,2,2021-01
...,...,...,...,...
0,Therapy,Andrew Garfield,12,2022-01
1,"30/90 (from ""tick, tick... BOOM!"" Soundtrack f...",Andrew Garfield,7,2022-01
2,Boho Days,Andrew Garfield,5,2022-01
3,Come to Your Senses,Alexandra Shipp,3,2022-01


In [33]:
len(top_songs_df['trackName'].unique())

47

In [15]:
top_per_month_df = monthly_song_freqs_df.groupby(['Month-Year'])['Number of Plays'].max()
top_per_month_df

Month-Year
2021-01     4
2021-02     6
2021-03     4
2021-04    33
2021-05    21
2021-06    20
2021-07    11
2021-08    31
2021-09     9
2021-10     3
2021-11    14
2021-12     8
2022-01    12
Freq: M, Name: Number of Plays, dtype: int64

In [19]:
def make_plots(songs, song_freqs_df, dates, date_column_name):
    plt.figure(figsize=(20,15))

    for song in songs:
        one_song_freqs = song_freqs_df[song_freqs_df['trackName']==song]
        freq_list = []
        for date in dates:
            if (one_song_freqs[date_column_name]==date).any():
                that_day = one_song_freqs[one_song_freqs[date_column_name]==date]
                freq_list.append(int(that_day['Number of Plays']))
            else:
                freq_list.append(0)
        plt.plot(dates, freq_list, label=song)
        plt.legend()
 
    plt.show()
    
make_plots(songs, months, monthly_song_freqs_df, 'Month-Year')

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

<Figure size 1440x1080 with 0 Axes>