### Importing Libraries

In [139]:
import numpy as np
import pandas as pd

### Data Import

In [140]:
# Streaming-history export files, concatenated in the order listed here.
history_paths = [
    "data/MyData - 24 Oct 2023/StreamingHistory0.json",
    "data/MyData - 24 Oct 2023/StreamingHistory1.json",
    "data/MyData - 24 Oct 2023/StreamingHistory2.json",
]

# Read each file into its own frame and stack them under one fresh 0..n-1 index.
df = pd.concat([pd.read_json(path) for path in history_paths], ignore_index=True)

n_rows, n_cols = df.shape
print(f"Data import successful: {n_rows} rows, {n_cols} columns")

Data import successful: 21254 rows, 4 columns


### Data Timeframe

In [141]:
from datetime import datetime

def better_date(date: str) -> str:
    """Reformat a ``YYYY-MM-DD`` date string as ``DD Month YYYY``.

    Args:
        date: A date written as ``"%Y-%m-%d"``, e.g. ``"2022-10-01"``.

    Returns:
        The same date with a zero-padded day, the full month name and the
        four-digit year, e.g. ``"01 October 2022"``.

    Raises:
        ValueError: If ``date`` does not match the ``"%Y-%m-%d"`` format.
    """
    parsed = datetime.strptime(date, "%Y-%m-%d")
    # A datetime format spec is passed to strftime with the same directives.
    return f"{parsed:%d %B %Y}"

# Report the first and last day covered by the data.
# The endTime values are parsed elsewhere in this notebook with
# "%Y-%m-%d %H:%M", so they are zero-padded strings: lexicographic min/max is
# also chronological min/max, and the first 10 characters are the date part.
oldest_day = df["endTime"].min()[:10]
newest_day = df["endTime"].max()[:10]

# The bounds are computed outside the f-string on purpose: reusing double
# quotes inside a double-quoted f-string only parses on Python 3.12+.
print(f"Oldest date: {better_date(oldest_day)}\nMost recent date: {better_date(newest_day)}")

Oldest date: 01 October 2022
Most recent date: 22 October 2023


### Top 10 Tracks

In [142]:
# Track names left out of the ranking.
to_omit = [
    "Heavy Thunder and Rain Sounds 8 Hours | Thunderstorm Sounds for Study and Sleep",
    "Sleep Sounds Rain & Thunderstorm White Noise 8 Hours | Fall Asleep with Rainstorm Sound Masking",
    "Long Rumbling Thunder - 10 hours",
    "Tropical Beach | Relaxing Ocean Sounds 8 Hours",
    "#1847 - Theo Von",
    "#1836 - Ryan Holiday",
]

# Total play time per (artist, track), longest first. The sum is in
# milliseconds until the final step converts it to minutes (2 decimals).
top_tracks = (
    df.groupby(["artistName", "trackName"])
    .agg(time_played_min=("msPlayed", "sum"))
    .reset_index()
    .sort_values("time_played_min", ascending=False)
    .loc[lambda t: ~t["trackName"].isin(to_omit)]
    .assign(time_played_min=lambda t: round(t["time_played_min"] / 1000 / 60, 2))
)

# Display the ten longest-played tracks with reader-friendly column headers.
display_names = {
    "artistName": "Artist Name",
    "trackName": "Track Name",
    "time_played_min": "Minutes Played",
}
top_tracks.head(n=10).reset_index(drop=True).rename(columns=display_names)

Unnamed: 0,Artist Name,Track Name,Minutes Played
0,Radiohead,Let Down - Remastered,283.21
1,Ethel Cain,American Teenager,229.07
2,Carly Rae Jepsen,The Loneliest Time (feat. Rufus Wainwright),219.92
3,Julian Casablancas,River of Brakelights,211.01
4,The Strokes,Life Is Simple in the Moonlight,194.55
5,Radiohead,Jigsaw Falling Into Place,189.38
6,Weyes Blood,Children of the Empire,181.84
7,Ethel Cain,A House In Nebraska,179.8
8,Cigarettes After Sex,Apocalypse,176.09
9,Ethel Cain,Thoroughfare,165.9


### Top 10 Artists

In [143]:
# Artist names left out of the ranking.
to_omit = [
    "Wet Jeans", "No Laying Up - Golf Podcast", "Binchtopia", "History That Doesn't Suck",
    "HBO's The Last of Us Podcast", "The Analytics Power Hour", "S-Town",
    "Last Podcast On The Left", "Two Hot Takes", "The Shotgun Start", "DataFramed", "SmartLess",
    "Relaxing White Noise",
]

# Total play time per artist, longest first and re-indexed by rank. The sum is
# converted from milliseconds to minutes (2 decimals) before filtering, so the
# `>= 10` threshold is in minutes.
top_artists = (
    df.groupby("artistName")
    .agg(min_played=("msPlayed", "sum"))
    .reset_index()
    .sort_values("min_played", ascending=False)
    .reset_index(drop=True)
    .assign(min_played=lambda t: round(t["min_played"] / 1000 / 60, 2))
    .loc[lambda t: (~t["artistName"].isin(to_omit)) & (t["min_played"] >= 10)]
)

# Display the ten longest-played artists with reader-friendly column headers.
display_names = {"artistName": "Artist Name", "min_played": "Minutes Played"}
top_artists.head(n=10).reset_index(drop=True).rename(columns=display_names)

Unnamed: 0,Artist Name,Minutes Played
0,The Strokes,3221.39
1,Radiohead,3175.52
2,Taylor Swift,2234.47
3,Ethel Cain,1720.6
4,Weyes Blood,1675.3
5,Cigarettes After Sex,1369.78
6,Travis Scott,1107.8
7,Carly Rae Jepsen,729.77
8,Lana Del Rey,677.91
9,The Voidz,665.84


### Which months did I listen to the most music?

In [144]:
def get_month(ts: str) -> str:
    """Return the full month name for a ``YYYY-MM-DD HH:MM`` timestamp string.

    Args:
        ts: A timestamp written as ``"%Y-%m-%d %H:%M"``,
            e.g. ``"2023-08-15 12:30"``.

    Returns:
        The month name produced by the ``%B`` directive, e.g. ``"August"``.

    Raises:
        ValueError: If ``ts`` does not match the ``"%Y-%m-%d %H:%M"`` format.
    """
    parsed = datetime.strptime(ts, "%Y-%m-%d %H:%M")
    # A datetime format spec is passed to strftime with the same directive.
    return f"{parsed:%B}"

# Calendar order for the categorical month column.
month_order = ["January", "February", "March", "April", "May", "June",
               "July", "August", "September", "October", "November", "December"]

# Parse all timestamps in one vectorized call instead of a per-row Python loop.
# month_name() returns English names by default, matching the English
# categories above regardless of the process locale.
end_times = pd.to_datetime(df["endTime"], format="%Y-%m-%d %H:%M")
df["month"] = pd.Categorical(end_times.dt.month_name(), categories=month_order)

# Minutes played per calendar month, busiest month first.
# observed=False keeps months with no plays in the result.
# Months are pooled across years, so a month that appears in more than one
# year of the data is summed together.
(
    df.groupby("month", observed=False)
    .agg(min_played=("msPlayed", "sum"))
    .reset_index()
    .assign(min_played=lambda x: round(x["min_played"] / 1000 / 60, 2))
    .sort_values("min_played", ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,month,min_played
0,August,5075.66
1,May,4683.3
2,September,4494.58
3,July,4291.78
4,January,4201.04
5,April,4189.81
6,December,4027.46
7,October,3752.71
8,June,3731.13
9,March,3611.68
