Sources
* https://jhrcook.github.io/spotify-data-analysis/10_005_analyzing-my-streaming-history.html
* https://strftime.org/

In [None]:
# %pip install pandas numpy matplotlib seaborn rapidfuzz statsmodels

In [None]:
import os
from typing import cast, Literal
from pprint import pp
from datetime import datetime

import numpy as np
import pandas as pd
from matplotlib.ticker import MultipleLocator, StrMethodFormatter

import seaborn as sns
import matplotlib.pyplot as plt

pd.set_option("display.expand_frame_repr", False)

from preprocessing import read_file, extract_filtered, SongAttributes
from utils.fuzzy_searchers import *
from utils.plots import Plots
from utils.filters import Filters
from utils.extract_from_timestamp import * 
from utils.smoothen import *
from utils.series_textwrap import *

In [None]:
# Load the raw streaming history.
# Extract "Spotify Extended Streaming History.zip" into a per-user folder located in
# the current working directory, then select that folder below.

# data_folder = os.path.join(os.path.abspath(''), "niharika")
data_folder = os.path.join(os.path.abspath(''), "kirtika")
# data_folder = os.path.join(os.path.abspath(''), "prithika")
print(data_folder)

# Only the audio history files are read; any other file in the folder is skipped.
# os.listdir() returns names in arbitrary order, so the listing is sorted to keep the
# order of the loaded records (and of everything exported from them) reproducible.
files = [
    os.path.join(data_folder, file)
    for file in sorted(os.listdir(data_folder))
    if file.startswith("Streaming_History_Audio")
]

# Flatten the records yielded by read_file() for every file into a single list.
songs = [song for file in files for song in read_file(file)]

In [None]:
# Attributes requested from every streaming record through extract_filtered().
filters: list[SongAttributes] = [
    "ts",
    "ms_played",
    "master_metadata_track_name",
    "master_metadata_album_artist_name",
    "master_metadata_album_album_name",
    "spotify_track_uri",
]

# One extract_filtered() result per loaded record, in the same order as `songs`.
filtered = [extract_filtered(entry, filters) for entry in songs]

In [None]:
# Build the working frame and parse the "ts" column into pandas datetimes in one step.
df = pd.DataFrame(filtered).assign(
    ts=lambda frame: pd.to_datetime(frame["ts"])
)

# Persist the frame as a JSON array with one object per row.
df.to_json(path_or_buf="df.json", orient="records")

In [None]:
# Two stacked axes; both are handed to the project's KDE plotting helper.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
fig.suptitle("KDE Distributions for all tracks", fontsize=23)

Plots.track_playtime_kde_dist(df, ax[0], ax[1])

plt.tight_layout()
plt.show()

In [None]:
# Unique artists vs. unique tracks listened to in each calendar month.
copy = df[["ts", "master_metadata_album_artist_name", "master_metadata_track_name"]].copy(deep=True)
copy["month"] = copy["ts"].dt.to_period("M")
copy.sort_values("ts", inplace=True, ascending=True)

# One row per month: number of distinct artists and number of distinct track names.
monthwise_unique_artists_count = copy.groupby("month")["master_metadata_album_artist_name"].nunique()
monthwise_unique_track_count = copy.groupby("month")["master_metadata_track_name"].nunique()
aggr = pd.concat([monthwise_unique_artists_count, monthwise_unique_track_count], axis=1)
# Human-readable month label (e.g. "May 2023") used as the categorical x value.
aggr["month-year"] = aggr.index.strftime("%b %Y")
aggr.rename(
    {
        "master_metadata_album_artist_name": "Artist Count",
        "master_metadata_track_name": "Track Count",
    },
    inplace=True,
    axis=1,
)

y1 = aggr["Artist Count"]
y2 = aggr["Track Count"]

fig, ax = plt.subplots(1, 1, figsize=(10, 4))

# Dashed guide lines at the peak value of each series.
ax.axhline(y1.max(), color="#5EABD6", alpha=0.5, linestyle="--")
ax.axhline(y2.max(), color="#EF5A6F", alpha=0.5, linestyle="--")

# The legend label must be a plain string: a one-element list is not a valid label
# for a single line and may be rendered as the list's repr in the legend.
sns.lineplot(
    x=aggr["month-year"],
    y=y1,
    label="Artist Count",
    color="#5EABD6",
    marker=".",
    ax=ax,
)
sns.lineplot(
    x=aggr["month-year"],
    y=y2,
    label="Track Count",
    color="#EF5A6F",
    marker=".",
    ax=ax,
)

# First and last listening day as ISO date strings; ISO dates sort chronologically
# even when compared as text, so min()/max() give the date range.
dates = copy["ts"].dt.strftime("%Y-%m-%d")
min_date = dates.min()
max_date = dates.max()

# Seven evenly spaced ticks across the range, expressed in the same "Mon YYYY" format
# as the x values so they map onto the categorical axis.
date_ticks = pd.date_range(start=min_date, end=max_date, periods=7, inclusive="both")
tick_labels = list(date_ticks.strftime("%b %Y"))
ax.set_xticks(tick_labels, labels=tick_labels)

ax.legend(
    loc="upper left",
)
sns.despine()

# Axis labels are blanked on purpose; the title and legend carry the meaning.
plt.xlabel("")
plt.ylabel("")
plt.title("Variability in artists and tracks")
plt.tight_layout()
plt.show()

In [None]:
# Work on an independent deep copy so the cells below do not modify `df`.
copy = df.copy(deep=True)

In [None]:
# Tag every record with its abbreviated month name and keep the "May" rows (from
# every year) as an independent frame.
# The vectorized .dt.strftime replaces a per-row Python call; "%b" is locale-dependent,
# so the "May" comparison assumes an English locale.
copy["month"] = pd.to_datetime(copy["ts"]).dt.strftime("%b")
may = copy[copy["month"] == "May"].copy(deep=True)

In [None]:
# Zero-padded day of the month ("01".."31") for each May record.
# Derived from `may` itself rather than from the full `copy` frame, so only the May
# rows are formatted and the assignment does not depend on index alignment.
may["date"] = pd.to_datetime(may["ts"]).dt.strftime("%d")

In [None]:
# Per-column non-null counts for each "date" value in May, ordered so the value with
# the most "ts" entries comes first.
grouped = (
    may.groupby(by="date")
    .count()
    .sort_values(by="ts", ascending=False)
)

# Show just the number of records per "date" value.
grouped.loc[:, "ts"]

In [None]:
# Number of records whose "ms_played" value is greater than zero.
len(df.loc[df["ms_played"] > 0])