**DO NOT BLINDLY RUN ALL CELLS IN THIS NOTEBOOK!!**

## Todo
ANALYSIS
- fix `chronology()` and `averages()` in `Analyzer`
- restore most active day (currently broken) in `Analyzer`
- add `sessions()` in `Analyzer` for longest listening session

NEW FEATURES
- matplotlib & GUI libraries for data vis (until more aesthetic things can be found) (**WIP**)
- better skip detection via timestamps of listen & duration of song (in Loader)
- option to store the processed database and load it back later, so repeated runs do not need to repeat the API calls
- add musicbrainz (or another) API to get genre and decade and album and other really good song data (**WIP**)
- analyze user playlists (this will be a little tougher)
- bring back proper pretty generated reports

GENERAL USABILITY
- make `use_duration` mandatory, remove relevant flags
- store `time` as datetime and not string
- fix `merge_jsons()` so that it works in all cases
- fix functions to account for
  - days when music wasn't listened to
  - leap years
  - duplicates (e.g. same songs but different URLs)
  - timezones
- move imports to necessary sections
- add basic info and `input()`s to help any user generate their report

LONG TERM
- understand songs' relation to one another by their sequence (start doing ML using song metadata, essentially)

In [None]:
# imports for the ENTIRE program (not all are necessary for each individual section)
import datetime
import sys
import getopt
import json
import requests
import re
import pandas as pd
import collections
import os
import itertools
import matplotlib.pyplot as plt
import pytz
import itertools
import numpy as np
import urllib.parse
from google.colab import drive
drive.mount('/content/drive')
from IPython.display import HTML, display
import time
import ast
from collections import defaultdict

In [None]:
# Force a remount when the Drive connection is broken or not syncing.
# Only needed on demand: the imports cell already mounts '/content/drive' once.
drive.mount('/content/drive', force_remount=True)

### Loader class

In [None]:
class Loader():
    """Build history / artist / song tables from a YouTube watch-history export.

    Parameters
    ----------
    use_duration : bool
        When true, ``outs()`` fetches song durations from the YouTube Data API.
    analyze_years : list of str
        Four-digit years (as strings) whose entries are kept.
    apikey : str
        YouTube Data API key, used by ``call_api``.
    filepath : str
        Path of the raw watch-history JSON file.
    loadfp : str
        Directory that holds previously generated ``report-*.csv`` files.
    ignores : dict, optional
        ``{"Title": [...], "Artist": [...], "URL": [...]}`` values to drop.

    NOTE(review): ``load_reports`` is derived from ``filepath`` and
    ``use_loadfp`` from ``loadfp``. With these definitions ``outs()`` only
    parses the raw JSON when ``filepath`` is empty, which cannot work. The
    flags are kept exactly as they were because the intended semantics are
    not clear from this file -- please confirm and correct them.
    """

    def __init__(self, use_duration, analyze_years, apikey, filepath, loadfp, ignores=None):
        # A dict literal as default argument would be shared between instances.
        if ignores is None:
            ignores = {"Title": [], "Artist": [], "URL": []}
        self.use_duration = use_duration
        self.analyze_years = analyze_years
        self.apikey = apikey
        self.filepath = filepath
        self.loadfp = loadfp
        self.ignores = ignores
        self.load_reports = filepath != ""
        self.use_loadfp = loadfp != ""
        # The raw history file is opened lazily by parse_json(). The previous
        # ``if self.load:`` tested a bound method (always true), so a handle
        # was opened -- and leaked -- even when only the CSV reports were read.
        self.file = None
        self.out = display(Loader.progress(0, 100), display_id=True)

    # utility methods
    def should_not_ignore(self, obj):
        """Return True for a YouTube Music "Watched ..." entry from an analyzed year."""
        return (
            'subtitles' in obj
            and obj.get('header') == "YouTube Music"
            and obj.get('title', '')[:7] == "Watched"
            and obj.get('time', '')[:4] in self.analyze_years
        )

    @staticmethod
    def open_file(filepath):
        """Open ``filepath`` for reading as UTF-8; print a message and return None on failure."""
        try:
            return open(filepath, "r", encoding="utf8")
        except (OSError, TypeError, ValueError):
            print("There was an error opening your report files")
            return None

    # takes two watch history files and merges them together (useful for takeout files which have limited date range)
    # only works if original jsons are already sorted by date and the first argument is chronologically ahead of the second argument and the two jsons overlap
    @staticmethod
    def merge_jsons(file1path, file2path, outputfilepath):
        """Splice two overlapping watch-history exports into one JSON file.

        The text of ``file1path`` is copied up to the line that carries the
        timestamp of the first entry with subtitles in ``file2path``; that
        entry is closed with a ``"merged":"here"`` marker and the content of
        ``file2path`` is appended.

        Raises
        ------
        ValueError
            If ``file2path`` has no entry with subtitles, or its timestamp
            does not occur in ``file1path`` (the files do not overlap).
            Previously both cases silently wrote invalid JSON.
        """
        print("WARN: merge_jsons() was called. Be sure that its criteria are met before using the output of this method!")
        with open(file2path, "r", encoding="utf8") as older_file:
            older_text = older_file.read()

        # "YYYY-MM-DDTHH:MM:SS" -- the first 19 characters, so timestamps with
        # and without fractional seconds are both handled.
        anchor = ""
        for obj in json.loads(older_text):
            if 'subtitles' in obj:
                anchor = obj['time'][:19]
                break
        if not anchor:
            raise ValueError("merge_jsons(): no entry with subtitles found in " + str(file2path))

        pieces = []
        anchor_found = False
        with open(file1path, "r", encoding="utf8") as newer_file:
            for line in newer_file:
                pieces.append(line)
                if anchor in line:
                    pieces.append("\"merged\":\"here\"},")
                    anchor_found = True
                    break
        if not anchor_found:
            raise ValueError("merge_jsons(): the two files do not overlap at " + anchor)

        # Drop the opening "[" of the second file so both arrays become one.
        pieces.append(older_text[1:])

        with open(outputfilepath, "wt", encoding="utf8") as new_file:
            new_file.write("".join(pieces))

    # processes bulk lists
    def parse_json(self):
        """Read the raw history and fill ``self.history`` and ``self.artists``.

        Uses ``self.file`` when it is set, otherwise opens ``self.filepath``.
        The file is closed once it has been read.

        Raises
        ------
        ValueError
            If no readable history file is available.
        """
        source = self.file if self.file is not None else self.open_file(self.filepath)
        if source is None:
            raise ValueError("No readable watch-history file; check the filepath given to Loader")
        try:
            json_object = json.load(source)
        finally:
            source.close()
            self.file = None

        ignored_titles = set(self.ignores.get("Title", ()))
        ignored_artists = set(self.ignores.get("Artist", ()))
        ignored_urls = set(self.ignores.get("URL", ()))

        self.history = {"Title": [], "Artist": [], "Year": [], "URL": [], "Duration": []}
        for obj in json_object:
            if not self.should_not_ignore(obj):
                continue
            watch_url = obj.get('titleUrl', '')
            if 'v=' not in watch_url or not obj['subtitles']:
                continue  # no video id or no channel name to work with

            title = obj['title'][8:]  # strip the leading "Watched "
            artist = obj['subtitles'][0]['name'].replace('- Topic ', '').replace('- Topic', '')
            video_id = watch_url[watch_url.index('v=')+2:]

            # Filtering while building replaces the old delete-while-iterating
            # pass, which skipped the row that followed every deleted row.
            if title in ignored_titles or artist in ignored_artists or video_id in ignored_urls:
                continue

            self.history["Title"].append(title)
            self.history["Artist"].append(artist)
            self.history["Year"].append(obj['time'])
            self.history["URL"].append(video_id)
            self.history["Duration"].append(0)

        url_counts = collections.Counter(self.history['URL'])
        self.history['Occurrences'] = [url_counts[url] for url in self.history['URL']]

        artist_counts = collections.Counter(self.history['Artist'])
        self.artists = {
            "Artist": list(artist_counts.keys()),
            "Occurrences": list(artist_counts.values()),
            "Duration": [0]*len(artist_counts),
        }

    # generates dataframes and csv files
    def gen_dataframes(self):
        """Create the three DataFrames and write them as ``report-*.csv``.

        With ``use_loadfp`` the files go to ``generated-reports/<timestamp>/``
        (created when missing), otherwise to the current directory. Paths are
        built explicitly, so the working directory is never changed.
        """
        if self.use_loadfp:
            stamp = datetime.datetime.now().strftime("%d.%m.%Y-%H.%M.%S")
            outdir = os.path.join("generated-reports", stamp)
            os.makedirs(outdir, exist_ok=True)
        else:
            outdir = ""

        self.historyDF = pd.DataFrame(self.history)
        self.historyDF.to_csv(os.path.join(outdir, "report-history.csv"))

        self.artistsDF = pd.DataFrame(self.artists)
        self.artistsDF.to_csv(os.path.join(outdir, "report-artists.csv"))

        self.total_songs = len(self.historyDF)
        # here is where a better duplicate-dropping method would go to detect same songs
        self.songsDF = self.historyDF.drop_duplicates(subset=['URL']).reset_index(drop=True)
        self.unique_songs = len(self.songsDF)
        self.songsDF.to_csv(os.path.join(outdir, "report-songs.csv"))

    # API management functions
    @staticmethod
    def parse_duration(duration):
        """Convert a duration string to whole seconds.

        ISO-8601 durations ("PT3M20S", "PT1H", "P1DT2H3M4S") are parsed by
        unit, so a missing component no longer shifts the other fields.
        Any other string falls back to the positional rule: up to four
        numbers are read as [days,] [hours,] [minutes,] seconds.
        Returns 0 when nothing usable is found.
        """
        iso = re.fullmatch(
            r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)(?:\.\d+)?S)?)?',
            duration.strip(),
        )
        if iso and any(group is not None for group in iso.groups()):
            weeks, days, hours, minutes, seconds = (int(group) if group else 0 for group in iso.groups())
            return ((weeks*7 + days)*24 + hours)*3600 + minutes*60 + seconds

        parts = [int(number) for number in re.findall(r'\d+', duration)]
        if not parts or len(parts) > 4:
            return 0
        # Rightmost number is seconds, then minutes, hours, days.
        return sum(unit*value for unit, value in zip((1, 60, 3600, 86400), reversed(parts)))

    def call_api(self, idlist):
        """Fetch durations for up to 50 video ids and store them in ``self.history``.

        Failures are reported with a printed message; nothing is raised for a
        failed request so one bad batch does not abort the whole run.
        """
        if not idlist:
            return
        parameters = {"part": "contentDetails,snippet", "id": ','.join(idlist), "key": self.apikey}
        try:
            response = requests.get("https://www.googleapis.com/youtube/v3/videos", params=parameters, timeout=30)
        except requests.RequestException:
            print("Failed API call at", idlist)
            return

        if response.status_code != 200:
            print("Failed API call at", idlist)
            return

        # parse_duration() now understands the ISO format, so the former
        # "values below 10 are minutes" correction is no longer needed.
        fetched = {}
        for item in response.json().get('items', []):
            seconds = Loader.parse_duration(item['contentDetails']['duration'])
            if seconds > 0:
                fetched[item['id']] = seconds

        # update by url
        for position, url in enumerate(self.history["URL"]):
            if url in fetched:
                self.history["Duration"][position] = fetched[url]

    @staticmethod
    def progress(value, max=100):
        """Return an HTML ``<progress>`` element showing ``value`` out of ``max``."""
        return HTML("""
            <progress
                value='{value}'
                max='{max}'
                style='width: 100%'
            >
                {value}
            </progress>
        """.format(value=value, max=max))

    def gen_durations(self):
        """Fetch durations for every unique song, total them per artist, write the reports."""
        # Count duration
        idlist = []
        calls = 0
        unique_song_urls = set(self.history['URL'])
        len_usurl = len(unique_song_urls)
        print("Getting durations. This may take a while. Awaiting", len_usurl, "requests.")
        for url in unique_song_urls:
            idlist.append(url)
            if len(idlist) == 50:  # the API call is made in batches of 50 ids
                self.out.update(Loader.progress(((1+50*calls)*100)/len_usurl, 100))
                self.call_api(idlist)
                calls += 1
                idlist = []
        if idlist:  # remaining partial batch; skip the request when it is empty
            self.call_api(idlist)
        self.out.update(Loader.progress(100, 100))

        # update artist durations
        artist_durations = collections.defaultdict(int)
        for artist, seconds in zip(self.history["Artist"], self.history["Duration"]):
            artist_durations[artist] += seconds

        occurrences = collections.Counter(self.history["Artist"])
        artist_names = list(occurrences)
        self.artists = {
            "Artist": artist_names,
            "Occurrences": [occurrences[name] for name in artist_names],
            "Duration": [artist_durations[name] for name in artist_names],
        }

        self.gen_dataframes()

    def gen_blank_artists(self):
        """Build the artist table with play counts only (no Duration column)."""
        occurrences = collections.Counter(self.history["Artist"])
        self.artists = {"Artist": list(occurrences.keys()), "Occurrences": list(occurrences.values())}

    def outs(self):
        """Return ``(historyDF, artistsDF, songsDF)``, loaded from CSV or freshly generated."""
        if self.load_reports:
            self.historyDF, self.artistsDF, self.songsDF = self.load()
        else:
            print("We are now processing your file")
            self.parse_json()
            if self.use_duration:
                # gen_durations() already generates the dataframes and CSVs;
                # doing it again here wrote every report twice.
                self.gen_durations()
            else:
                self.gen_blank_artists()
                self.gen_dataframes() # generates dataframes and writes to CSV

        return self.historyDF, self.artistsDF, self.songsDF

    def load(self):
        """Read the three ``report-*.csv`` files from ``self.loadfp``."""
        print("Loading your preprocessed history files")
        # pandas opens and closes the files itself, so no handle is left open.
        historyDF = pd.read_csv(os.path.join(self.loadfp, "report-history.csv"), encoding="utf8")
        artistsDF = pd.read_csv(os.path.join(self.loadfp, "report-artists.csv"), encoding="utf8")
        songsDF = pd.read_csv(os.path.join(self.loadfp, "report-songs.csv"), encoding="utf8")
        return historyDF, artistsDF, songsDF

### Analyzer class

In [None]:
class Analyzer():
    """Compute listening statistics from the three tables produced by ``Loader``.

    Parameters
    ----------
    historyDF : pandas.DataFrame
        One row per play; the methods read the columns Title, Artist, Year
        (the timestamp string), URL and Duration.
    artistsDF : pandas.DataFrame
        One row per artist with Occurrences and, optionally, Duration.
    songsDF : pandas.DataFrame
        One row per unique song with Occurrences and Duration.
    use_duration : bool
        Stored on the instance; not consulted by the methods below.
    """

    def __init__(self, historyDF, artistsDF, songsDF, use_duration=True):
        self.history = historyDF
        self.artists = artistsDF
        self.songs = songsDF
        self.use_duration = use_duration

    def tops(self, n=10):
        """Return the top ``n`` songs and artists as a dict of DataFrames.

        The "by time listened" ranking is left out when the artist table has
        no Duration column (reports generated without durations).
        """
        tops = {}
        tops['Top ' + str(n) + ' songs by count'] = self.songs.nlargest(n, ['Occurrences'])
        tops['Top ' + str(n) + ' artists by count'] = self.artists.nlargest(n, ['Occurrences'])
        if 'Duration' in self.artists.columns:
            tops['Top ' + str(n) + ' artists by time listened'] = self.artists.nlargest(n, ['Duration'])
        return tops

    def basic(self):
        """Return total seconds, total plays, unique songs and unique artists."""
        return {
            "Total seconds": sum(self.history["Duration"]),
            "Total songs": len(self.history["Title"]),
            "Unique songs": len(self.songs["Title"]),
            "Unique artists": len(self.artists["Artist"]),
        }

    def uniques(self, n=10):
        """Return the ``n`` artists with the most distinct songs in the history."""
        uniques = {}
        uniques['Top ' + str(n) + ' artists by count of unique songs in history'] = collections.Counter(self.songs["Artist"]).most_common(n)
        return uniques

    def repeats(self, n=10):
        """Return the ``n`` songs with the longest back-to-back replay streaks.

        The result is ordered by streak length (longest first) and carries the
        streak length in an added "Consecutive Plays" column.
        """
        # Longest run of identical consecutive URLs, per URL.
        streaks = {}
        for url, run in itertools.groupby(self.history["URL"]):
            length = sum(1 for _ in run)
            if length > streaks.get(url, 0):
                streaks[url] = length

        ranked = sorted(streaks.items(), key=lambda pair: pair[1], reverse=True)[:n]
        top_urls = [url for url, _ in ranked]

        grouped_songs = self.songs[self.songs['URL'].isin(top_urls)].copy()
        grouped_songs["Consecutive Plays"] = grouped_songs["URL"].map(streaks)
        grouped_songs = grouped_songs.sort_values("Consecutive Plays", ascending=False, kind="mergesort")

        repeats = {}
        repeats[str(n) + ' most consecutively repeated songs'] = grouped_songs
        return repeats

    def _local_times(self, tz):
        """Parse every history timestamp as UTC and convert it to ``tz``.

        Returns one entry per history row; rows whose timestamp cannot be
        parsed get None.
        """
        zone = pytz.timezone(tz)
        local_times = []
        for stamp in self.history["Year"]:
            try:
                # First 19 characters = "YYYY-MM-DDTHH:MM:SS", with or without
                # fractional seconds behind it.
                naive = datetime.datetime.strptime(str(stamp)[:19], "%Y-%m-%dT%H:%M:%S")
            except ValueError:
                local_times.append(None)
            else:
                local_times.append(pytz.utc.localize(naive).astimezone(zone))
        return local_times

    def _top_songs_per_month(self, local_times, n):
        """Return a list of 12 DataFrames: the ``n`` most played songs of each month."""
        month_of = pd.Series(
            [moment.month if moment is not None else 0 for moment in local_times],
            index=self.history.index,
            dtype="int64",
        )
        top_songs_per_month = []
        for month in range(1, 13):
            monthly = self.history.loc[month_of == month, ["Title", "Artist", "URL", "Duration"]].reset_index(drop=True)
            plays = collections.Counter(monthly["URL"])
            # Explicit integer dtype keeps nlargest() working for empty months.
            monthly["Occurrences"] = pd.Series([plays[url] for url in monthly["URL"]], index=monthly.index, dtype="int64")
            monthly = monthly.drop_duplicates(subset=['URL'])
            top_songs_per_month.append(monthly.nlargest(n, ['Occurrences']))
        return top_songs_per_month

    def _daily_stats(self, local_times):
        """Return per-day listening totals plus the most musical and most diverse day."""
        seconds_per_day = {}
        urls_per_day = {}
        for moment, url, seconds in zip(local_times, self.history["URL"], self.history["Duration"]):
            if moment is None:
                continue
            day = moment.date().isoformat()
            seconds_per_day[day] = seconds_per_day.get(day, 0) + seconds
            urls_per_day.setdefault(day, set()).add(url)

        stats = {"Durations Per Day": seconds_per_day}
        if seconds_per_day:
            busiest = max(seconds_per_day, key=seconds_per_day.get)
            most_diverse = max(urls_per_day, key=lambda day: len(urls_per_day[day]))
            # [date, minutes listened]
            stats["Most Musical Day"] = [busiest, seconds_per_day[busiest]//60]
            # [date, minutes listened, number of distinct songs]
            stats["Most Diverse Day"] = [most_diverse, seconds_per_day[most_diverse]//60, len(urls_per_day[most_diverse])]
        return stats

    def chronology(self, n=3, tz="US/Eastern"):
        """Return time-based statistics, all computed in the time zone ``tz``.

        Parameters
        ----------
        n : int
            Number of songs kept per month.
        tz : str
            Time zone name understood by pytz; timestamps are read as UTC and
            converted to it.

        Returns
        -------
        dict
            "Top n Songs Per Month" (list of 12 DataFrames),
            "Songs Per Time of Day" (hour 0-23 -> plays),
            "Days of the Week" (0=Monday .. 6=Sunday -> plays),
            "Songs Per Month" (month -> plays, only months with plays),
            the two "... Errors" counters (timestamps that could not be
            parsed), "Durations Per Day", and -- when there is data --
            "Most Musical Day" and "Most Diverse Day".

        Months, hours and weekdays are pooled across years, so results for a
        period longer than one year mix the years together.
        """
        local_times = self._local_times(tz)
        errs = sum(1 for moment in local_times if moment is None)

        chronology = {}
        chronology["Top n Songs Per Month"] = self._top_songs_per_month(local_times, n)

        hours = dict.fromkeys(range(24), 0)
        weekdays = dict.fromkeys(range(7), 0)
        months = {}
        for moment in local_times:
            if moment is None:
                continue
            hours[moment.hour] += 1
            weekdays[moment.weekday()] += 1
            months[moment.month] = months.get(moment.month, 0) + 1

        chronology["Songs Per Time of Day"] = hours
        chronology["Songs Per Time of Day Errors"] = errs
        chronology["Days of the Week"] = weekdays
        chronology["Days of the Week Errors"] = errs
        chronology["Songs Per Month"] = months
        # Added last so the position of the entries above does not change.
        chronology.update(self._daily_stats(local_times))

        return chronology

    @staticmethod
    def _mean(values):
        """Arithmetic mean of ``values``; 0 for an empty sequence."""
        return sum(values) / len(values) if values else 0

    @staticmethod
    def _upper_median(values):
        """Middle element of the sorted values (upper one for even lengths); None when empty."""
        if not values:
            return None
        return sorted(values)[len(values)//2]

    def averages(self):
        """Return averages, medians, extremes and the replay frequency table.

        "Shortest Song" and "Longest Song" are ``[seconds, title, artist]``
        (None when there are no songs). "Average Seconds per Day" divides by
        the number of distinct calendar dates (taken from the timestamp
        string) on which something was played.
        """
        history_durations = list(self.history["Duration"])
        song_durations = list(self.songs["Duration"])
        replays = list(self.songs["Occurrences"])
        # Full date (YYYY-MM-DD), so the same day of different years counts twice.
        days_listened = len({str(stamp)[:10] for stamp in self.history["Year"]})

        averages = {}
        averages["Average Song Length"] = Analyzer._mean(history_durations)
        averages["Average Song Length Unique"] = Analyzer._mean(song_durations)
        averages["Average Seconds per Day"] = sum(history_durations) / days_listened if days_listened else 0

        if song_durations:
            positions = range(len(song_durations))
            shortest = min(positions, key=song_durations.__getitem__)
            longest = max(positions, key=song_durations.__getitem__)
            averages["Shortest Song"] = [song_durations[shortest], self.songs["Title"].iloc[shortest], self.songs["Artist"].iloc[shortest]]
            # The duration is reported here; it used to be the row index.
            averages["Longest Song"] = [song_durations[longest], self.songs["Title"].iloc[longest], self.songs["Artist"].iloc[longest]]
        else:
            averages["Shortest Song"] = None
            averages["Longest Song"] = None

        # 5th percentile song by duration?
        # 95th percentile song by duration?

        averages["Median Song Length"] = Analyzer._upper_median(history_durations)
        averages["Median Song Length Unique"] = Analyzer._upper_median(song_durations)

        #plt.hist(list(self.songs["Duration"]), 30, (0, 600))
        #plt.show()

        averages["Average Replays"] = Analyzer._mean(replays)
        averages["Max Replays"] = max(replays) if replays else 0 # yes this statistic is already calculated somewhere else

        averages["Occurrences Frequency Table"] = collections.Counter(replays)

        return averages

    def sessions(self):
        """Not implemented yet; returns None."""
        # split history into sessions (need to decide what distinguishes 1 session from another, most likely 30mins)
        # get longest session as a statistic
        return None

### Execution code

In [None]:
# Switch into the project folder on the mounted Drive.
# The path is relative, so this only works from the directory that contains
# "drive/" -- presumably /content on Colab; verify before re-running the cell.
os.chdir("drive/My Drive/Colab Notebooks/ytmwrapped")

In [None]:
# Combine two exports into one file: (newer export, older export, output path).
# merge_jsons() requires both files to be sorted by date and to overlap.
Loader.merge_jsons("watch-history-new.json", "watch-history.json", "watch-history-merged.json")

In [None]:
# loader code
# Every file is read inside a `with` block so the handle is closed again.
with open("apitoken.txt", "r") as token_file:
    # A trailing newline in the token file would end up inside the API key.
    apikey = token_file.read().strip()
filepath = "watch-history-merged.json"
loadfp = os.getcwd()+"/generated-reports/22.12.2023-20.13.01"

# Comma-separated values to drop. Only line breaks are trimmed (other
# whitespace may be part of a name) and empty entries are discarded, so an
# empty file yields an empty list instead of [""].
ignore = {}
for ignore_key, ignore_file in (("Title", "ignore-title.txt"), ("Artist", "ignore-artist.txt"), ("URL", "ignore-url.txt")):
    with open(ignore_file, "r") as ignore_handle:
        raw_entries = ignore_handle.read().split(",")
    ignore[ignore_key] = [entry.strip("\r\n") for entry in raw_entries if entry.strip("\r\n")]

loader = Loader(use_duration=True, analyze_years=["2023"], apikey=apikey, filepath=filepath, loadfp=loadfp, ignores=ignore)
history, artists, songs = loader.outs()

In [None]:
# Run the working analyses and print each one under its own heading.
analyzer = Analyzer(history, artists, songs)
basic = analyzer.basic()
tops = analyzer.tops()
uniques = analyzer.uniques()
repeats = analyzer.repeats()

report_sections = (
    ("BASIC", basic),
    ("TOPS", tops),
    ("UNIQUES", uniques),
    ("REPEATS", repeats),
)
for heading, section in report_sections:
    print(heading)
    for label in section:
        print(label)
        print(section[label])

In [None]:
# BROKENS
# Chronology and averages: print the monthly tops, then the second and
# fourth entry of the chronology result, then the averages dict.
print(" - Chronology - ")
chrono = analyzer.chronology(n=5)
month_position = 0
for month_top in chrono["Top n Songs Per Month"]:
    print(month_position)
    print(month_top)
    month_position += 1
chrono_entries = list(chrono.items())
print(chrono_entries[1])
print(chrono_entries[3])

print(" - Averages - ")
averages = analyzer.averages()
print(averages)

print("All done!")

In [None]:
# songs per times of the day

# x holds the hours from highest to lowest, y the play count of each hour.
times = chrono["Songs Per Time of Day"]
x = sorted(times.keys(), reverse=True)
y = [times[hour] for hour in x]

def rightRotate(lists, num):
    """Return a new list with the last ``num`` items of ``lists`` moved to the front."""
    split_at = len(lists) - num
    tail = [lists[position] for position in range(split_at, len(lists))]
    head = [lists[position] for position in range(0, split_at)]
    return tail + head

# Circular bar chart of plays per hour: a grey full-height ring with the
# actual values drawn over it.
x = rightRotate(x, 6)
y = rightRotate(y, 6)

plt.figure(figsize=(20,10))
ax = plt.subplot(111, polar=True)
plt.axis('off')

upperLimit = max(y)
lowerLimit = min(y)

maxval = upperLimit

# With no plays at all maxval is 0; avoid dividing by zero in that case.
slope = (maxval - lowerLimit) / maxval if maxval else 0
heights = [slope * i + lowerLimit for i in y]

width = 2*np.pi / 24

indexes = list(range(1, 25))
angles = [element * width for element in indexes]

grey_heights = [slope*maxval + lowerLimit] * 24

# Draw bars
# Background ring first ...
ax.bar(
    x=angles,
    height=grey_heights,
    width=width,
    bottom=lowerLimit,
    linewidth=2,
    edgecolor="white",
    color="#d3d3d3",
)

# ... then the real values on top.
bars = ax.bar(
    x=angles,
    height=heights,
    width=width,
    bottom=lowerLimit,
    linewidth=2,
    edgecolor="white",
    color="#61a4b2",
)

labelPadding = 5
labelThreshold = 100  # only bars taller than this get an hour label

for bar, angle, height, label in zip(bars,angles, heights, [str(i) for i in x]):

    rotation = np.rad2deg(angle)

    # Flip labels on the left half of the circle so they stay readable.
    if angle >= np.pi/2 and angle < 3*np.pi/2:
        alignment = "right"
        rotation = rotation + 180
    else:
        alignment = "left"

    if (height > labelThreshold):
        ax.text(
            x=angle,
            y=lowerLimit + height + labelPadding,
            s=label,
            ha=alignment,
            va='center',
            rotation=rotation,
            rotation_mode="anchor")

In [None]:
# listens per day of the week
# The counts are keyed by datetime.weekday(), where 0 is Monday, so the labels
# start on Monday and the values are read by key; days without plays count as 0.
weekday_counts = chrono["Days of the Week"]
weekdays = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
plt.bar(weekdays, [weekday_counts.get(day, 0) for day in range(7)])
print("Made", chrono["Days of the Week Errors"], "errors")

In [None]:
# listens per month
songs_per_month = chrono["Songs Per Month"]
plt.bar(songs_per_month.keys(), songs_per_month.values())

In [None]:
# How many songs were played once, twice, three times, ...
frequency_table = averages['Occurrences Frequency Table']
plt.bar(frequency_table.keys(), frequency_table.values())
plt.yscale("linear") # "log"

# MusicBrainz API

## Genres

In [None]:
# Read the genre whitelist, one genre per line; the file is closed when the
# `with` block ends.
with open('genres.txt', 'r') as allowed_vals_f:
    allowed_vals = [line.replace("\n", "") for line in allowed_vals_f]

In [None]:
# Look up every artist on MusicBrainz and keep the tags that appear in the
# genre whitelist. Artists that cannot be resolved keep an empty tag list.
artist_tags = {}
allowed_genres = set(allowed_vals)
print(len(artists['Artist']), "Artists")
for c, i in enumerate(artists['Artist']):
    artist_tags[i] = []
    if c % 20 == 0:
        print("Got", c, "artists")
    if c:
        # Pace the requests; see the MusicBrainz rate-limiting rules.
        time.sleep(1)
    try:
        response = requests.get(
            "https://musicbrainz.org/ws/2/artist/?fmt=json&query=name:" + urllib.parse.quote(i.strip()),
            timeout=30,
        )
        tags = response.json()['artists'][0]['tags']
    except (requests.RequestException, ValueError, KeyError, IndexError):
        # Request failed, reply was not JSON, no match, or the match has no tags.
        #print("No tags for artist: " + i)
        continue
    #print(tags)
    for ii in tags:
        if ii['name'] in allowed_genres:
            artist_tags[i].append(ii['name'])

In [None]:
# Pie chart of the ten most common genres, counting every artist once.
alltags = itertools.chain.from_iterable(artist_tags.values())
counter = collections.Counter(alltags).most_common(10)
genres = [genre for genre, _ in counter]
counts = [total for _, total in counter]
plt.pie(counts, labels=genres, autopct="%1.1f%%")
plt.show()

In [None]:
# Pie chart of the ten most common genres, weighted by plays.
# NOTE(review): `tags2` is not defined in any visible cell -- it has to exist
# in the session before this cell is run.
alltags = []
artist_tags = tags2
for played_artist in history['Artist']:
    tags_of_artist = artist_tags[played_artist]
    if isinstance(tags_of_artist, str):
        # a single tag that was typed in by hand
        alltags.append(tags_of_artist)
    else:
        alltags.extend(tags_of_artist)
counter = collections.Counter(alltags).most_common(10)
genres = [genre for genre, _ in counter]
counts = [total for _, total in counter]
plt.pie(counts, labels=genres, autopct="%1.1f%%")
plt.show()

In [None]:
# Ask for a tag for every artist whose tag list is still empty.
untagged_artists = (name for name, found_tags in tags2.items() if found_tags == [])
for artist_name in untagged_artists:
    print(artist_name)
    tags2[artist_name] = input()

In [None]:
# Save the artist -> tags mapping as JSON text.
with open('artist_tags.txt', 'w') as convert_file:
    json.dump(artist_tags, convert_file)

In [None]:
# Load the artist -> tags mapping again. The file is written with
# json.dumps(), so it is read back with the JSON parser rather than
# ast.literal_eval(), which follows Python's literal rules instead.
with open('artist_tags.txt', 'r') as convert_file:
    artist_tags = json.load(convert_file)

## Year

In [None]:
# Year of each artist's earliest release group on MusicBrainz (per artist, not
# per song, because the release data definitely has some issues).
arid = {}    # artist name -> MusicBrainz artist id
ardate = {}  # artist name -> earliest first-release year
count = {}
for c, i in enumerate(artists['Artist']):
    i = i.strip()
    if c%40 == 0:
        print("Got", c, "artists")
    if c:
        # Pace the requests; see the MusicBrainz rate-limiting rules.
        time.sleep(1)
    try:
        response = requests.get(
            "https://musicbrainz.org/ws/2/artist/?fmt=json&query=name:" + urllib.parse.quote(i),
            timeout=30,
        )
        arid[i] = response.json()['artists'][0]['id']
        time.sleep(1)
        response = requests.get(
            "https://musicbrainz.org/ws/2/release-group/?fmt=json&query=arid:" + arid[i],
            timeout=30,
        )
        release_groups = response.json()['release-groups']
    except (requests.RequestException, ValueError, KeyError, IndexError):
        # Request failed, reply was not JSON, or nothing was found.
        continue

    # Collect every usable year; a release group without a date no longer
    # throws away the whole artist.
    release_years = []
    for ii in release_groups:
        year_text = (ii.get('first-release-date') or '')[0:4]
        if len(year_text) == 4 and year_text.isdigit():
            release_years.append(int(year_text))
    if release_years:
        ardate[i] = min(release_years)

In [None]:
# Number of plays per first-release year of the played artist.
count = {}
for played_artist in history['Artist']:
    played_artist = played_artist.strip()
    if played_artist in ardate:
        release_year = ardate[played_artist]
        count[release_year] = count.get(release_year, 0) + 1

In [None]:
# Bar chart of `count`: one bar per key, bar height = the stored count.
plt.bar(count.keys(), count.values())