In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display

import db

In [2]:
# Edition (year) of the list being analysed; the edition before it is the
# baseline for the year-over-year comparison.
edition = 2024
previous_edition = edition - 1

# One row per song in the {current} edition, ordered by position, with:
#   pos_last        - its position in the {last} edition (NULL when the song
#                     has no row for that edition)
#   occurance_count - number of Occurance rows for the song with an edition
#                     up to and including {current}
sql = """
SELECT O1.position, A.name, S.rowid AS song_id, S.title, S.year, (
	SELECT position
	FROM Occurance O2
	WHERE edition = {last}
	AND O1.song = O2.song
) AS pos_last, (
    SELECT COUNT(rowid)
    FROM OCCURANCE O3
    WHERE O3.edition <= {current} AND O1.song = O3.song
) As occurance_count
FROM OCCURANCE O1 
INNER JOIN Song S ON S.rowid = O1.song
INNER JOIN Artist A ON A.rowid = S.artist
WHERE O1.edition = {current}
ORDER BY O1.position ASC
""".format(current=edition, last=previous_edition)

# NOTE(review): db.dataframe presumably executes the query and returns a
# pandas DataFrame - confirm against the project's db module.
positions = db.dataframe(sql)
df = positions.set_index("position")

# Summary statistics of the numeric columns as a quick sanity check.
df.describe()

Unnamed: 0,song_id,year,pos_last,occurance_count
count,2000.0,2000.0,1895.0,2000.0
mean,2547.1015,1991.6615,955.324538,16.249
std,1478.984099,16.672759,557.30373,8.721068
min,1.0,1956.0,1.0,1.0
25%,1239.75,1978.0,474.5,9.0
50%,2623.5,1989.0,949.0,16.0
75%,3858.25,2005.0,1426.5,26.0
max,4860.0,2024.0,2000.0,26.0


# De top 10

In [3]:
# The frame is already ordered by position (ORDER BY in the query), so the
# first ten rows are the top 10 of the current edition.
df.iloc[:10]

Unnamed: 0_level_0,name,song_id,title,year,pos_last,occurance_count
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,Queen,3123,Bohemian Rhapsody,1975,1.0,26
2,Coldplay,831,Fix You,2005,5.0,16
3,Eagles,1202,Hotel California,1977,3.0,26
4,Danny Vera,4682,Roller Coaster,2019,2.0,6
5,Billy Joel,417,Piano Man,1973,4.0,26
6,Dermot Kennedy,4849,Better Days,2021,,1
7,Led Zeppelin,2284,Stairway To Heaven,1971,6.0,26
8,Boudewijn de Groot,569,Avond,1997,8.0,26
9,Queen,3125,Love Of My Life,1975,10.0,25
10,Pearl Jam,2984,Black,1992,7.0,16


# Grootste stijgers/zakkers

In [4]:
# Places gained since the previous edition: last year's position minus this
# year's position (the index). Positive means the song climbed, negative means
# it dropped; NaN when the song has no position in the previous edition.
previous_position = df['pos_last']
df['climb_rate'] = previous_position - df.index

## Grootste stijgers

In [5]:
# Largest climb_rate first; rows with a missing climb_rate sort to the end,
# so the first 20 rows are the biggest climbers.
df.sort_values('climb_rate', ascending=False).iloc[:20]

Unnamed: 0_level_0,name,song_id,title,year,pos_last,occurance_count,climb_rate
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
323,Billie Eilish,4691,What Was I Made For?,2023,1719.0,2,1396.0
209,Teddy Swims,4816,Lose Control,2023,1598.0,2,1389.0
438,DI-RECT,1116,How My Heart Was Won,2023,1796.0,2,1358.0
518,Taylor Swift,4531,Cruel Summer,2023,1846.0,2,1328.0
601,Acda en De Munnik,72,Morgen Wordt Fantastisch,2023,1878.0,2,1277.0
249,Bee Gees,354,Tragedy,1979,1068.0,24,819.0
637,Natasha Bedingfield,2757,Unwritten,2004,1364.0,8,727.0
901,Dotan,1167,Home,2014,1618.0,11,717.0
799,Porcupine Tree,4827,Arriving Somewhere But Not Here,2005,1478.0,2,679.0
1195,Karin Bloemen,2152,Geen Kind Meer,1996,1869.0,26,674.0


## Grootste zakkers

In [6]:
# Smallest (most negative) climb_rate first, i.e. the songs that dropped the
# most places; keep the first 20 rows.
df.sort_values('climb_rate', ascending=True).iloc[:20]

Unnamed: 0_level_0,name,song_id,title,year,pos_last,occurance_count,climb_rate
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1943,Coldplay,863,Higher Power,2021,1144.0,3,-799.0
1412,"Thomas Acda, Paul de Munnik, Maan & Typhoon",4796,Als Ik Je Weer Zie,2021,649.0,3,-763.0
1872,The Beatles,3817,Sgt. Pepper's Lonely Hearts Club Band,1967,1119.0,26,-753.0
1600,The Pogues,4822,Dirty Old Town,1985,899.0,2,-701.0
1884,Coldplay x BTS,4801,My Universe,2021,1185.0,3,-699.0
1625,Snelle & Maan,4795,Blijven Slapen,2021,1053.0,3,-572.0
1924,Phil Collins,3033,You Can't Hurry Love,1982,1373.0,24,-551.0
1962,Suzan & Freek,4710,Dromen In Kleur,2021,1427.0,3,-535.0
1830,Prince,3093,Let's Go Crazy,1984,1303.0,14,-527.0
1713,Prince,3094,I Would Die 4 U,1984,1195.0,19,-518.0


# Nieuwe binnenkomers

In [7]:
# A song with no position in the previous edition is a new entry; pos_last is
# empty for every such row, so the column is dropped.
is_new = df['pos_last'].isna()
new_entries = df.loc[is_new].drop(columns='pos_last')

# Number of new entries (rows with a non-null artist name).
print(new_entries['name'].count())

105


## Re-entries
De volgende nieuwe binnenkomers hebben al eens eerder in de top2000 gestaan: 

In [8]:
# New entries that were counted in more than one edition have been in the
# list before: these are the re-entries.
re_entries = new_entries[new_entries['occurance_count'] > 1]
row_count = len(re_entries.index)
print(f"Aantal re-entries: {row_count}")

# Show every row for this one display only. option_context restores the
# previous display.max_rows on exit, so the setting does not leak into other
# cells (a global set_option with a small row_count would truncate them).
with pd.option_context('display.max_rows', None):
    display(re_entries)

Aantal re-entries: 43


Unnamed: 0_level_0,name,song_id,title,year,occurance_count,climb_rate
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1386,BLØF,474,Dichterbij Dan Ooit,2001,8,
1486,Hall & Oates,1732,Maneater,1982,7,
1543,Michael Jackson,2633,Rock With You,1979,15,
1587,Tom Odell,4178,Grow Old With Me,2013,7,
1635,Tim Hardin,4155,How Can We Hang On To A Dream,1967,25,
1667,The Rolling Stones,4043,Little Red Rooster,1965,24,
1680,U.S.A. For Africa,4294,We Are The World,1985,22,
1752,BLØF,473,Hier,2000,9,
1772,U2,4265,Angel Of Harlem,1988,9,
1786,Rihanna ft. Mikky Ekko,3292,Stay,2013,10,


## Echt nieuwe liedjes
De volgende nieuwe binnenkomers hebben nog nooit in de top2000 gestaan:

In [9]:
# New entries counted in exactly one edition (the current one) have never been
# in the list before; occurance_count is 1 for every such row, so the column
# is dropped.
real_new = new_entries[new_entries['occurance_count'] == 1].drop(columns=['occurance_count'])
row_count = len(real_new.index)
print(f"Aantal echt nieuwe liedjes: {row_count}")

# Show every row for this one display only. option_context restores the
# previous display.max_rows on exit, so the setting does not leak into other
# cells.
with pd.option_context('display.max_rows', None):
    display(real_new)

Aantal echt nieuwe liedjes: 62


Unnamed: 0_level_0,name,song_id,title,year,climb_rate
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
6,Dermot Kennedy,4849,Better Days,2021,
139,Joost,4841,Europapa,2024,
194,Lady Gaga & Bruno Mars,4842,Die With A Smile,2024,
302,Yves Berendse,4843,Terug In De Tijd,2024,
453,Hannah Mae,4850,Waterdicht,2023,
498,Linkin Park,2334,The Emptiness Machine,2024,
524,Teddy Swims,4817,The Door,2023,
564,Noah Kahan,4851,Stick Season,2023,
780,Billie Eilish,4693,Birds Of A Feather,2024,
806,Froukje,4790,Als Ik God Was,2023,


# Verdwenen uit de lijst
De volgende liedjes komen dit jaar niet meer voor in de top2000. De lijst is gesorteerd op jaar van uitgave van het liedje (nieuwste eerst).

In [10]:
# Songs that have a row in the {last} edition but none in the {current}
# edition, newest release year first. The selected position is the one the
# song held in the {last} edition.
sql = """
SELECT O.position, A.name, S.title, S.year
FROM OCCURANCE O 
INNER JOIN Song S ON S.rowid = O.song
INNER JOIN Artist A ON A.rowid = S.artist
WHERE O.edition = {last}
AND O.song NOT IN (SELECT song FROM Occurance WHERE edition={current})
ORDER  BY S.year DESC
""".format(last=edition - 1, current=edition)

# NOTE(review): db.dataframe presumably executes the query and returns a
# pandas DataFrame - confirm against the project's db module.
ex_entries = db.dataframe(sql).set_index("position")
row_count = len(ex_entries.index)
print(f"Aantal uit de lijst verdwenen: {row_count}")

# Show every row for this one display only. option_context restores the
# previous display.max_rows on exit, so the setting does not leak into other
# cells.
with pd.option_context('display.max_rows', None):
    display(ex_entries)

Aantal uit de lijst verdwenen: 105


Unnamed: 0_level_0,name,title,year
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1950,Ed Sheeran,Celestial,2022
1941,George Ezra,Green Green Grass,2022
1533,Lewis Capaldi,Forget Me,2022
1323,Suzan & Freek,Kwijt,2022
1580,Suzan & Freek,Honderd Keer,2022
1849,Son Mieux,This Is The Moment,2022
1497,Conan Gray,Astronomy,2022
1979,"Thomas Acda, Kraantje Pappie & Rolf Sanchez",Missen Zou,2022
1672,Ed Sheeran,Overpass Graffiti,2021
1794,Snelle,In M'n Bloed,2021
