In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

song_data = pd.read_csv('data/song_data_processed.csv')
eurovision = pd.read_csv('data/eurovision_processed.csv')
finals = pd.read_csv('data/finals_processed.csv')
ogae_polls = pd.read_csv('data/ogae_polls.csv')
world_polls = pd.read_csv('data/eurovisionworld_polls.csv')

In [2]:
# Scatterplot: OGAE poll results and televoting points and total points in the final
years = list(range(2016,2023))
years.remove(2020)

for year in years:
    filename = 'data/points_' + str(year) + '.csv'
    points = pd.read_csv(filename)
    poll_countries = list(ogae_polls['Contestant'])

    for country in poll_countries:
        if country not in list(points['Contestant']):
            points.loc[len(points.index)] = [country, 0, 0, 0]
    
    poll_points = list(ogae_polls[str(year)])
    points['OGAE poll score'] = poll_points

    titlename = 'Televoting Points and OGAE Poll Score in ' + str(year)
    fig = px.scatter(points, x='OGAE poll score', y='Televoting score', hover_name='Contestant', text='Contestant', 
                     color='Total score', color_continuous_scale='Viridis',title=titlename)
    fig.update_traces(textposition='top center')
    #fig.show()
    name = "ogae_poll_and_tele_and_total_points_scatter/ogae_poll_and_tele_and_total_points_scatter_" + str(year) + ".html"
    fig.write_html(name)

In [3]:
# Scatterplot: Eurovisionworld poll results and televoting points and total points in the final
years = list(range(2016,2023))
years.remove(2020)

for year in years:
    filename = 'data/points_' + str(year) + '.csv'
    points = pd.read_csv(filename)
    poll_countries = list(world_polls['Contestant'])
    
    for country in poll_countries:
        if country not in list(points['Contestant']):
            points.loc[len(points.index)] = [country, 0, 0, 0]

    poll_points = list(world_polls[str(year)])
    points['Eurovisionworld poll score'] = poll_points

    titlename = 'Televoting Points and Eurovisionworld Poll Score in ' + str(year)
    fig = px.scatter(points, x='Eurovisionworld poll score', y='Televoting score', hover_name='Contestant', text='Contestant', 
                     color='Total score', color_continuous_scale='Viridis',title=titlename)
    fig.update_traces(textposition='top center')
    #fig.show()
    name = "eurovisionworld_poll_and_tele_and_total_points_scatter/eurovisionworld_poll_and_tele_and_total_points_scatter_" + str(year) + ".html"
    fig.write_html(name)

In [4]:
# Scatterplot: televote and jury points (in percentages) in the final by country
years = list(range(2016,2023))
years.remove(2020)

for year in years:
    filename = 'data/points_' + str(year) + '.csv'
    points = pd.read_csv(filename)
    titlename = 'Jury points and Televoting Points in ' + str(year)
    fig = px.scatter(points, x='Jury score', y='Televoting score', hover_name='Contestant', text='Contestant', color='Total score', 
                 color_continuous_scale='Viridis',
                 title=titlename)
    fig.update_traces(textposition='top center')
    #fig.show()
    name = "jury_and_tele_points_scatter/jury_and_tele_points_scatter_" + str(year) + ".html"
    fig.write_html(name)

In [5]:
# Choropleth Map: number of victories
data = finals[['Place', 'Country']]
df = data.loc[data['Place'] == 1]

country_counts = df.groupby('Country').count()['Place'].reset_index()
country_counts.columns = ['Country', 'Victories']

countries = data.groupby('Country').count().reset_index()
countries.columns = ['Country', 'Number']
countries_list = list(countries['Country'])

for country in countries_list:
    if country not in list(country_counts['Country']):
        country_counts.loc[len(country_counts.index)] = [country, 0]

fig = px.choropleth(
    country_counts,
    locations='Country', 
    color='Victories',
    hover_name='Country',
    color_continuous_scale='Viridis',
    locationmode = 'country names',
    title='Number of victories by Country'
)
#fig.show()
name = "numer_of_victories_map.html"
fig.write_html(name)

In [6]:
# Scatter: country’s debut in the competition and the number of participations
df = finals.groupby('Country').agg({'Year': 'min', 'Countries': 'count'}).reset_index()

fig = px.line(df, x="Year", y="Countries", color='Country', markers=True, 
              title='Debut in the competition and the number of participations')
#fig.show()

name = "countrys_debut_and_numer_of_participations.html"
fig.write_html(name)

In [103]:
# Column histogram: Number of participating countries each year
import plotly.graph_objs as go
yearly_counts = finals.groupby('Year')['Country'].nunique()

fig = go.Figure(
    go.Bar(
        x=yearly_counts.index,
        y=yearly_counts.values,
    )
)

fig.update_layout(
    title='Number of participating countries each year',
    xaxis_title='Year',
    yaxis_title='Number of participants'
)

#fig.show()
name = "number_of_participating_countries.html"
fig.write_html(name)

In [None]:
# Line chart: tempo (BPM) distribution by country and all time
data = song_data[['country','BPM','year']]
data = data.dropna()
data = data.drop(data[data['BPM'] == '-'].index)
data['BPM'] = [int(bpm) for bpm in data['BPM']]

countries = list(data['country'].unique())
countries = [x for x in countries if str(x) != 'nan']
countries = sorted(countries)

for country in countries:
    df = data.loc[data['country'] == country]
    counts = df.groupby('year').mean().reset_index()
    pie_title = "Tempo Distribution of Songs of " + str(country) + " Since 2009"
    fig = px.line(df, x="year", y="BPM", title=pie_title)
    #fig.show()
    name = "tempo_line_country/tempo_line_country_" + str(country) + ".html"
    fig.write_html(name)

grouped_data = data.groupby('year').mean().reset_index()
fig = px.line(grouped_data, x="year", y="BPM", title='Tempo Distribution of All Songs Since 2009')
#fig.show()
name = "tempo_line_country/tempo_line_country_All.html"
fig.write_html(name)

In [55]:
# Pie chart: tempo distribution by year
data = song_data[['country','BPM','year']]
data = data.dropna()
data = data.drop(data[data['BPM'] == '-'].index)

years = list(range(2009,2023))

for year in years:
    df = data.loc[data['year'] == year]
    counts = df['BPM'].value_counts().reset_index()
    counts.columns = ['BPM', 'count']
    pie_title = "Tempo Distribution of Songs in " + str(year)
    fig = px.pie(counts, values='count', names='BPM', title=pie_title)
    #fig.show()
    name = "tempo_pie_year/tempo_pie_year_" + str(year) + ".html"
    fig.write_html(name) 


In [41]:
# Pie chart: key distribution by year and country
data = song_data[['country','key','year']]
data = data.dropna()
data = data.drop(data[data['key'] == '-'].index)

years = list(range(2009,2023))

for year in years:
    df = data.loc[data['year'] == year]
    counts = df['key'].value_counts().reset_index()
    counts.columns = ['key', 'count']
    pie_title = "Key Distribution of Songs in " + str(year)
    fig = px.pie(counts, values='count', names='key', title=pie_title)
    #fig.show()
    name = "key_pie_year/key_pie_year_" + str(year) + ".html"
    fig.write_html(name) 

countries = list(data['country'].unique())
countries = [x for x in countries if str(x) != 'nan']
countries = sorted(countries)

for country in countries:
    df = data.loc[data['country'] == country]
    counts = df['key'].value_counts().reset_index()
    counts.columns = ['key', 'count']
    pie_title = "Key Distribution of Songs of " + str(country) + " Since 2009"
    fig = px.pie(counts, values='count', names='key', title=pie_title)
    #fig.show()
    name = "key_pie_country/key_pie_country_" + str(country) + ".html"
    fig.write_html(name) 

counts = data['key'].value_counts().reset_index()
counts.columns = ['key', 'count']
pie_title = "Key Distribution of All Songs"
fig = px.pie(counts, values='count', names='key', title=pie_title)
#fig.show()
name = "key_pie_year/key_pie_year_All.html"
fig.write_html(name) 

name = "key_pie_country/key_pie_country_All.html"
fig.write_html(name)

In [17]:
# Column histogram: running order number in the final and number of points
import plotly.express as px

data = song_data[['final_draw_position','final_total_points','year']]
data = data.dropna()
data = data.drop(data[data['final_draw_position'] == '-'].index)
data['Final running order'] = [int(pos) for pos in data['final_draw_position']]
data['Final points'] = [int(point) for point in data['final_total_points']]
data = data.reset_index()
grouped_data = data.groupby('Final running order').mean().reset_index()

fig = px.bar(grouped_data, x='Final running order', y='Final points',
                 title='Final Running Order and the Number of Points Since 2009')
#fig.show()
name = "running_order_in_final_and_points_column_histogram.html"
fig.write_html(name)


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [16]:
# Column histogram: running order number in the final and number of victories
import pandas as pd
import plotly.express as px
import numpy as np

data = song_data[['final_draw_position','final_place']]
data = data.dropna()
data = data.drop(data[data['final_place'] == '-'].index)
data['Final running order'] = [int(pos) for pos in data['final_draw_position']]
data['final_place'] = [int(place) for place in data['final_place']]

grouped_data = data.groupby('Final running order').mean().reset_index()

counts = []
for pos in grouped_data['Final running order']:
    count = 0
    df = data[data['Final running order'] == pos]
    for place in df['final_place']:
        if place == 1: count += 1
    counts.append(count)

grouped_data['Number of victories'] = counts

fig = px.bar(grouped_data, x='Final running order', y='Number of victories',
                 title='Final Running Order and the Number of Victories Since 2009')
#fig.show()
name = "running_order_in_final_and_number_victories_column_histogram.html"
fig.write_html(name)


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [15]:
# Column histogram: running order number and average placement in the final
import pandas as pd
import plotly.express as px
import numpy as np

data = song_data[['final_draw_position','final_place']]
data = data.dropna()
data = data.drop(data[data['final_place'] == '-'].index)
data['Final running order'] = [int(pos) for pos in data['final_draw_position']]
data['Final place'] = [int(place) for place in data['final_place']]

grouped_data = data.groupby('Final running order').mean().reset_index()

fig = px.bar(grouped_data, x='Final running order', y='Final place',
                 title='Final Running Order and the Average placement Since 2009')
#fig.show()
name = "running_order_and_average_placement_in_final_column_histogram.html"
fig.write_html(name)


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [14]:
# Column histogram: running order number in the semi-finals and number of qualifications
import pandas as pd
import plotly.express as px
import numpy as np

data = song_data[['semi_draw_position','qualified']]
data = data.dropna()
data = data.drop(data[data['qualified'] == '-'].index)
data['Semi running order'] = [int(pos) for pos in data['semi_draw_position']]

grouped_data = data.groupby('Semi running order').mean().reset_index()

q = []
for place in grouped_data['Semi running order']:
    q_place = []
    count = data[data['Semi running order'] == place]
    for value in count['qualified']:
        if value == '1': q_place.append(1)
    q.append(sum(q_place))
grouped_data['Qualified'] = q
grouped_data['In final'] = (grouped_data['Qualified'] / max(grouped_data['Qualified'])) * 100

fig = px.bar(grouped_data, x='Semi running order', y='Qualified', color='In final', 
                 color_continuous_scale='Viridis',
                 title='Semi-Final Running Order and Number of Qualifications')
#fig.show()
name = "running_order_in_semi_and_qualifications_column_histogram.html"
fig.write_html(name)

In [124]:
# Scatterplot: final and nonqualified numbers by country
import pandas as pd
import plotly.express as px
import numpy as np

data = song_data[['country','qualified']]
data = data.dropna()

grouped_data = data.groupby('country').mean().reset_index()
grouped_data = grouped_data.sort_values(by=['country']).reset_index()

nonq = []
q = []
for country in grouped_data['country']:
    nonq_country = []
    q_country = []
    count = data[data['country'] == country]
    for value in count['qualified']:
        if value == '-' or value == '1': q_country.append(1)
        else: nonq_country.append(1)
    nonq.append(sum(nonq_country))
    q.append(sum(q_country))
grouped_data['Qualified'] = q
grouped_data['Nonqualified'] = nonq
grouped_data['In final'] = grouped_data['Qualified'] / (grouped_data['Qualified'] + grouped_data['Nonqualified']) * 100

fig = px.scatter(grouped_data, x='Qualified', y='Nonqualified', hover_name='country', text='country', color='In final', 
                 color_continuous_scale='Viridis',
                 title='Qualified and Nonqualified Numbers by Country Since 2009')
fig.update_traces(textposition='top center')
#fig.show()
name = "qualified_and_nonqualified_numbers_scatter.html"
fig.write_html(name) 


The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [18]:
# Column histogram: average placement by country ALL TIME
import pandas as pd
import plotly.express as px
import numpy as np

data = finals[['Country','Place']]

grouped_data = data.groupby('Country').mean().reset_index()
grouped_data = grouped_data.sort_values(by=['Country']).reset_index()

counts = finals['Country'].value_counts().reset_index()
counts.columns = ['Country', 'Count']
counts = counts.sort_values(by=['Country']).reset_index()

data_df = pd.concat([grouped_data,counts['Count']],axis=1)
data_df = data_df.sort_values(by=['Place']).reset_index()
data_df['Goodness'] = data_df['Count'] / data_df['Place']

fig = px.scatter(data_df, x='Place', y='Count', hover_name='Country', text='Country', color='Goodness', 
                 color_continuous_scale='Viridis',
                 title='Final Average Placement and Times in the Final by Country')
fig.update_traces(textposition='top center')
#fig.show()
name = "final_points_and_times_scatter.html"
fig.write_html(name) 

In [39]:
# Pie chart: average points (in percentages) in the final by year SINCE 2009
import plotly.express as px
import pandas as pd

song_data = song_data.dropna()
years = song_data.groupby('year')
keys = list(years.groups)

for key in keys:
    year = years.get_group(key)
    pie_title = "Distribution of Points in Final in " + str(key)
    fig = px.pie(year, values='final_total_points', names='country', title=pie_title)
    #fig.show()
    name = "final_points_pie_year/final_points_pie_year_" + str(key) + ".html"
    fig.write_html(name) 

fig = px.pie(song_data, values='final_total_points', names='country', title=pie_title)
name = "final_points_pie_year/final_points_pie_year_All.html"
fig.write_html(name) 

In [123]:
# Pie chart: style of the song by country and year (in percentages) SINCE 2009
import plotly.express as px
import pandas as pd

years = list(range(2009,2023))

for year in years:
    df = song_data.loc[song_data['year'] == year]
    counts = df['style'].value_counts().reset_index()
    counts.columns = ['style', 'count']
    pie_title = "Style Distribution of Songs in " + str(year)
    fig = px.pie(counts, values='count', names='style', title=pie_title)
    #fig.show()
    name = "style_pie_year/style_pie_year_" + str(year) + ".html"
    fig.write_html(name) 

countries = list(song_data['country'].unique())
countries = [x for x in countries if str(x) != 'nan']
countries = sorted(countries)

for country in countries:
    df = song_data.loc[song_data['country'] == country]
    counts = df['style'].value_counts().reset_index()
    counts.columns = ['style', 'count']
    pie_title = "Style Distribution of Songs of " + str(country) + " Since 2009"
    fig = px.pie(counts, values='count', names='style', title=pie_title)
    #fig.show()
    name = "style_pie_country/style_pie_country_" + str(country) + ".html"
    fig.write_html(name) 

counts = song_data['style'].value_counts().reset_index()
counts.columns = ['style', 'count']
pie_title = "Style Distribution of All Songs"
fig = px.pie(counts, values='count', names='style', title=pie_title)
#fig.show()
name = "style_pie_year/style_pie_year_All.html"
fig.write_html(name) 

name = "style_pie_country/style_pie_country_All.html"
fig.write_html(name)

In [122]:
# Pie chart: language used by country and year (in percentages) SINCE 2009
import plotly.express as px
import pandas as pd

years = list(range(2009,2023))

for year in years:
    df = song_data.loc[song_data['year'] == year]
    counts = df['language'].value_counts().reset_index()
    counts.columns = ['language', 'count']
    pie_title = "Language Distribution of Songs in " + str(year)
    fig = px.pie(counts, values='count', names='language', title=pie_title)
    #fig.show()
    name = "language_pie_year/language_pie_year_" + str(year) + ".html"
    fig.write_html(name) 

countries = list(song_data['country'].unique())
countries = [x for x in countries if str(x) != 'nan']
countries = sorted(countries)

for country in countries:
    df = song_data.loc[song_data['country'] == country]
    counts = df['language'].value_counts().reset_index()
    counts.columns = ['language', 'count']
    pie_title = "Language Distribution of Songs of " + str(country) + " Since 2009"
    fig = px.pie(counts, values='count', names='language', title=pie_title)
    #fig.show()
    name = "language_pie_country/language_pie_country_" + str(country) + ".html"
    fig.write_html(name) 


In [121]:
# Pie chart: female / male as representative by country and year (in percentages) and All SINCE 2009

import plotly.express as px
import pandas as pd

years = list(range(2009,2023))

for year in years:
    df = song_data.loc[song_data['year'] == year]
    counts = df['gender'].value_counts().reset_index()
    counts.columns = ['gender', 'count']
    pie_title = "Gender Distribution of Songs in " + str(year)
    fig = px.pie(counts, values='count', names='gender', title=pie_title)
    #fig.show()
    name = "gender_pie_year/gender_pie_year_" + str(year) + ".html"
    fig.write_html(name) 

countries = list(song_data['country'].unique())
countries = [x for x in countries if str(x) != 'nan']
countries = sorted(countries)

for country in countries:
    df = song_data.loc[song_data['country'] == country]
    counts = df['gender'].value_counts().reset_index()
    counts.columns = ['gender', 'count']
    pie_title = "Gender Distribution of Songs of " + str(country) + " Since 2009"
    fig = px.pie(counts, values='count', names='gender', title=pie_title)
    #fig.show()
    name = "gender_pie_country/gender_pie_country_" + str(country) + ".html"
    fig.write_html(name) 

counts = song_data['gender'].value_counts().reset_index()
counts.columns = ['gender', 'count']
pie_title = "Gender Distribution of All Songs"
fig = px.pie(counts, values='count', names='gender', title=pie_title)
#fig.show()
name = "gender_pie_year/gender_pie_year_All.html"
fig.write_html(name) 

name = "gender_pie_country/gender_pie_country_All.html"
fig.write_html(name)