In [1]:
import json

import pandas as pd

# Base directory for the JSON files written by the cells below.
# Note: the value ends with "/" and the output paths are built as
# f"{PATH}/<name>.json", so the resulting paths contain a double slash
# (e.g. "./data//countries.json").
PATH = "./data/"

In [2]:
# Contest editions processed by the loops in this notebook.
# NOTE(review): 2020 is not listed — presumably no result files exist for
# that year; confirm against the data directory.
years = [
    2016,
    2017,
    2018,
    2019,
    2021,
    2022,
]

# Two-letter country code -> country display name.
code2country = {
    "AL": "Albania",
    "AM": "Armenia",
    "AU": "Australia",
    "AT": "Austria",
    "AZ": "Azerbaijan",
    "BY": "Belarus",
    "BE": "Belgium",
    "BA": "Bosnia and Herzegovina",
    "BG": "Bulgaria",
    "HR": "Croatia",
    "CY": "Cyprus",
    "CZ": "Czech Republic",
    "DK": "Denmark",
    "EE": "Estonia",
    "FI": "Finland",
    "FR": "France",
    "GE": "Georgia",
    "DE": "Germany",
    "GR": "Greece",
    "HU": "Hungary",
    "IS": "Iceland",
    "IE": "Ireland",
    "IL": "Israel",
    "IT": "Italy",
    "LV": "Latvia",
    "LT": "Lithuania",
    "MT": "Malta",
    "MD": "Moldova",
    "ME": "Montenegro",
    "NL": "Netherlands",
    "MK": "North Macedonia",
    "NO": "Norway",
    "PL": "Poland",
    "PT": "Portugal",
    "RO": "Romania",
    "RU": "Russia",
    "SM": "San Marino",
    "RS": "Serbia",
    "SI": "Slovenia",
    "ES": "Spain",
    "SE": "Sweden",
    "CH": "Switzerland",
    "UA": "Ukraine",
    "GB": "United Kingdom",
}

# Reverse lookup: country display name -> two-letter code.
country2code = dict(zip(code2country.values(), code2country.keys()))
# "Macedonia" is accepted as a second name for the "MK" code.
country2code.update({"Macedonia": "MK"})

In [3]:
def get_eurovision_world_data(year):
    """Load the Eurovision World poll results CSV for one year.

    Args:
        year: Value interpolated into the file name
            ``./data/Polls/Eurovision World Poll/{year}_eurovisionworld_results.csv``.

    Returns:
        pandas.DataFrame indexed by the file's "Contestant" column.
    """
    return pd.read_csv(
        f"./data/Polls/Eurovision World Poll/{year}_eurovisionworld_results.csv",
        index_col="Contestant",
    )


# For every contest year, collect the (code, name) pairs of the contestants
# listed in that year's Eurovision World poll file, sorted by code (then name).
countries_dict = {}
for year in years:
    poll_frame = get_eurovision_world_data(year)
    contestant_names = poll_frame.index.values.tolist()
    countries_dict[year] = sorted(
        (country2code[name], name) for name in contestant_names
    )

# json.dump serialises the integer year keys as strings and each pair as a
# two-element array.
with open(f"{PATH}/countries.json", "w") as out_file:
    json.dump(countries_dict, out_file, indent=4)

In [4]:
def get_jury_data(year, drop_cols=True):
    """Load one year's jury results CSV as an integer table.

    Args:
        year: Value interpolated into the file name
            ``./data/Final Results/Jury/{year}_jury_results.csv``.
        drop_cols: When true, the first three columns after the "Contestant"
            index are discarded (presumably summary columns — confirm against
            the CSV layout).

    Returns:
        pandas.DataFrame indexed by "Contestant", with missing cells replaced
        by 0 and every column cast to int32.
    """
    frame = pd.read_csv(
        f"./data/Final Results/Jury/{year}_jury_results.csv",
        index_col="Contestant",
    )
    scores = frame.iloc[:, 3:] if drop_cols else frame
    # Missing cells become 0 so the whole table can be cast to int32.
    return scores.fillna(0).astype("int32")


def _jury_points_desc(points):
    """Return [(country code, points), ...] for the positive entries of a Series.

    Entries are ordered by points, highest first; the Series labels are
    translated to codes through ``country2code``.
    """
    awarded = points[points > 0].sort_values(ascending=False)
    labelled = zip(awarded.index.to_list(), awarded.to_list())
    return [(country2code[name], value) for name, value in labelled]


### Votes out
# One entry per column of the jury table: the positive values found in that
# column, keyed by the column's country code.

jury_votes_dict = {}
for year in years:
    jury_table = get_jury_data(year)

    jury_votes = {}
    for column_name in jury_table:
        ranked = _jury_points_desc(jury_table[column_name])
        jury_votes[country2code[column_name]] = ranked

    jury_votes_dict[year] = jury_votes

with open(f"{PATH}/jury_votes_out.json", "w") as out_file:
    json.dump(jury_votes_dict, out_file, indent=4)

### Votes in
# One entry per row of the jury table: the positive values found in that row,
# keyed by the row's (contestant's) country code.

jury_votes_dict = {}
for year in years:
    jury_table = get_jury_data(year)

    jury_votes = {}
    for contestant, received in jury_table.iterrows():
        ranked = _jury_points_desc(received)
        jury_votes[country2code[contestant]] = ranked

    jury_votes_dict[year] = jury_votes

with open(f"{PATH}/jury_votes_in.json", "w") as out_file:
    json.dump(jury_votes_dict, out_file, indent=4)

In [5]:
def get_televote_data(year, drop_cols=True):
    """Load one year's televote results CSV as an integer table.

    Args:
        year: Value interpolated into the file name
            ``./data/Final Results/Televote/{year}_televote_results.csv``.
        drop_cols: When true, the first three columns after the "Contestant"
            index are discarded (presumably summary columns — confirm against
            the CSV layout).

    Returns:
        pandas.DataFrame indexed by "Contestant", with missing cells replaced
        by 0 and every column cast to int32.
    """
    frame = pd.read_csv(
        f"./data/Final Results/Televote/{year}_televote_results.csv",
        index_col="Contestant",
    )
    scores = frame.iloc[:, 3:] if drop_cols else frame
    # Missing cells become 0 so the whole table can be cast to int32.
    return scores.fillna(0).astype("int32")


def _televote_points_desc(points):
    """Return [(country code, points), ...] for the positive entries of a Series.

    Entries are ordered by points, highest first; the Series labels are
    translated to codes through ``country2code``.
    """
    awarded = points[points > 0].sort_values(ascending=False)
    labelled = zip(awarded.index.to_list(), awarded.to_list())
    return [(country2code[name], value) for name, value in labelled]


### Votes out
# One entry per column of the televote table: the positive values found in
# that column, keyed by the column's country code.

televotes_dict = {}
for year in years:
    televote_table = get_televote_data(year)

    televotes = {}
    for column_name in televote_table:
        ranked = _televote_points_desc(televote_table[column_name])
        televotes[country2code[column_name]] = ranked

    televotes_dict[year] = televotes

with open(f"{PATH}/televotes_out.json", "w") as out_file:
    json.dump(televotes_dict, out_file, indent=4)

### Votes in
# One entry per row of the televote table: the positive values found in that
# row, keyed by the row's (contestant's) country code.

televotes_dict = {}
for year in years:
    televote_table = get_televote_data(year)

    televotes = {}
    for contestant, received in televote_table.iterrows():
        ranked = _televote_points_desc(received)
        televotes[country2code[contestant]] = ranked

    televotes_dict[year] = televotes

with open(f"{PATH}/televotes_in.json", "w") as out_file:
    json.dump(televotes_dict, out_file, indent=4)

In [6]:
# Per year: (country code, value) pairs taken from the first column of the
# untrimmed televote results table, ordered from highest to lowest value.
# NOTE(review): the "final" ranking is read from the televote file's first
# column — presumably an overall total score; confirm against the CSV layout.
final_rankings_dict = {}
for year in years:
    full_table = get_televote_data(year, drop_cols=False)
    first_column = full_table.iloc[:, 0].sort_values(ascending=False)

    labelled = zip(first_column.index.to_list(), first_column.to_list())
    final_rankings_dict[year] = [
        (country2code[name], value) for name, value in labelled
    ]

with open(f"{PATH}/final_rankings.json", "w") as out_file:
    json.dump(final_rankings_dict, out_file, indent=4)

In [7]:
def get_eurovision_world_data(year):
    """Load the Eurovision World poll results CSV for one year.

    NOTE(review): this repeats the definition given earlier in the notebook
    (cell 3) with the same body; the redefinition is redundant when the
    notebook is run top to bottom.

    Args:
        year: Value interpolated into the file name
            ``./data/Polls/Eurovision World Poll/{year}_eurovisionworld_results.csv``.

    Returns:
        pandas.DataFrame indexed by the file's "Contestant" column.
    """
    return pd.read_csv(
        f"./data/Polls/Eurovision World Poll/{year}_eurovisionworld_results.csv",
        index_col="Contestant",
    )


# Per year: one (country code, value) pair per contestant in the Eurovision
# World poll file, in file order. Only the first column's value is kept.
eurovisionworld_by_year = {}
for year in years:
    poll_frame = get_eurovision_world_data(year)

    contestant_names = poll_frame.index.to_list()
    row_values = poll_frame.values.tolist()
    eurovisionworld_by_year[year] = [
        (country2code[name], values[0])
        for name, values in zip(contestant_names, row_values)
    ]

with open(f"{PATH}/eurovisionworld_by_year.json", "w") as out_file:
    json.dump(eurovisionworld_by_year, out_file, indent=4)

In [8]:
# Per country code: a list of (year, value) pairs built from the first column
# of each year's Eurovision World poll file, in the order the years are visited.
eurovisionworld_by_country = {}
for year in years:
    poll_frame = get_eurovision_world_data(year)

    for contestant, poll_row in poll_frame.iterrows():
        code = country2code[contestant]
        # Start an empty history the first time a country code is seen.
        history = eurovisionworld_by_country.setdefault(code, [])
        history.append((year, int(poll_row.values[0])))

with open(f"{PATH}/eurovisionworld_by_country.json", "w") as out_file:
    json.dump(eurovisionworld_by_country, out_file, indent=4)

In [9]:
def get_ogae_data(year):
    """Load the OGAE poll results CSV for one year.

    Args:
        year: Value interpolated into the file name
            ``./data/Polls/OGAE Poll/{year}_ogae_results.csv``.

    Returns:
        pandas.DataFrame indexed by the file's "Contestant" column.
    """
    return pd.read_csv(
        f"./data/Polls/OGAE Poll/{year}_ogae_results.csv",
        index_col="Contestant",
    )


# Per year: one (country code, value) pair per contestant in the OGAE poll
# file, in file order. Only the first column's value is kept.
ogae_by_year = {}
for year in years:
    poll_frame = get_ogae_data(year)

    contestant_names = poll_frame.index.to_list()
    row_values = poll_frame.values.tolist()
    ogae_by_year[year] = [
        (country2code[name], values[0])
        for name, values in zip(contestant_names, row_values)
    ]

with open(f"{PATH}/ogae_by_year.json", "w") as out_file:
    json.dump(ogae_by_year, out_file, indent=4)

In [10]:
# Per country code: a list of (year, value) pairs built from the first column
# of each year's OGAE poll file, in the order the years are visited.
ogae_by_country = {}
for year in years:
    poll_frame = get_ogae_data(year)

    for contestant, poll_row in poll_frame.iterrows():
        code = country2code[contestant]
        # Start an empty history the first time a country code is seen.
        history = ogae_by_country.setdefault(code, [])
        history.append((year, int(poll_row.values[0])))

with open(f"{PATH}/ogae_by_country.json", "w") as out_file:
    json.dump(ogae_by_country, out_file, indent=4)