In [1]:
import pandas as pd
import json

Load datasets

In [9]:
# Read the contestant and vote tables from CSV files in the working directory.
contestants_df, votes_df = (
    pd.read_csv(csv_name) for csv_name in ("contestants.csv", "votes.csv")
)

Missing values per column in the contestants and votes datasets:

In [None]:
# Per-column count of missing values: contestants first, then votes.
print(
    contestants_df.isna().sum(),
    votes_df.isna().sum(),
    sep="\n",
)

Number of participants per year

In [None]:
# Number of non-null `to_country` entries recorded for each contest year.
participants_per_year = (
    contestants_df
    .groupby("year")["to_country"]
    .count()
)
participants_per_year.head()

Top 10 countries by total points in finals (first 5 displayed)

In [None]:

# Sum `points_final` per country and keep the ten largest totals.
top_countries = (
    contestants_df
    .groupby("to_country")["points_final"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
top_countries.head()

Top 5 countries with the highest televote-to-jury point ratio in finals

In [None]:
# Per-country totals of televote and jury points in finals, plus their ratio.
voting_comparison = (
    contestants_df
    .groupby("to_country")[["points_tele_final", "points_jury_final"]]
    .sum()
    .assign(
        # The +1 in the denominator keeps the ratio finite when a country
        # has zero jury points.
        tele_jury_ratio=lambda sums: sums["points_tele_final"]
        / (sums["points_jury_final"] + 1)
    )
)
voting_comparison.sort_values("tele_jury_ratio", ascending=False).head()

Friend voting trends (potential biases)

In [None]:
# Total points each (voter, recipient) pair has exchanged, ten largest kept.
top_voting_bias = (
    votes_df
    .groupby(["from_country", "to_country"])["total_points"]
    .sum()
    .reset_index()
    .sort_values("total_points", ascending=False)
    .head(10)
)
top_voting_bias.head()

Analyze whether jury voting and televoting favor the same countries

In [None]:
# Televote and jury point totals received per country, and their difference
# (positive values mean more televote points than jury points).
tele_vs_jury = (
    votes_df
    .groupby("to_country")[["tele_points", "jury_points"]]
    .sum()
    .assign(
        tele_jury_diff=lambda totals: totals["tele_points"] - totals["jury_points"]
    )
)
tele_vs_jury.sort_values("tele_jury_diff", ascending=False).head()

In [87]:

def get_votes(year, country, votes=None):
    """Summarise the points one country handed out in a given year.

    Args:
        year: Contest year, matched against the ``year`` column.
        country: Voting country, matched against the ``from_country`` column.
        votes: Optional DataFrame with the columns ``year``, ``from_country``,
            ``to_country``, ``total_points``, ``jury_points`` and
            ``tele_points``. Defaults to the notebook-level ``votes_df``.

    Returns:
        A nested dict ``{from_country: {to_country: {"total_points": int,
        "jury_points": int, "tele_points": int}}}`` in which the points of
        all matching rows for the same recipient are summed, or the string
        ``'No votes from <country> in the year <year>'`` when no matching row
        carries a non-zero total.
    """
    if votes is None:
        votes = votes_df  # frame loaded from votes.csv earlier in the notebook

    def _as_points(value):
        # Missing point values are counted as zero instead of crashing int().
        return 0 if pd.isna(value) else int(value)

    selected = votes[(votes["year"] == year) & (votes["from_country"] == country)]

    given = {}
    columns = ("from_country", "to_country", "total_points", "jury_points", "tele_points")
    for voter, recipient, total, jury, tele in zip(*(selected[name] for name in columns)):
        total = _as_points(total)
        if total == 0:
            # Rows that award nothing (or have no total at all) are left out.
            continue

        tally = given.setdefault(voter, {}).setdefault(
            recipient,
            {"total_points": 0, "jury_points": 0, "tele_points": 0},
        )
        tally["total_points"] += total
        tally["jury_points"] += _as_points(jury)
        tally["tele_points"] += _as_points(tele)

    if given:
        return given
    return f'No votes from {country} in the year {year}'

In [88]:
# Build a nested summary keyed by year:
#   {year: {"participating_countries": <count>, "countries": {name: info}}}
# Every country that appears anywhere in the contestants table gets an entry
# for every year; countries absent in a given year are marked as not having
# participated.
years = contestants_df["year"].unique()
countries = contestants_df["to_country"].unique()
result = {}

for year in years:
    year_data = contestants_df[contestants_df["year"] == year]

    year_countries = {}
    participating_count = 0

    for country in countries:
        country_data = year_data[year_data["to_country"] == country]

        if country_data.empty:
            country_info = {"participated": False}
        else:
            # Only the first row for this country/year is exported.
            first_entry = country_data.iloc[0]
            # Missing CSV cells come back as NaN, which json.dump would write
            # as a bare `NaN` token (not valid JSON); store them as None so
            # they are written as `null`.
            country_info = {
                field: (None if pd.isna(value) else value)
                for field, value in first_entry.to_dict().items()
            }
            country_info["participated"] = True
            # Votes are looked up by the contestant row's `to_country_id`.
            country_info["Votes"] = get_votes(year, str(first_entry["to_country_id"]))
            participating_count += 1

        year_countries[country] = country_info

    # Stored once per year, after all countries have been processed.
    # int() turns the NumPy year into a plain int key for JSON.
    result[int(year)] = {
        "participating_countries": participating_count,
        "countries": year_countries,
    }

In [89]:
# Serialise the per-year summary and write it to disk, indented by 4 spaces.
serialised = json.dumps(result, indent=4)
with open("eurovision_data.json", "w") as out_file:
    out_file.write(serialised)
