In [None]:
import pandas as pd

# Input file and date cutoff used by the analysis below.
DATA_PATH = 'dat/combined_data.csv'
CUTOFF_DATE = '2021-10-26'


def votes_after(df, cutoff=CUTOFF_DATE):
    """Return the rows of *df* dated strictly after *cutoff*, with a 'FullName' column.

    Args:
        df: DataFrame with a datetime 'Date' column and string 'Vorname' and
            'Name' columns.
        cutoff: Anything ``pd.Timestamp`` accepts; rows with ``Date > cutoff``
            are kept.

    Returns:
        A new DataFrame (an explicit copy, so *df* is never modified) with an
        added 'FullName' column built as ``Vorname + ' ' + Name``.
    """
    # .copy() detaches the selection from `df`; assigning a column to a bare
    # boolean-mask slice triggers SettingWithCopyWarning and may not stick.
    selected = df[df['Date'] > pd.Timestamp(cutoff)].copy()
    selected['FullName'] = selected['Vorname'] + ' ' + selected['Name']
    return selected


def compute_yes_scores(votes):
    """Aggregate per-person vote counts and the share of 'yes' votes.

    Args:
        votes: DataFrame with 'FullName' and 'ja' columns.
            NOTE(review): 'ja' is summed, so it is presumably a 0/1 or boolean
            flag marking a 'yes' vote -- confirm against the CSV schema.

    Returns:
        DataFrame with columns 'FullName', 'total_votes', 'yes_votes' and
        'relative_yes_score' (yes_votes / total_votes), sorted by
        'relative_yes_score' in descending order.
    """
    scores = votes.groupby('FullName').agg(
        total_votes=('ja', 'size'),
        yes_votes=('ja', 'sum'),
    ).reset_index()
    scores['relative_yes_score'] = scores['yes_votes'] / scores['total_votes']
    return scores.sort_values(by='relative_yes_score', ascending=False)


if __name__ == "__main__":
    # Read the combined DataFrame from the CSV file
    combined_df = pd.read_csv(DATA_PATH)

    # Convert the 'Date' column to datetime format (day.month.year in the file)
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], format='%d.%m.%Y')

    # Keep only votes dated strictly after the cutoff and add the full name
    filtered_df = votes_after(combined_df, CUTOFF_DATE)

    # Total votes, 'yes' votes and relative 'yes' score per person, best first
    vote_counts = compute_yes_scores(filtered_df)

    # Person with the highest relative 'yes' score (None if nothing matched)
    highest_relative_yes_score = None
    if vote_counts.empty:
        # iloc[0] on an empty frame would raise IndexError; report instead.
        print(f"No votes found after {CUTOFF_DATE}.")
    else:
        highest_relative_yes_score = vote_counts.iloc[0]
        print(f"The person with the highest relative 'yes' score is {highest_relative_yes_score['FullName']} with a score of {highest_relative_yes_score['relative_yes_score']:.2f}.")

    # Display the top 100 and bottom 20 rows of the sorted scores
    print(vote_counts[:100])
    print(vote_counts[-20:])
