In [None]:
import ast
import calendar

import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Helper constants
# Values of the 'Body' column used to select one of the two bodies.
GA = 'General Assembly'
SC = 'Security Council'
# Year bounds used by the period analyses.
# NOTE(review): END appears to be the current, still incomplete year (several
# cells use END - 1 or drop it) — confirm before changing it.
START = 1946
END = 2024

In [None]:
# Load the table of all records as the primary data frame
# keep_default_na=False stops pandas from turning empty cells and markers such
# as 'N/A' into NaN; the voting-data helpers compare against the literal 'N/A'.
records_path = "../data/records.csv"
df = pd.read_csv(records_path, index_col='ID', keep_default_na=False)

## Helpers

In [None]:
# Filter DFs for different parameters

def date_is_between(date, start_year, end_year):
    """Return a boolean mask of the dates whose year is in [start_year, end_year].

    `date` is a Series of strings whose first four characters are the year.
    Both bounds are inclusive.
    """
    years = date.str.slice(0, 4).astype(int)
    return (years >= start_year) & (years <= end_year)

def filter_year(df, start_year, end_year):
    """Return the rows of `df` whose 'Date' year is in [start_year, end_year]."""
    in_period = date_is_between(df['Date'], start_year, end_year)
    return df.loc[in_period]

def filter_body(df, body):
    """Return the rows of `df` whose 'Body' column equals `body`."""
    matches_body = df['Body'].eq(body)
    return df.loc[matches_body]

def filter(df, body, start_year, end_year):
    """Return the rows of `df` for `body` dated within [start_year, end_year].

    Note: this name shadows Python's built-in `filter` for the rest of the notebook.
    """
    matches_body = df['Body'] == body
    in_period = date_is_between(df['Date'], start_year, end_year)
    return df[matches_body & in_period]

In [None]:
# Common maps
def date_to_year(date):
    """Return the year encoded in the first four characters of `date` as an int."""
    year_text = date[:4]
    return int(year_text)

def to_percentage(value, total):
    """Return `value` expressed as a percentage of `total`."""
    scaled = value * 100
    return scaled / total

## Title

In [None]:
# Which titles appear most often
# DataFrame.value_counts() returns the counts sorted in descending order and
# indexed by a one-level MultiIndex, i.e. every key is a 1-tuple holding the title.
df_title_counts = df[['Title']].value_counts()
df_title_counts.head(5)

In [None]:
# Are resolutions with the same title all within a few years of each other?
# index[0] is the 1-tuple key of the most frequent title; the second [0] unwraps the string.
most_common_title = df_title_counts.index[0][0]
df_most_common_title = df[df['Title'] == most_common_title]
# Show the first and last three matching rows (in file order — presumably chronological; verify).
display(df_most_common_title.head(3))
display(df_most_common_title.tail(3))

## Body

In [None]:
# What percentage of all resolutions was adopted by the SC vs. the GA?
# value_counts() on the one-column frame yields the number of records per body.
df_body_counts = df[['Body']].value_counts()
sc_percentage = df_body_counts[SC] * 100 / df.shape[0]
# NOTE: `ga_perctange` is a misspelling of "ga_percentage"; the name is kept unchanged here.
ga_perctange = df_body_counts[GA] * 100 / df.shape[0]
print('Resolutions by Security Council: ~' + str(round(sc_percentage)) + '%')
print('Resolutions by General Assembly: ~' + str(round(ga_perctange)) + '%')

## Dates

In [None]:
# Which dates were the most productive for either chamber
def count_resolutions_per_date(body):
    """Print the body name and display its five dates with the most resolutions."""
    print(body)
    body_dates = filter_body(df, body)[['Date']]
    busiest_dates = body_dates.value_counts().head(5)
    display(busiest_dates)

count_resolutions_per_date(SC)
print('')
count_resolutions_per_date(GA)

In [None]:
# Were there dates with only one resolution adopted?
# Counts are sorted in descending order, so tail() shows the General Assembly's
# dates with the fewest resolutions.
df_date_counts = filter_body(df, GA)[['Date']].value_counts()
df_date_counts.tail(5)

In [None]:
# Investigate the most productive dates
def analyze_most_productive_date(body):
    """Print the busiest adoption date of `body` and its share of that body's year.

    Both the per-date count and the yearly total are restricted to `body`, so
    the printed percentage compares like with like.
    """
    print(body)

    df_body = filter_body(df, body)
    # value_counts() sorts in descending order: the first entry is the busiest date.
    dates_by_productivity = df_body[['Date']].value_counts()

    # The index is a one-level MultiIndex, so index[0] is a 1-tuple holding the date.
    most_productive_date = dates_by_productivity.index[0][0]
    most_productive_date_count = dates_by_productivity.iloc[0]

    most_productive_date_year = most_productive_date[0:4]
    # Count within the same body; counting every body here would understate the share.
    in_same_year = df_body['Date'].str.startswith(most_productive_date_year)
    most_productive_date_year_count = df_body[in_same_year].shape[0]

    most_productive_date_percentage_in_its_year = most_productive_date_count * 100 / most_productive_date_year_count

    print('Most productive date: ' + str(most_productive_date))
    print('During this date a total of ' + str(most_productive_date_count) + ' resolutions were adopted')
    print('During that year a total of ' + str(most_productive_date_year_count) + ' resolutions were adopted')
    print('That is ~' + str(round(most_productive_date_percentage_in_its_year)) + '% of all resolutions for that year')

analyze_most_productive_date(SC)
print('-------------------------------------')
analyze_most_productive_date(GA)

In [None]:
# How many dates in each year had resolutions held
def analyze_active_dates_per_year(body):
    """Print the most and least active complete years of `body`.

    A year's activity is the percentage of its calendar days on which at least
    one resolution was adopted.
    """
    print(body)
    df_body = filter_body(df, body)

    year_activity = {}
    # END is the current, incomplete year: range(START, END) covers START..END-1,
    # i.e. every complete year. (range(START, END - 1) also dropped END - 1.)
    for year in range(START, END):
        days_in_year = 365 + calendar.isleap(year)
        df_year = df_body[df_body['Date'].str.startswith(str(year))]

        active_dates = df_year['Date'].nunique()
        active_percentage = active_dates * 100 / days_in_year

        year_activity[year] = active_percentage

    most_active_year = max(year_activity, key=year_activity.get)
    most_activity = year_activity[most_active_year]
    print('Most active year ' + str(most_active_year) + ' had ' + str(round(most_activity)) + '% activity')

    least_active_year = min(year_activity, key=year_activity.get)
    least_activity = year_activity[least_active_year]
    print('Least active year ' + str(least_active_year) + ' had ' + str(round(least_activity)) + '% activity')

analyze_active_dates_per_year(SC)
print('-------------------------------------')
analyze_active_dates_per_year(GA)

In [None]:
# What's the average amount of adoptions in an active day?
def analyze_average_activity(body, start_year, end_year):
    """Print total, mean and median adoptions per active date.

    Covers resolutions of `body` dated within [start_year, end_year] (inclusive).
    Only dates with at least one adoption enter the mean and median.
    """
    print(body + " average adoptions for years [{}, {}]".format(start_year, end_year))

    df_filtered = filter(df, body, start_year, end_year)
    # One entry per date that had adoptions; dates without any are absent.
    resolutions_per_date = df_filtered[['Date']].value_counts()
    # Fixed the misspelled label ("adoptiops").
    print('Total adoptions in that period: ' + str(df_filtered.shape[0]))
    print('Average resolutions adopted per day: ' + str(resolutions_per_date.mean()))
    print('Median resolutions adopted per day: ' + str(resolutions_per_date.median()))
    print('')

analyze_average_activity(SC, START, END - 1)
analyze_average_activity(GA, START, END - 1)

In [None]:
# How does the average amount of adoptions change over the years?

def analyze_average_activity_per_year(body):
    """Run analyze_average_activity for `body` over five consecutive periods.

    The last period stops at END - 1.
    """
    periods = (
        (START, 1955),
        (1956, 1975),
        (1976, 1990),
        (1991, 2002),
        (2003, END - 1),
    )
    for first_year, last_year in periods:
        analyze_average_activity(body, first_year, last_year)

analyze_average_activity_per_year(SC)
print('-------------------------------------')
analyze_average_activity_per_year(GA)

In [None]:
# Plot the number of resolutions per year
def plot_resolutions_per_year(body):
    """Plot the number of resolutions `body` adopted per year, excluding END."""
    df_body = filter_body(df, body)
    resolutions_per_year = df_body['Date'].map(date_to_year).value_counts(sort=False)
    # Drop END instead of a hard-coded year; errors='ignore' avoids a KeyError
    # when the body has no resolutions dated in END.
    resolutions_per_year = resolutions_per_year.drop(labels=[END], errors='ignore')
    # Order by year so the line is drawn chronologically whatever the row order.
    resolutions_per_year = resolutions_per_year.sort_index()
    resolutions_per_year.plot(title='Resolutions in the ' + body)

In [None]:
plot_resolutions_per_year(GA)

In [None]:
plot_resolutions_per_year(SC)

## Subjects

In [None]:
def get_percentage_with_no_subjects(df):
    """Return the percentage of rows in `df` whose 'Subjects' list is empty.

    'Subjects' holds the text form of a Python list, parsed with
    ast.literal_eval. An empty frame yields 0.0 instead of dividing by zero.
    """
    total = df.shape[0]
    if total == 0:
        return 0.0

    has_no_subjects = df['Subjects'].map(lambda subjects: len(ast.literal_eval(subjects)) == 0)
    # int() keeps the result a plain Python float rather than a numpy scalar.
    empty = int(has_no_subjects.sum())

    return empty * 100 / total

In [None]:
# How many resolutions have no subjects
# Computed over all records (both bodies) and printed rounded to a whole percent.
percentage_with_no_subjects = get_percentage_with_no_subjects(df)
print(str(round(percentage_with_no_subjects)) + "% of all resolutions have no subjects")

In [None]:
# What is the most recent year with no subjects
# None means "no such year found"; it replaces the arbitrary placeholder year.
most_recent_year = None
most_recent_year_percentage = 0

for year in range(START, END + 1):
    df_year = filter_year(df, year, year)
    if df_year.empty:
        # A year without resolutions has no percentage to compute.
        continue
    # Separate name so the overall `percentage_with_no_subjects` is not overwritten.
    year_percentage_with_no_subjects = get_percentage_with_no_subjects(df_year)
    if year_percentage_with_no_subjects > 0:
        # Years ascend, so the last match is the most recent one.
        most_recent_year = year
        most_recent_year_percentage = year_percentage_with_no_subjects

if most_recent_year is None:
    print("No year has resolutions without subjects")
else:
    print("{} is the most recent year with resolutions that have no subjects, {}% of them".format(most_recent_year, most_recent_year_percentage))

In [None]:
# Plot the percentage of resolutions with subjects per year
def subjects_to_subject_presence(subjects):
    """Return True when the textual list `subjects` has at least one entry."""
    parsed = ast.literal_eval(subjects)
    return len(parsed) != 0

def plot_resolutions_with_subjects(body):
    """Plot, per year, the share of `body` resolutions that have a subject.

    The plotted value is the mean of a boolean column, i.e. a fraction between
    0 and 1 rather than a 0-100 percentage.
    """
    # .copy() gives an independent frame, so the column assignments below do not
    # write into a slice of `df` (which triggers SettingWithCopyWarning).
    df_wip = filter_body(df, body)[['Subjects', 'Date']].copy()
    df_wip['Date'] = df_wip['Date'].map(date_to_year)
    df_wip['Subjects'] = df_wip['Subjects'].map(subjects_to_subject_presence)

    df_percentages = df_wip.groupby(by='Date')['Subjects'].mean()
    df_percentages.plot(title='Subject presence in the ' + body)

In [None]:
plot_resolutions_with_subjects(GA)

In [None]:
plot_resolutions_with_subjects(SC)

In [None]:
def subjects_to_length(subjects):
    """Return how many entries the textual list `subjects` contains."""
    parsed = ast.literal_eval(subjects)
    return len(parsed)

def plot_resolutions_per_subject_length():
    """Display and bar-plot how many resolutions have each number of subjects.

    Returns the largest number of subjects found on a single resolution.
    """
    counts_by_length = df['Subjects'].map(subjects_to_length).value_counts().sort_index()
    max_subjects = counts_by_length.index.max()

    # Add the lengths that never occur so the x axis has no gaps.
    every_length = range(max_subjects + 1)
    counts_by_length = counts_by_length.reindex(every_length, fill_value=0)

    display(counts_by_length)
    counts_by_length.plot(kind='bar', title='Resolutions per subject length')
    return max_subjects

max_subjects = plot_resolutions_per_subject_length()

In [None]:
# Find the resolutions that carry the maximum number of subjects
# df_wip holds the subject count per resolution, narrowed to the maximum.
df_wip = df[['Subjects']].assign(Subjects=lambda frame: frame['Subjects'].map(subjects_to_length))
df_wip = df_wip.loc[df_wip['Subjects'].eq(max_subjects)]
df.loc[df.index.isin(df_wip.index)]

## Voting Data

In [None]:
# Create a Data Frame with simplified voting data for statistics purposes
def to_simplified_voting_data(voting_data):
    """Collapse a raw voting-data string into one of four status labels.

    'Concensus' and 'N/A' pass through unchanged. Otherwise the string is split
    on ';' and the result is 'Voted with Concensus' when every entry starts with
    'Y', else 'Voted on'.
    """
    if voting_data in ('Concensus', 'N/A'):
        return voting_data

    every_vote_is_yes = all(point[0] == 'Y' for point in voting_data.split(';'))
    return 'Voted with Concensus' if every_vote_is_yes else 'Voted on'

# Same index as `df`, with 'Voting Data' replaced by its simplified status label.
df_vds = df[['Body', 'Voting Data', 'Date']].assign(
    **{'Voting Data': lambda frame: frame['Voting Data'].map(to_simplified_voting_data)}
)

In [None]:
# Plot resolutions by voting status
def plot_resolutions_by_voting_status(body):
    """Display the count and plot the percentage of `body` resolutions per voting status."""
    df_body = filter_body(df_vds, body)

    status_counts = df_body['Voting Data'].value_counts()
    display(status_counts)

    total = df_body.shape[0]
    status_percentages = status_counts.map(lambda count: to_percentage(count, total))
    status_percentages.plot(kind='barh', title='% of Resolutions per vote status in the ' + body)

In [None]:
plot_resolutions_by_voting_status(GA)

In [None]:
plot_resolutions_by_voting_status(SC)

In [None]:
# Adopted without voting through the decades
def percentage_of_concensus_for_years_and_body(body, start_year, end_year):
    """Print the share of `body` resolutions in [start_year, end_year] adopted by consensus.

    Both 'Concensus' and 'Voted with Concensus' statuses count as consensus.
    """
    df_wip = filter(df_vds, body, start_year, end_year)

    is_concensus = df_wip['Voting Data'].isin(['Concensus', 'Voted with Concensus'])
    concensus_count = df_wip[is_concensus].shape[0]

    percentage = concensus_count * 100 / df_wip.shape[0]
    print("Period {}-{}: {}%".format(start_year, end_year, round(percentage, 2)))

In [None]:
def percentage_of_concensus_for_body(body):
    """Print the consensus percentage of `body` for five consecutive periods."""
    print(body)
    periods = (
        (START, 1955),
        (1956, 1975),
        (1976, 1990),
        (1991, 2002),
        (2003, END),
    )
    for first_year, last_year in periods:
        percentage_of_concensus_for_years_and_body(body, first_year, last_year)

In [None]:
print('Percentage of adopted without voting:\n')
percentage_of_concensus_for_body(SC)
print('')
percentage_of_concensus_for_body(GA)

In [None]:
# What is the average percentage of different vote types for resolutions that are voted on
def to_percentage_of_vote_types(voting_data, target_vote_type):
    """Return the percentage of ';'-separated votes that start with `target_vote_type`."""
    voting_points = voting_data.split(';')
    matching_votes = sum(1 for point in voting_points if point[0] == target_vote_type)
    return matching_votes * 100 / len(voting_points)

def percentage_of_yes_votes(body, target_vote_type, print_contentious = False):
    """Print the average share of `target_vote_type` votes for voted-on `body` resolutions.

    Despite the name, any vote type can be requested. With `print_contentious`
    it also prints the lowest share (with its record id) and the percentage of
    resolutions whose share is below 70.
    """
    voted_on = filter_body(df[df_vds['Voting Data'] == 'Voted on'], body)
    shares = voted_on['Voting Data'].map(lambda v: to_percentage_of_vote_types(v, target_vote_type))

    print("{}: Average percentage of vote type '{}': {}%".format(body, target_vote_type, round(shares.mean(), 2)))

    if not print_contentious:
        return

    lowest_share = shares.min()
    lowest_share_id = shares.idxmin()
    print("{}: Minimal percentage of vote type '{}': {}% for id {}".format(body, target_vote_type, round(lowest_share, 2), lowest_share_id))

    below_threshold = shares[shares < 70]
    percentage_contentious = below_threshold.count() * 100 / shares.count()
    print("{}: Percentage of especially contentious resolutions: {}%".format(body, round(percentage_contentious, 2)))
    
percentage_of_yes_votes(GA, 'Y', print_contentious=True)
percentage_of_yes_votes(SC, 'Y', print_contentious=True)
print('')
percentage_of_yes_votes(GA, 'N')
percentage_of_yes_votes(SC, 'N')
print('')
percentage_of_yes_votes(GA, 'A')
percentage_of_yes_votes(SC, 'A')
print('')
percentage_of_yes_votes(GA, 'X')
percentage_of_yes_votes(SC, 'X')

In [None]:
# What were the most contentious resolutions adopted
def to_effective_percentage_of_yes_votes(voting_data):
    """Return the percentage of 'Y' votes among the 'Y' and 'N' votes only.

    Votes starting with any other letter are ignored.
    """
    yes_count = 0
    no_count = 0

    for voting_point in voting_data.split(';'):
        vote_type = voting_point[0]
        if vote_type == 'Y':
            yes_count += 1
        elif vote_type == 'N':
            no_count += 1

    return yes_count * 100 / (yes_count + no_count)

def effective_percentage_of_yes_votes(body):
    """Print effective-yes statistics for the voted-on resolutions of `body`.

    Prints the average effective yes share, the share of resolutions below 70
    ("especially contentious") and at or above 90 ("non-contentious"), then
    displays the ten lowest effective yes shares.
    """
    voted_on = filter_body(df[df_vds['Voting Data'] == 'Voted on'], body)
    shares = voted_on['Voting Data'].map(to_effective_percentage_of_yes_votes)
    share_count = shares.count()

    print("{}: Average percentage of effective yes votes: {}%".format(body, round(shares.mean(), 2)))

    percentage_contentious = shares[shares < 70].count() * 100 / share_count
    print("{}: Percentage of especially contentious resolutions: {}%".format(body, round(percentage_contentious, 2)))

    percentage_not_contentious = shares[shares >= 90].count() * 100 / share_count
    print("{}: Percentage of non-contentious resolutions: {}%".format(body, round(percentage_not_contentious, 2)))

    print('Most contentious votes:')
    lowest_shares = shares.sort_values(ascending=True).head(10)
    display(lowest_shares)
    print('')
    
effective_percentage_of_yes_votes(GA)
effective_percentage_of_yes_votes(SC)

In [None]:
# Number of YES votes per bracket
def to_percentage_brackets(percentage):
    """Return the label of the ten-point bracket containing `percentage`.

    Brackets are half-open, [start, start + 10), except the last one, which
    also includes 100. So 70 maps to "70-80%" and 100 to "90-100%".
    """
    # Floor division picks the decade the value lies in; rounding instead put
    # e.g. 62 into "50-60%" and 4 into "-10-0%".
    bracket = int(percentage // 10)
    # Clamp so 100 joins the top bracket and nothing falls below "0-10%".
    bracket = max(0, min(bracket, 9))
    start = bracket * 10
    end = start + 10
    return "{}-{}%".format(start, end)

def plot_percentage_of_yes_votes(body, effective):
    """Bar-plot the distribution of yes-vote shares over ten-point brackets for `body`.

    With `effective` the share is taken among yes and no votes only; otherwise
    among all votes. Only voted-on resolutions are considered.
    """
    voted_on = filter_body(df[df_vds['Voting Data'] == 'Voted on'], body)

    if effective:
        to_share = to_effective_percentage_of_yes_votes
    else:
        to_share = lambda v: to_percentage_of_vote_types(v, 'Y')
    percentages = voted_on['Voting Data'].map(to_share)

    bracket_counts = percentages.map(to_percentage_brackets).value_counts().sort_index()
    total = percentages.count()
    bracket_shares = bracket_counts.map(lambda v: to_percentage(v, total))

    title = 'Percentage of' + (' Effective ' if effective else ' ') + 'YES votes in the ' + body
    bracket_shares.plot(kind='bar', title=title)

In [None]:
plot_percentage_of_yes_votes(GA, False)

In [None]:
plot_percentage_of_yes_votes(GA, True)

In [None]:
plot_percentage_of_yes_votes(SC, False)

In [None]:
plot_percentage_of_yes_votes(SC, True)

In [None]:
# Average percentage of yes votes through the years
def average_percentage_of_yes_for_years_for_body(body, start_year, end_year, effective):
    """Print the average yes-vote share of voted-on `body` resolutions in [start_year, end_year].

    With `effective` the share is taken among yes and no votes only; otherwise
    among all votes.
    """
    voted_on = df[df_vds['Voting Data'] == 'Voted on']
    in_scope = filter(voted_on, body, start_year, end_year)

    if effective:
        to_share = to_effective_percentage_of_yes_votes
    else:
        to_share = lambda v: to_percentage_of_vote_types(v, 'Y')
    average = in_scope['Voting Data'].map(to_share).mean()

    print("Period {}-{}: {}%".format(start_year, end_year, round(average)))

In [None]:
def average_percentage_of_yes_for_body(body, effective):
    """Print the average yes-vote share of `body` for five consecutive periods."""
    print(body)
    periods = (
        (START, 1955),
        (1956, 1975),
        (1976, 1990),
        (1991, 2002),
        (2003, END),
    )
    for first_year, last_year in periods:
        average_percentage_of_yes_for_years_for_body(body, first_year, last_year, effective)

In [None]:
print('Average percentage of YES votes:\n')
average_percentage_of_yes_for_body(SC, False)
print('')
average_percentage_of_yes_for_body(GA, False)

print('\nAverage percentage of Effective YES votes:\n')
average_percentage_of_yes_for_body(SC, True)
print('')
average_percentage_of_yes_for_body(GA, True)