In [None]:
import calendar

import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Helper constants: the values of the 'Body' column that identify each body.
# They are used below both to filter rows and to look up per-body counts.
GA = 'General Assembly'
SC = 'Security Council'

In [None]:
# Load the table of all records as the primary data frame.
# - index_col='ID': rows are keyed by the 'ID' column.
# - keep_default_na=False: pandas' default NaN markers are not applied, so empty
#   cells stay as the empty string '' instead of becoming NaN.
records_path = "../data/records.csv"
df = pd.read_csv(records_path, index_col='ID', keep_default_na=False)

## Title

In [None]:
# Which titles appear most often?
# Counting on the frame (not the column) yields a Series with a one-level
# MultiIndex, so each index entry is a 1-tuple such as ('<title>',).
df_title_counts = df.value_counts(subset=['Title'])
df_title_counts.head(5)

In [None]:
# Are resolutions with the same title all within a few years of each other?
# The counts are sorted in descending order, so the first index entry belongs to
# the most frequent title; the entry is a tuple and the title is its first item.
top_title_entry = df_title_counts.index[0]
most_common_title = top_title_entry[0]
df_most_common_title = df.loc[df['Title'] == most_common_title]
# Show the first and the last few matching records
display(df_most_common_title.head(3), df_most_common_title.tail(3))

## Body

In [None]:
# What percentage of all resolutions comes from the SC vs the GA?
df_body_counts = df.value_counts(subset=['Body'])
sc_percentage = 100 * df_body_counts[SC] / len(df)
ga_perctange = 100 * df_body_counts[GA] / len(df)
# Percentages are rounded to whole numbers for display only
print('Resolutions by Security Council: ~', round(sc_percentage), '%', sep='')
print('Resolutions by General Assembly: ~', round(ga_perctange), '%', sep='')

## Dates

In [None]:
# Get DF for a body and, optionally, a given set of years

def date_is_between(date, start_year, end_year):
    """Flag which dates fall inside the inclusive range [start_year, end_year].

    Args:
        date: pandas Series of date strings; the first four characters of each
            value are read as the year.
        start_year: first year of the range (inclusive).
        end_year: last year of the range (inclusive).

    Returns:
        Boolean Series aligned with `date`. Values whose first four characters
        are not a number (for example an empty string for a missing date) are
        reported as False instead of raising.
    """
    # errors='coerce' turns unparseable year prefixes into NaN; NaN never
    # satisfies `between`, so those rows are excluded rather than crashing.
    years = pd.to_numeric(date.str[0:4], errors='coerce')
    return years.between(start_year, end_year)

def filter_by_year(df, body, start_year, end_year):
    """Return the rows of `df` for one body within an inclusive range of years.

    Args:
        df: frame with a 'Body' column and a 'Date' column.
        body: value the 'Body' column must equal.
        start_year: first year to keep (inclusive).
        end_year: last year to keep (inclusive).

    Returns:
        The subset of `df` matching both conditions.
    """
    is_requested_body = df['Body'] == body
    is_in_period = date_is_between(df['Date'], start_year, end_year)
    return df[is_requested_body & is_in_period]

def filter(df, body):
    """Return the rows of `df` whose 'Body' column equals `body`.

    Note: within this notebook the name shadows Python's builtin `filter`.
    """
    is_requested_body = df['Body'] == body
    return df.loc[is_requested_body]

In [None]:
# Which dates were the most productive for either chamber
def count_resolutions_per_date(body):
    """Print the body name and display its five dates with the most resolutions.

    Reads the notebook-level frame `df`.

    Args:
        body: value of the 'Body' column to report on.
    """
    print(body)
    # Keep 'Date' as a one-column frame so the counts are computed per row of the frame
    body_dates = df.loc[df['Body'] == body, ['Date']]
    display(body_dates.value_counts().head(5))

count_resolutions_per_date(SC)
print('')
count_resolutions_per_date(GA)

In [None]:
# Were there dates with only one resolution adopted?
# Counts are sorted in descending order, so the tail holds the least busy dates.
df_date_counts = df.value_counts(subset=['Date'])
df_date_counts.tail(5)

In [None]:
# Investigate the most productive dates
def analyze_most_productive_date(body):
    """Print a short report about the date on which `body` adopted the most resolutions.

    The report contains the date itself, the number of resolutions adopted on
    it, the number adopted during the whole year of that date, and the share of
    the year's resolutions that the date accounts for.

    Reads the notebook-level frame `df`.

    Args:
        body: value of the 'Body' column to report on.
    """
    print(body)

    # Bind the selection to its own name. Assigning to `df` inside the function
    # would make `df` a local variable, and reading it on the right-hand side
    # would then raise UnboundLocalError before anything is computed.
    df_body = filter(df, body)
    if df_body.empty:
        # Nothing to rank; avoid an IndexError on the empty counts below
        print('No resolutions found')
        return

    dates_by_productivity = df_body[['Date']].value_counts()

    # Counts are sorted in descending order: position 0 is the busiest date.
    # The index entries are 1-tuples, hence the second [0].
    most_productive_date = dates_by_productivity.index[0][0]
    most_productive_date_count = dates_by_productivity.iloc[0]

    # The year is the first four characters of the date string
    most_productive_date_year = most_productive_date[0:4]
    most_productive_date_year_count = df_body[df_body['Date'].str.startswith(most_productive_date_year)].shape[0]

    most_productive_date_percentage_in_its_year = most_productive_date_count * 100 / most_productive_date_year_count

    print('Most productive date: ' + str(most_productive_date))
    print('During this date a total of ' + str(most_productive_date_count) + ' resolutions were adopted')
    print('During that year a total of ' + str(most_productive_date_year_count) + ' resolutions were adopted')
    print('That is ~' + str(round(most_productive_date_percentage_in_its_year)) + '% of all resolutions for that year')

analyze_most_productive_date(SC)
print('-------------------------------------')
analyze_most_productive_date(GA)

In [None]:
# How many dates in each year had resolutions held
def analyze_active_dates_per_year(body):
    """Print the years in which `body` was active on the largest and smallest share of days.

    For every year from 1946 to 2023 the number of distinct dates with at least
    one record is divided by the number of days in that year.

    Reads the notebook-level frame `df`.

    Args:
        body: value of the 'Body' column to report on.
    """
    print(body)
    body_dates = filter(df, body)['Date']

    def active_share(year):
        # Percentage of the year's days on which at least one record exists
        days_in_year = 366 if calendar.isleap(year) else 365
        dates_in_year = body_dates[body_dates.str.startswith(str(year))]
        return len(dates_in_year.unique()) * 100 / days_in_year

    # The range stops before 2024: the current year is ignored, it's not complete
    year_activity = {year: active_share(year) for year in range(1946, 2024)}

    # On ties, max/min return the earliest year because the dict is in year order
    most_active_year = max(year_activity, key=year_activity.get)
    least_active_year = min(year_activity, key=year_activity.get)

    print('Most active year ' + str(most_active_year) + ' had ' + str(round(year_activity[most_active_year])) + '% activity')
    print('Least active year ' + str(least_active_year) + ' had ' + str(round(year_activity[least_active_year])) + '% activity')

analyze_active_dates_per_year(SC)
print('-------------------------------------')
analyze_active_dates_per_year(GA)

In [None]:
# What's the average amount of adoptions in an active day?
def analyze_average_activity(body, start_year, end_year):
    """Print how many resolutions `body` adopted per active date in a period.

    An active date is a date with at least one record. The report contains the
    total number of records in the period and the mean and median number of
    records per active date.

    Reads the notebook-level frame `df` and relies on `filter_by_year`, defined
    with the other helpers at the start of the Dates section (this cell no
    longer redefines `date_is_between`).

    Args:
        body: value of the 'Body' column to report on.
        start_year: first year of the period (inclusive).
        end_year: last year of the period (inclusive).
    """
    print(body + " average adoptions for years [{}, {}]".format(start_year, end_year))

    # Bind the selection to its own name. Assigning to `df` inside the function
    # would make `df` local and the read on the right-hand side would raise
    # UnboundLocalError.
    df_period = filter_by_year(df, body, start_year, end_year)
    resolutions_per_date = df_period[['Date']].value_counts()
    print('Total adoptions in that period: ' + str(df_period.shape[0]))
    print('Average resolutions adopted: ' + str(resolutions_per_date.mean()))
    print('Median resolutions adopted: ' + str(resolutions_per_date.median()))
    print('')

analyze_average_activity(SC, 1946, 2023)
analyze_average_activity(GA, 1946, 2023)

In [None]:
# How does the average amount of adoptions change over the years?

def analyze_average_activity_per_year(body):
    """Print the average-activity report for `body` once per consecutive period.

    The periods cover 1946 to 2023; the first three span twenty years each and
    the last one ends at 2023.

    Args:
        body: value of the 'Body' column to report on.
    """
    periods = (
        (1946, 1965),
        (1966, 1985),
        (1986, 2005),
        (2006, 2023),
    )
    for first_year, last_year in periods:
        analyze_average_activity(body, first_year, last_year)

analyze_average_activity_per_year(SC)
print('-------------------------------------')
analyze_average_activity_per_year(GA)

In [None]:
# Plot the number of resolutions per year
def date_to_year(date):
    """Return the year held in the first four characters of `date` as an int."""
    year_prefix = date[:4]
    return int(year_prefix)

def plot_resolutions_per_year(body, incomplete_year=2024):
    """Plot the number of resolutions `body` adopted in each year.

    Reads the notebook-level frame `df`.

    Args:
        body: value of the 'Body' column to plot; also used as the plot title.
        incomplete_year: year left out of the plot because its data is not
            complete. Defaults to 2024, the year the original cell dropped.
    """
    df_body = filter(df, body)
    resolutions_per_year = (
        df_body['Date']
        .map(date_to_year)
        .value_counts(sort=False)
        # A line plot connects points in index order, so make that order
        # chronological instead of relying on the row order of the data.
        .sort_index()
        # errors='ignore': a body without any record in that year must not
        # raise KeyError.
        .drop(labels=[incomplete_year], errors='ignore')
    )
    ax = resolutions_per_year.plot(title=body)
    ax.set_xlabel('Year')
    ax.set_ylabel('Resolutions adopted')

plot_resolutions_per_year(GA)

In [None]:
# Same per-year plot, this time for the Security Council
plot_resolutions_per_year(SC)