## Import 1: Classes & Path

In [None]:
# Import 1. 
# Import classes into notebook from src folder. 
import sys

# Path relative to your notebook.
sys.path.append("/Users/joel/Desktop/daimil10/CAPSTONE_2/Niger-Political-Shift-Examination/src")

# Import my .py classes.(more edits later).
import data_clean
import plots

# Auto reload.
%load_ext autoreload
%autoreload 2

## Import 2: All Libs

In [None]:
# Import 2.
# All libs and modules I'll be using.
import pandas as pd
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib.ticker as mticker
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

## Read File & Initial Drop of Columns

In [None]:
# Load the Niger indicator file and trim it to the 1996-2022 window.
# All reads go through a cleaner object built from the project's data_clean module.
cleaner = data_clean.Clean()

# Main Niger dataset, read with the cleaner's read_file method.
niger = cleaner.read_file('/Users/joel/Desktop/daimil10/CAPSTONE_2/Niger-Political-Shift-Examination/data/Niger.csv')

# Columns to discard: the two country identifier columns plus every
# year column from 1960 through 1995 (year columns are named by the year as a string).
pre_window_years = [str(year) for year in range(1960, 1996)]
niger = niger.drop(columns=['Country Name', 'Country Code'] + pre_window_years)

# Metadata frame (indicator name, source note, source organisation) kept for lookups.
niger_meta = cleaner.read_file('/Users/joel/Desktop/daimil10/CAPSTONE_2/Niger-Political-Shift-Examination/data/Metadata_Indicator_Descriptors.csv')

# Preview the first 5 rows of the trimmed frame.
niger.head()

## Describing the Dataset

In [None]:
# Describing the Dataset 1.
# Summary from the cleaner's describe_dataframe helper (defined in data_clean;
# its exact output is not visible from this notebook).
cleaner.describe_dataframe(niger)

In [None]:
# Describing the Dataset 2.
# pandas' built-in summary of the frame (columns, dtypes, non-null counts).
niger.info()

In [None]:
# Describing the Dataset 3.
# Show the row index of the trimmed frame.
niger.index

In [None]:
# Describing the Dataset 4.
# Show the column labels that remain after the initial drop.
niger.columns

In [None]:
# Describing the Dataset 5.
# Count of missing (null) values in each column of `niger`.
missing_v = niger.isnull().sum()
missing_v

In [None]:
# Describing the Dataset 6.
# Using the cleaner helper that, per the original note, returns a dict of
# column names and dtypes (implementation lives in data_clean).
cleaner.get_column_features(niger)

## Project Goal #1:
Niger Governance: Examine Three World Governance Indicators from 1996 to 2022 (26 years).
- Political Stability and Absence of Violence/Terrorism (6 representative sources): measures perceptions of the likelihood of political instability and/or politically-motivated violence, including terrorism. Estimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.

- Control of Corruption (7 representative sources): captures perceptions of the extent to which public power is exercised for private gain, including both petty and grand forms of corruption, as well as "capture" of the state by elites and private interests. Estimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.

- Government Effectiveness (6 representative sources): captures perceptions of the quality of public services, the quality of the civil service and the degree of its independence from political pressures, the quality of policy formulation and implementation, and the credibility of the government's commitment to such policies. Estimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.

In [None]:
# Build PG1: the three World Governance Indicators used for Project Goal #1.
# The governance index also includes RQ.EST, VA.EST and RL.EST, but only
# PV.EST, CC.EST and GE.EST are kept because the other three have too many
# missing values to be reliable.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isin.html
indicator_codes = ['PV.EST', 'CC.EST', 'GE.EST']

# .copy() gives PG1 its own data. Without it, the column assignment below
# writes to a filtered slice of `niger`, which triggers pandas'
# SettingWithCopyWarning and may not behave as intended.
PG1 = niger[niger['Indicator Code'].isin(indicator_codes)].copy()

# Shorten the long indicator names so they fit in chart legends.
# A single mapping replaces the three separate replace() calls.
short_names = {
    'Political Stability and Absence of Violence/Terrorism: Estimate': 'Political Stability',
    'Control of Corruption: Estimate': 'Control of Corruption',
    'Government Effectiveness: Estimate': 'Government Effectiveness',
}
PG1['Indicator Name'] = PG1['Indicator Name'].replace(short_names)

# No data for 1997, 1999, 2001, 2022, so those year columns are removed.
PG1 = PG1.drop(['1997', '1999', '2001', '2022'], axis=1)

In [None]:
# Line chart of the governance indicators held in PG1.
# Plot style of choice.
style.use('fivethirtyeight')

# One figure / one axes, sized 16 x 9.
fig, ax = plt.subplots(figsize=(16, 9))

# Only columns from position 4 onward are plotted (the original note says the
# chart starts at 2000 because that is where the data is complete).
# NOTE(review): this assumes the first four columns are the name, the code,
# 1996 and 1998 -- confirm against PG1.columns.
available_codes = PG1['Indicator Code'].unique()
for indicator in indicator_codes:
    # Skip any code that is not present in PG1.
    if indicator not in available_codes:
        continue
    # Rows for this indicator only.
    indicator_data = PG1[PG1['Indicator Code'] == indicator]
    # x = year labels, y = the indicator values flattened to a 1-D array.
    # https://pandas.pydata.org/pandas-docs/version/0.14.1/generated/pandas.Index.flatten.html
    year_labels = indicator_data.columns[4:]
    score_values = indicator_data.iloc[:, 4:].values.flatten()
    # Legend label is the (shortened) indicator name of the first matching row.
    series_label = f'{indicator_data["Indicator Name"].iloc[0]}'
    ax.plot(year_labels, score_values, marker='o', label=series_label)

# Axis labels and title.
ax.set_xlabel('Year', color='green')
ax.set_ylabel('Aggregate Indicator', color='green')
ax.set_title('World Governance Indicators For Niger', color='green')

# Two legend-only entries (empty data, blank format string) for the coup annotations.
for legend_only_label in ("Coup: Change of Power", "Attempted Coup"):
    ax.plot([], [], ' ', label=legend_only_label)

# Grey plot background.
# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.gca.html
ax.set_facecolor('grey')

# Tilt the year labels and colour the ticks on both axes black.
plt.xticks(rotation=45)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, colors='black')

# Legend, then render.
ax.legend()
plt.show()

## Project Goal #2:
Niger Economic Health: Explore Economic Indicators over the specified time frame.

- Inflation(Yearly%): as measured by the consumer price index reflects the annual percentage change in the cost to the average consumer of acquiring a basket of goods and services that may be fixed or changed at specified intervals, such as yearly. The Laspeyres formula is generally used.

- GDP Annual (Yearly%): Annual percentage growth rate of GDP at market prices based on constant local currency. Aggregates are based on constant 2015 prices, expressed in U.S. dollars. GDP is the sum of gross value added by all resident producers in the economy plus any product taxes and minus any subsidies not included in the value of the products. It is calculated without making deductions for depreciation of fabricated assets or for depletion and degradation of natural resources.

- Unemployment (Yearly%): refers to the share of the labor force that is without work but available for and seeking employment.

In [None]:
# Build PG2: the three economic indicators for Project Goal #2.
# Each code is filtered on its own (the original note says filtering them
# together did not populate every code).
code_column = niger['Indicator Code']

# GDP growth, annual % (code looked up via the metadata frame).
a = niger[code_column == 'NY.GDP.MKTP.KD.ZG']
# Inflation, consumer prices.
b = niger[code_column == 'FP.CPI.TOTL.ZG']
# Unemployment.
c = niger[code_column == 'SL.UEM.TOTL.ZS']

# Stack the three slices row-wise in a, b, c order and discard the old
# index so the rows are numbered from 0 again.
# https://pandas.pydata.org/docs/reference/api/pandas.concat.html
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html
PG2 = pd.concat([a, b, c], axis=0).reset_index(drop=True)

# Null count per column. This is evaluated mid-cell, so its result is not
# displayed; move it to the end of a cell to actually see it.
PG2.isnull().sum()

# The indicator code is not needed from here on.
PG2 = PG2.drop(columns=['Indicator Code'])

In [None]:
# Hand-entered yearly series (world and Niger) and their means, intended for
# the chart call-outs.
# NOTE(review): the series are not the same length -- the three world lists
# have 26 values, niger_unemploy and niger_inflation have 27, niger_gdp has
# 25 -- so the means do not all cover the same span of years.

# https://www.macrotrends.net/countries/WLD/world/inflation-rate-cpi
world_inflation = [
    3.50, 1.92, 2.19, 2.44, 2.19, 1.55, 1.43, 2.35, 2.62, 3.73,
    4.82, 3.35, 2.93, 8.95, 4.81, 4.27, 4.11, 3.52, 3.03, 2.91,
    3.84, 3.43, 3.04, 5.10, 5.55, 6.53,
]

# https://www.macrotrends.net/countries/WLD/world/gdp-gross-domestic-product
world_GDP = [
    5.87, -3.12, 2.59, 3.29, 3.39, 2.80, 3.08, 3.09, 2.81, 2.71,
    3.31, 4.54, -1.34, 2.07, 4.38, 4.42, 4.00, 4.47, 3.11, 2.30,
    2.01, 4.52, 3.55, 2.82, 3.88, 3.59,
]

# https://www.macrotrends.net/countries/WLD/world/unemployment-rate
world_unemploy = [
    6.18, 6.58, 5.36, 5.40, 5.56, 5.66, 5.63, 5.61, 5.73, 5.74,
    5.77, 5.90, 6.00, 5.41, 5.42, 5.63, 5.90, 6.00, 6.16, 6.07,
    5.85, 5.76, 5.96, 5.85, 5.70, 5.70,
]

# https://data.worldbank.org/country/niger
niger_unemploy = [
    2.027, 1.99, 1.92, 1.939, 1.913, 1.841, 2.614, 3.355, 4.047, 4.586,
    3.748, 2.937, 2.11, 1.374, 0.68, 0.156, 5.333, 2.919, 0.403, 0.425,
    0.446, 0.468, 0.489, 0.512, 0.787, 0.312, 0.256,
]

niger_inflation = [
    5.288857513, 2.933452534, 4.548015777, -2.302125038, 2.900149736,
    4.005514284, 2.628865979, -1.614407692, 0.262543757, 7.797497818,
    0.04048583, 0.053959261, 11.30510988, 0.582906591, 0.804073081,
    2.94238514, 0.45508982, 2.29723115, -0.93028726, -0.57609037,
    1.653889224, 2.796373189, 2.96760366, -2.489792651, 2.89819376,
    3.837868028, 4.226217919,
]

niger_gdp = [
    9.972614956, -0.219562518, -1.208477741, 7.268134902, 4.918470576,
    2.170610384, 0.363801579, 7.331868843, 5.931049974, 3.142724084,
    7.731414228, 1.962600907, 8.578166743, 2.357756936, 10.54894458,
    5.315130634, 6.642136655, 4.392648832, 5.740893151, 5.001359945,
    7.210803008, 5.941396863, 3.550227506, 1.387129177, 11.50000492,
]

# Arithmetic mean of each series.
world_infla_mean = np.mean(world_inflation)
world_gdp_mean = np.mean(world_GDP)
world_unemploy_mean = np.mean(world_unemploy)
niger_unemploy_mean = np.mean(niger_unemploy)
niger_inflation_mean = np.mean(niger_inflation)
niger_gdp_mean = np.mean(niger_gdp)

In [None]:
# Area chart of the Niger economic indicators with a world-vs-Niger summary box.

# Use 'Indicator Name' as the index so the transposed frame gets one named
# column per indicator. The guard makes the cell safe to re-run: once the
# column has become the index, a second set_index call would raise KeyError.
if 'Indicator Name' in PG2.columns:
    PG2 = PG2.set_index('Indicator Name')

# Transpose: years become the rows, indicators become the columns.
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.transpose.html
df_transposed = PG2.T

# One figure / one axes, sized 16 x 9.
fig, ax = plt.subplots(figsize=(16, 9))

# One filled area per indicator, labelled with the indicator name.
# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.fill_between.html
for indicator in df_transposed.columns:
    ax.fill_between(df_transposed.index, df_transposed[indicator], alpha=0.7, label=indicator)

# Labels w/ green theme.
ax.set_xlabel('Years', color='green')
ax.set_ylabel('Percentage', color='green')
ax.set_title('Niger Economic Indicators', color='green')

# Grey plot background.
ax.set_facecolor('grey')

# Many year labels, so tilt them.
plt.xticks(rotation=45)

# Legend placement and black tick colours.
ax.legend(loc='center', bbox_to_anchor=(0.7, 0.92), fancybox=False, shadow=True, ncol=1)
ax.tick_params(axis='x', colors='black')
ax.tick_params(axis='y', colors='black')

# Fixed y-range so the negative GDP / inflation values stay visible.
ax.set_ylim(-2, 15)

# Summary box. The figures come from the means computed in the metrics cell
# instead of being typed in by hand, so they cannot drift from the data and
# are rounded (not truncated) to two decimals.
text_box = f'''
Over 26 Years

World GDP growth: {world_gdp_mean:.2f}% vs Niger GDP growth: {niger_gdp_mean:.2f}%

World Inflation: {world_infla_mean:.2f}% vs Niger Inflation: {niger_inflation_mean:.2f}%

World Unemployment: {world_unemploy_mean:.2f}% vs Niger Unemployment: {niger_unemploy_mean:.2f}%
'''
ax.text(0.02, 0.98, text_box, transform=ax.transAxes, fontsize=12,
        verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Render.
plt.show()

## Project Goal #3:
Niger Leadership: Highlight Political Regime Effectiveness.

- Government Effectiveness (6 representative sources): captures perceptions of the quality of public services, the quality of the civil service and the degree of its independence from political pressures, the quality of policy formulation and implementation, and the credibility of the government's commitment to such policies. Estimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.

In [None]:
# Build PG3 (Government Effectiveness only) and the per-leader score table
# used by the violin plot.
indicator_codes = ['GE.EST']

# .copy() so the label edit below is applied to an independent frame rather
# than to a filtered slice of `niger` (avoids SettingWithCopyWarning).
PG3 = niger[niger['Indicator Code'].isin(indicator_codes)].copy()

# Shorten the long indicator name (same approach as PG1).
PG3['Indicator Name'] = PG3['Indicator Name'].replace('Government Effectiveness: Estimate', 'Government Effectiveness')

# Year columns belonging to each leader's regime.
# 1996 - 1999: Ibrahim Bare Mainassara.
# 1999 - 2010: Mamadou Tandja.
# 2011- 2021:  Mahamadou Issoufou.
# 1999 is listed once only (under Mainassara) so no year can be counted for
# two leaders; this matches the `regimes` mapping used in the ANOVA section.
brahim_years = ['1996', '1997', '1998', '1999']  # some missing data.
mamadou_years = [str(year) for year in range(2000, 2011)]
issoufou_years = [str(year) for year in range(2011, 2022)]


def _regime_scores(frame, years):
    """Return the non-null values found in `frame[years]` as a flat list.

    The values are flattened to 1-D first, then NaN / null entries are
    filtered out. A plain list is returned so the per-leader results can be
    concatenated with `+` below.
    """
    return [score for score in frame[years].values.flatten() if pd.notnull(score)]


# Scores per leader with the missing years removed.
brahim_scores = _regime_scores(PG3, brahim_years)
mamadou_scores = _regime_scores(PG3, mamadou_years)
issoufou_scores = _regime_scores(PG3, issoufou_years)

# Means used in the floating call-out boxes of the violin plot.
b_mean = np.mean(brahim_scores)
m_mean = np.mean(mamadou_scores)
i_mean = np.mean(issoufou_scores)

# Long-format table: one row per score, tagged with the leader it belongs to.
data = pd.DataFrame({
    'Leader': ['Ibrahim Bare Mainassara'] * len(brahim_scores)
              + ['Mamadou Tandja'] * len(mamadou_scores)
              + ['Mahamadou Issoufou'] * len(issoufou_scores),
    'Government Effectiveness Score': brahim_scores + mamadou_scores + issoufou_scores,
})

In [None]:
# Violin plot of Government Effectiveness scores per leader.
# Palette attempting Niger's flag colours (the orange comes out quite light).
new_colors = ['white', 'green', 'orange']

# One figure / one axes, sized 10 x 6; the violins are drawn onto that axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(y='Leader', x='Government Effectiveness Score', data=data, palette=new_colors, ax=ax)

# Grey plot background.
# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.gca.html
ax.set_facecolor('grey')

# Shared look for every call-out box.
callout_style = dict(boxstyle='round', facecolor='white', alpha=0.8)

# Context box: regime dates and the scale of the indicator.
text_1 = '''
Considerations

Regime: 
1996 - 1999: Ibrahim Bare Mainassara
1999 - 2010: Mamadou Tandja
2011- 2021:  Mahamadou Issoufou

GE Scale:
-2.5 <-> +2.5
'''
ax.text(0.02, 0.97, text_1, transform=ax.transAxes, fontsize=8,
        verticalalignment='top', bbox=dict(callout_style))

# One mean call-out per leader (f-strings, two decimals).
text_2 = f"Mainassara's Mean: {b_mean:.2f}"
text_3 = f"Tandja's Mean: {m_mean:.2f}"
text_4 = f"Issoufou's Mean: {i_mean:.2f}"

# (x, y) are in axes coordinates, hand-placed next to each violin.
mean_callouts = [
    (0.61, 0.85, text_2),
    (0.82, 0.51, text_3),
    (0.8, 0.18, text_4),
]
for x_pos, y_pos, callout in mean_callouts:
    ax.text(x_pos, y_pos, callout, transform=ax.transAxes, fontsize=8,
            verticalalignment='top', bbox=dict(callout_style))

# No y label: the leader names are long enough on their own.
ax.set_ylabel('')

# Green theme for the x label and title; x-range limited to -2.5 .. 0.
ax.set_xlabel("Government Effectiveness Indicator", color='green')
ax.set_title("Distribution of Government Effectiveness During Regime", color='green')
ax.set_xlim(-2.5, 0)

# Render.
plt.show()

## Project Goal #4:
Niger Foreign Aid: Uncover data points related to foreign assistance from other countries.

- Net bilateral aid flows from DAC donors are the net disbursements of official development assistance (ODA) or official aid from the members of the Development Assistance Committee (DAC). Net disbursements are gross disbursements of grants and loans minus repayments of principal on earlier loans. ODA consists of loans made on concessional terms (with a grant element of at least 25 percent, calculated at a rate of discount of 10 percent) and grants made to promote economic development and welfare in countries and territories in the DAC list of ODA recipients. 

- Official aid refers to aid flows from official donors to countries and territories in part II of the DAC list of recipients: more advanced countries of Central and Eastern Europe, the countries of the former Soviet Union, and certain advanced developing countries and territories. Official aid is provided under terms and conditions similar to those for ODA. Part II of the DAC List was abolished in 2005. The collection of data on official aid and other resource flows to Part II countries ended with 2004 data. 

- DAC members are Australia, Austria, Belgium, Canada, Czech Republic, Denmark, Finland, France, Germany, Greece, Hungary, Iceland, Ireland, Italy, Japan, Korea, Luxembourg, The Netherlands, New Zealand, Norway, Poland, Portugal, Slovak Republic, Slovenia, Spain, Sweden, Switzerland, United Kingdom, United States, and European Union Institutions. Regional aggregates include data for economies not specified elsewhere. World and income group totals include aid not allocated by country or region. Data are in current U.S. dollars.

In [None]:
# Build PG4: net bilateral aid flows to Niger from a selection of DAC donors.
# The selection was limited to G20 donors without missing values; DAC flows
# are used as the foreign-aid measure because they can be compared against
# GCF figures from the World Bank.
indicator_codes = ['DC.DAC.GBRL.CD', 'DC.DAC.CECL.CD', 'DC.DAC.FRAL.CD', 'DC.DAC.DEUL.CD', 'DC.DAC.CANL.CD', 'DC.DAC.USAL.CD',
                   'DC.DAC.JPNL.CD', 'DC.DAC.KORL.CD']

# .copy() so the rename below is applied to an independent frame rather than
# to a filtered slice of `niger` (avoids SettingWithCopyWarning).
PG4 = niger[niger['Indicator Code'].isin(indicator_codes)].copy()

# Null count per column. This is evaluated mid-cell, so its result is not
# displayed; move it to the end of a cell to actually see it.
PG4.isnull().sum()

# Replace the long indicator names with the donor name, using one mapping
# for all eight replacements.
rep = {
    'Net bilateral aid flows from DAC donors, United Kingdom (current US$)': 'United Kingdom',
    'Net bilateral aid flows from DAC donors, European Union institutions (current US$)': 'European Union',
    'Net bilateral aid flows from DAC donors, Korea, Rep. (current US$)': 'South Korea',
    'Net bilateral aid flows from DAC donors, France (current US$)': 'France',
    'Net bilateral aid flows from DAC donors, Germany (current US$)': 'Germany',
    'Net bilateral aid flows from DAC donors, Canada (current US$)': 'Canada',
    'Net bilateral aid flows from DAC donors, United States (current US$)': 'United States',
    'Net bilateral aid flows from DAC donors, Japan (current US$)': 'Japan'}
PG4['Indicator Name'] = PG4['Indicator Name'].replace(rep)

# Notebook-wide display option: show floats with thousands separators and no
# decimals, which makes the large dollar amounts readable.
pd.options.display.float_format = '{:,.0f}'.format

# The indicator code is not needed from here on. (The 2022 column is left in
# place by this cell; only 'Indicator Code' is dropped.)
PG4 = PG4.drop(['Indicator Code'], axis=1)

In [None]:
# Bar chart of total DAC aid to Niger per donor, 1996-2021, with call-outs.

# Total per donor over the explicitly named year columns 1996..2021.
# Selecting by name (not by position) keeps the result the same when the cell
# is re-run after 'Total_Aid_Niger' has already been appended, and matches the
# period stated in the chart title.
aid_year_columns = [str(year) for year in range(1996, 2022)]
PG4["Total_Aid_Niger"] = PG4[aid_year_columns].sum(axis=1)

# Dollar-formatted totals for the call-out boxes, in PG4 row order.
number = PG4["Total_Aid_Niger"].tolist()
new = ["${:,.2f}".format(n) for n in number]

# One figure / one axes; everything below is drawn on this single axes.
fig, ax = plt.subplots(figsize=(16, 9))
grouped_data = PG4

# One colour per bar, chosen to resemble each donor's associated colours.
custom = ['blue', 'yellow', 'white', 'blue', 'black', 'maroon', 'navy', 'red']

# Bars: donor name on x, summed aid on y.
ax.bar(PG4["Indicator Name"], PG4["Total_Aid_Niger"], color=custom, alpha=0.6)

# Keeping the green lettering theme.
ax.set_xlabel("Country", color='green')
ax.set_ylabel("Total DAC Flows to Niger (USD Billions)", color='green')
ax.set_title("Total DAC Flows by Country to Niger (1996-2021)", color='green')

# Grey plot background.
ax.set_facecolor('grey')

# Show y tick values as whole billions of dollars, e.g. "$2B".
formatter = mticker.FuncFormatter(lambda x, _: f"${x / 1e9:.0f}B")
ax.yaxis.set_major_formatter(formatter)

# Fixed axis limits, then one y tick per whole billion up to the largest total.
ax.set_ylim(0, 4e9)
ax.set_xlim(-0.5, len(grouped_data) - 0.5)
max_total_aid = PG4["Total_Aid_Niger"].max()
y_ticks = [tick * 1e9 for tick in range(int(max_total_aid / 1e9) + 1)]
ax.set_yticks(y_ticks)
plt.xticks(rotation=45)

# One "Total Aid" call-out per bar, in axes coordinates.
# NOTE(review): the positions were hand-placed for the original row order
# (UK, EU, South Korea, France, Germany, Canada, US, Japan per the original
# comments) -- confirm that PG4 still has that order if the data changes.
callout_positions = [
    (0.01, 0.047),    # United Kingdom
    (0.125, 0.868),   # European Union
    (0.255, 0.035),   # South Korea
    (0.375, 0.525),   # France
    (0.5, 0.285),     # Germany
    (0.628, 0.114),   # Canada
    (0.75, 0.49),     # United States
    (0.87, 0.15),     # Japan
]
callout_box = dict(boxstyle='round', facecolor='white', alpha=0.8)
for (x_pos, y_pos), formatted_total in zip(callout_positions, new):
    ax.text(x_pos, y_pos, f"Total Aid: {formatted_total}", transform=ax.transAxes, fontsize=10,
            verticalalignment='top', bbox=dict(callout_box))

# Perspective metrics (taken from World Bank summaries).
text_9 = """
Numbers for Perspective:
    - Niger's Net ODA received, (2021 percentage of GCF): 43.18%
    - Sub-Sahran Region, (2021 percentage of GCF): 13.7%
    - Making Niger 10th Highest in Region for 2021.
"""
ax.text(0.65, 0.98, text_9, transform=ax.transAxes, fontsize=11,
        verticalalignment='top', bbox=dict(callout_box))

# Render.
plt.show()

## Project Goal #5: 
Niger Leadership - ANOVA on Government Effectiveness.
- Government Effectiveness (6 representative sources): captures perceptions of the quality of public services, the quality of the civil service and the degree of its independence from political pressures, the quality of policy formulation and implementation, and the credibility of the government's commitment to such policies. Estimate gives the country's score on the aggregate indicator, in units of a standard normal distribution, i.e. ranging from approximately -2.5 to 2.5.

In [None]:
# Prepare the long-format table for a one-way ANOVA of Government
# Effectiveness by regime.
#
# Null Hypothesis (H0): the group means are equal, i.e. 'Regime' has no effect
# on the measured value.
# Alternative Hypothesis (H1): at least one regime's mean differs from at
# least one other regime's mean.
#
# Regime:
# 1996 - 1999: Ibrahim Bare Mainassara
# 1999 - 2010: Mamadou Tandja
# 2011- 2021:  Mahamadou Issoufou

# Regime -> the years assigned to it (each year belongs to exactly one regime).
regimes = {'Ibrahim Bare Mainassara': range(1996, 2000), 'Mamadou Tandja': range(2000, 2011), 'Mahamadou Issoufou': range(2011, 2022),}

# Wide -> long: one row per year, with the year label in 'Year' and the score
# in 'Indicator Value'.
# https://pandas.pydata.org/docs/reference/api/pandas.melt.html
# https://www.geeksforgeeks.org/python-pandas-melt/
df_melted = PG3.melt(id_vars=['Indicator Name', 'Indicator Code'], var_name='Year', value_name='Indicator Value')


def get_regime(year):
    """Return the name of the regime whose year range contains `year`.

    `year` must be an int so it can be tested against the ranges in
    `regimes`. Returns None when no regime covers that year.
    """
    # https://www.geeksforgeeks.org/python-next-method/
    return next((regime for regime, years in regimes.items() if year in years), None)


# Year labels are strings, so convert to int before the lookup.
df_melted['Regime'] = df_melted['Year'].astype(int).apply(get_regime)

# Rows that cannot be used in the ANOVA: those with no score and those that
# fall outside every regime. They are found from the data instead of being
# listed by hard-coded row number, so the cell keeps working if the year
# columns change.
unusable_rows = df_melted['Indicator Value'].isna() | df_melted['Regime'].isna()
rows_to_remove = df_melted.index[unusable_rows].tolist()

# Drop them and renumber the remaining rows from 0.
df_cleaned = df_melted.drop(rows_to_remove).reset_index(drop=True)

# Notebook-wide display option: 9 decimals, because the scores are small
# numbers on the -2.5 to +2.5 scale.
pd.options.display.float_format = '{:.9f}'.format
df_cleaned

In [None]:
# One-way ANOVA of the indicator value by regime.
# The formula interface does not accept a column name containing a space, so
# the value column is renamed with an underscore first.
df_cleaned = df_cleaned.rename(columns={'Indicator Value': 'Indicator_Value'})

# Ordinary least squares with Regime as a categorical factor.
anova_formula = 'Indicator_Value ~ C(Regime)'
model = ols(anova_formula, data=df_cleaned).fit()

# ANOVA table for the fitted model.
# https://www.statsmodels.org/stable/generated/statsmodels.stats.anova.anova_lm.html
anova_table = sm.stats.anova_lm(model)

# Plain-text print of the table (it gets copied into Excel afterwards).
print(anova_table)

In [None]:
# Follow-up to the ANOVA: Tukey HSD pairwise comparison of regimes.
#
# Null Hypothesis (H0): the means of the two groups being compared are equal.
# Alternative Hypothesis (H1): the means of the two groups being compared are
# not equal.
#
# ANOVA only says that some group differs; it does not say which one.
# Tukey's HSD ("honestly significant difference") test compares every pair of
# groups and reports which pairs of means differ, at the significance level
# passed as `alpha`.

# Values to compare and the group label of each value.
tukey_values = df_cleaned['Indicator_Value']
tukey_groups = df_cleaned['Regime']

# Pairwise comparison at alpha = 0.05.
m_comp = pairwise_tukeyhsd(endog=tukey_values, groups=tukey_groups, alpha=0.05)

# The result object already prints as a readable table.
print(m_comp)

## Project Goal #6:
Linear Model (1, 2, or 3)