In [None]:
import yaml
import psycopg2
import os
import os.path
import matplotlib.pyplot as plt
import seaborn as sb
import pandas as pd
import numpy as np

In [None]:
def pg_connect(credentials_yaml, db='<db_name>', hn='<host_name>', p='<port_name>', sslmode='require'):
    """Open a PostgreSQL connection using credentials read from a YAML file.

    Parameters
    ----------
    credentials_yaml : str
        Path to a YAML file providing the keys ``username`` and ``pw``.
    db, hn, p : str
        Database name, host name and port forwarded to ``psycopg2.connect``.
        The defaults are placeholders; pass real values to connect.
    sslmode : str
        SSL mode forwarded to ``psycopg2.connect``.

    Returns
    -------
    tuple
        ``(cur, conn)``: a cursor on which the role
        ``el_salvador_mined_education_write`` has already been set, and the
        connection it belongs to.

    Raises
    ------
    KeyError
        If the YAML file lacks ``username`` or ``pw``.
    Exception
        Any error raised while connecting or setting the role is re-raised
        unchanged.
    """
    with open(credentials_yaml) as f:
        # safe_load only builds plain Python objects; yaml.load without an
        # explicit Loader can construct arbitrary objects and is rejected by
        # recent PyYAML releases.
        configs = yaml.safe_load(f)

    un = configs['username']
    pw = configs['pw']

    try:
        conn = psycopg2.connect(dbname=db,
                                user=un,
                                password=pw,
                                host=hn,
                                port=p,
                                sslmode=sslmode)  # honour the caller's sslmode
    except Exception:
        print("Error connecting to db.")
        raise  # bare raise keeps the original traceback

    try:
        cur = conn.cursor()
        cur.execute('set role el_salvador_mined_education_write;')
    except Exception:
        # Do not leak an open connection when the session setup fails.
        conn.close()
        raise
    return cur, conn

In [None]:
# Locate the credentials file relative to the working directory and open
# the database session used by the cells below.
home_dir = os.getcwd()
credential_dir = '../config'  # joining a single path component returns it unchanged
credentials_yaml = os.path.join(credential_dir, 'dhany.yml')  # dhany's credentials file
cur, conn = pg_connect(credentials_yaml)

In [None]:
## Uses GRADO_CODE
# Number of distinct entities per (year, grado_code), ordered by grade code
# and then by year.
enrollment_sql = """
select distinct(year), count(distinct(entity)) as student_count, grado_code
from cleaned.events
group by year, grado_code
order by grado_code asc,
year asc;
"""
cur.execute(enrollment_sql)
data = cur.fetchall()  # rows of (year, student_count, grado_code)

In [None]:
## Legend for GRADO_CODE to GRADO mapping
# Every (grado, grado_code) pair present in cleaned.events, ordered by code.
legend_sql = """
select distinct(grado), grado_code
from cleaned.events
group by grado, grado_code
order by grado_code asc;
"""
cur.execute(legend_sql)

legend = cur.fetchall()  # rows of (grado, grado_code)

In [None]:
## Preview data using grado_code
# First ten (year, student_count, grado_code) rows of the query result.
data[:10]

In [None]:
## Calculate the average student count of each grade across the years in `data`
from collections import defaultdict
from decimal import Decimal  # explicit name instead of a wildcard import
from statistics import mean

# Gather the yearly student counts under their grade code; rows are
# (year, student_count, grado_code) as selected by the query.
calc_avg_dict = defaultdict(list)
for _year, student_count, grado_code in data:
    calc_avg_dict[grado_code].append(student_count)

# One [grado_code, average] pair per grade, in first-seen order of the grade
# codes; the average is a Decimal rounded to two places.
avg_list = [[grado_code, round(Decimal(mean(counts)), 2)]
            for grado_code, counts in calc_avg_dict.items()]

In [None]:
# Tabulate the [grado_code, average] pairs (columns 0 and 1) and keep the
# grade codes aside for use as tick labels.
df = pd.DataFrame(avg_list)
col = [grade_code for grade_code, _average in avg_list]

In [None]:
# Display the [grado_code, average student count] pairs computed above.
avg_list

In [None]:
# Bar chart of the averaged frame with one tick per grade code.
# NOTE(review): the averages are stored as Decimal objects; pandas may skip
# non-numeric columns when plotting. Confirm the bars show the averages.
ax = df.plot.bar()

# Derive tick positions from the labels instead of assuming exactly 16 grade
# codes; a fixed count fails whenever the number of labels differs.
tick_positions = np.arange(len(col))
ax.set_xticks(tick_positions)
ax.set_xticklabels(col)
# Leave one unit of margin on both sides so the last bar is not clipped.
ax.set_xlim(-1, len(col))
plt.show()

In [None]:
## Define x, y, z lists
# Split the (year, student_count, grado_code) rows into parallel lists.
X = [year for year, _count, _code in data]   # Years, needs to be columns
Y = [count for _year, count, _code in data]  # count students
Z = [code for _year, _count, code in data]   # Grado code, needs to be index

In [None]:
# Default size (width, height) for figures created after this cell.
from matplotlib import rcParams
rcParams.update({'figure.figsize': (18, 16)})

In [None]:
%matplotlib inline
df = pd.DataFrame(np.column_stack((Y,Z)), index = X, columns = ['grado', 'year'])

plt.figure(figsize=(30,20))

df.plot.bar()
plt.show()

In [None]:
## Create plots
# Student count per year, one bar per grade code.
# countplot tallies rows, and the query returns exactly one row per
# (year, grado_code), so every bar would have height 1. barplot with y=Y
# draws the actual student counts instead.
plot = sb.barplot(x=X, y=Y, hue=Z, orient='v', palette="Accent")

In [None]:
# Scratch cell: displays the array returned by np.arange(5).
import numpy as np  # numpy is also imported in the notebook's first cell
np.arange(5)