# NameX Six Months Stats

**!pip** can be used to install any libraries that were not installed when the environment was created.

This notebook assumes you've installed the packages listed in requirements.txt (`pip install -r requirements.txt`) before launching Jupyter.

The contents of requirements.txt should be:

```
jupyter
psycopg2-binary
sqlalchemy
ipython-sql
simplejson
pandas
matplotlib
spacy
papermill
schedule
```

We need to load these libraries into our notebook in order to query, load, manipulate and view the data.

In [1]:
from datetime import datetime, timedelta
from urllib.parse import quote

import matplotlib
import pandas as pd
import psycopg2
import simplejson
import sqlalchemy
from IPython.core.display import HTML
# from ..NotebookScheduler import NotebookScheduler
%load_ext sql
%config SqlMagic.displaylimit = 5

Read in the connection string info for DEV, TEST or PROD, depending on which database you wish to run the stats against. Uncomment exactly one of the credential cells below.

In [2]:
# Local Credentials
# with open("creds-dev-forward.json.nogit") as fh:
#     creds = simplejson.loads(fh.read())

In [3]:
# DEV Credentials
# Parse the JSON credentials file straight from the open file handle; the
# resulting mapping is read by the connection cell further down.
with open("creds-dev.json.nogit") as fh:
    creds = simplejson.load(fh)

In [4]:
# TEST Credentials
# with open("creds-test.json.nogit") as fh:
#     creds = simplejson.loads(fh.read())

In [5]:
# PROD Credentials
# with open("creds-prod.json.nogit") as fh:
#     creds = simplejson.loads(fh.read())

This will create the connection to the database and prep the jupyter magic for SQL

In [6]:
connect_to_db = 'postgresql://' + \
                creds['username'] + ":" + creds['password'] +'@' + \
                creds['hostname'] + ':' + creds['port_num'] + '/' + creds['db_name'];
%sql $connect_to_db

'Connected: postgres@namex'

Simplest query to run to ensure our libraries are loaded and our DB connection is working

In [7]:
%%sql
SELECT now() AT TIME ZONE 'PST' AS current_date
-- Sanity check that returns the database clock shifted to PST.
-- NOTE(review) PST looks like a fixed-offset abbreviation, so no daylight
-- saving adjustment would be applied - confirm that this is intended.

 * postgresql://postgres:***@localhost:54323/namex
1 rows affected.


current_date
2019-11-14 13:50:21.611074


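Six-month totals: completed (approved, rejected or conditional) requests per examiner for the six months preceding the date on which this notebook is run.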

In [8]:
%%sql stat_six_month_completed  <<
SELECT r.user_id
     , (SELECT username FROM users u WHERE u.id = r.user_id)       AS examiner
     -- one counter per final state
     , count(r.*) FILTER (WHERE r.state_cd = 'APPROVED')           AS approved
     , count(r.*) FILTER (WHERE r.state_cd = 'REJECTED')           AS rejected
     , count(r.*) FILTER (WHERE r.state_cd = 'CONDITIONAL')        AS conditional
     , count(r.*) FILTER (WHERE r.priority_cd = 'Y')               AS priorities
     -- total is the row count plus the priority count, so every priority
     -- request contributes twice
     -- NOTE(review) confirm that this weighting is intended
     , count(r.*) + count(r.*) FILTER (WHERE r.priority_cd = 'Y')  AS total
FROM requests r
-- NOTE(review) presumably user 1 is a system account - confirm
WHERE r.user_id <> 1
  AND r.state_cd IN ('APPROVED', 'REJECTED', 'CONDITIONAL')
  -- keep rows whose last update, expressed in PST, falls after the date six
  -- months before the current date
  AND date(r.last_update AT TIME ZONE 'PST') > current_date - interval '6 months'
GROUP BY r.user_id
ORDER BY r.user_id

 * postgresql://postgres:***@localhost:54323/namex
6 rows affected.
Returning data to local variable stat_six_month_completed


In [9]:
# Materialise the SQL result set as a pandas DataFrame.
edt = stat_six_month_completed.DataFrame()
# Remove the 'idir/' text from examiner names. regex=False forces a plain
# substring replacement, so the result does not depend on which default the
# installed pandas version uses for `regex`.
edt['examiner'] = edt['examiner'].str.replace('idir/', '', regex=False)

In [10]:
# Percentage columns, rounded to one decimal place and measured against the
# query's "total" column. Conditional results are counted with approvals.
# NOTE(review): the stat_six_month_completed query builds "total" as the row
# count plus the priority count, so approved_% and rejected_% need not add up
# to 100 -- confirm that this is the intended denominator.
favourable = edt['approved'] + edt['conditional']
edt['approved_%'] = favourable.div(edt['total']).mul(100).round(1)
edt['rejected_%'] = edt['rejected'].div(edt['total']).mul(100).round(1)

# Temporarily lift pandas' row/column display limits while rendering the
# table, then print the sum of the "total" column.
show_everything = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*show_everything):
    display(HTML(edt.to_html()))
    print('grand total', edt['total'].sum())

Unnamed: 0,user_id,examiner,approved,rejected,conditional,priorities,total,approved_%,rejected_%
0,24,katie-ex,1,0,0,0,1,100.0,0.0
1,29,names-examiner,1,0,0,1,2,50.0,0.0
2,30,wamoar,2,0,0,2,4,50.0,0.0
3,67,schen,2,0,0,1,3,66.7,0.0
4,75,ltrent,12,12,14,27,65,40.0,18.5
5,80,github/scottrumsby,69,21,12,12,114,71.1,18.4


grand total 189


Save to CSV

In [11]:
# Name the export after today's date (YYYY-MM-DD, taken from the local clock
# of the machine running the notebook) and write the table as comma-separated
# UTF-8 without the DataFrame index.
today_stamp = datetime.now().strftime('%Y-%m-%d')
filename = 'six_month_totals_before_' + today_stamp + '.csv'
edt.to_csv(filename, index=False, sep=',', encoding='utf-8')