
<img src="http://www.nserc-crsng.gc.ca/_gui/wmms.gif" alt="Canada logo" align="right">

<br>

<img src="http://www.triumf.ca/sites/default/files/styles/gallery_large/public/images/nserc_crsng.gif?itok=H7AhTN_F" alt="NSERC logo" align="right" width = 90>



# Exploring NSERC Awards Data


Canada's [Open Government Portal](http://open.canada.ca/en) includes [NSERC Awards Data](http://open.canada.ca/data/en/dataset/c1b0f627-8c29-427c-ab73-33968ad9176e) from 1995 through 2016.

The awards data (in .csv format) were copied to an [Amazon Web Services S3 bucket](http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html). <br>

This open Jupyter notebook shows all of the "orphaned" researchers who are funded by the 1508 Mathematics and Statistics committee in 2016 but are not part of the mathematics department.

> **Acknowledgement:** I thank [Ian Allison](https://github.com/ianabc) and [James Colliander](http://colliand.com) of the [Pacific Institute for the Mathematical Sciences](http://www.pims.math.ca/) for building the [JupyterHub service](http://syzygy.ca) and for help with this notebook. -- I. Heisz

In [48]:
import numpy as np
import pandas as pd
import sys

df = pd.DataFrame()

startYear = 2014
endYear   = 2018  # The last year is not included, so 2017 means we include the 2016 collection but not 2017.

## some columns dropped here for convenience
for year in range(startYear, endYear):
#    file = 'https://s3.ca-central-1.amazonaws.com/open-data-ro/NSERC/NSERC_GRT_FYR' + str(year) + '_AWARD.csv.gz'
# https://pims-open-data.s3-us-west-2.amazonaws.com/NSERC_GRT_FYR2008_AWARD.csv
    file = 'https://pims-open-data.s3-us-west-2.amazonaws.com/NSERC_GRT_FYR' + str(year) + '_AWARD.csv'
    df = df.append(pd.read_csv(file, 
                               usecols = [1, 2, 3, 4, 5, 7, 9, 11, 13, 17, 28], 
                               encoding='latin-1'
                              )
                  )  
    print(year)
 

 




2014
2015
2016
2017


In [49]:
df

Unnamed: 0,AwardAmount,CommitteeCode,CountryEN,Department-Département,FiscalYear-Exercice financier,Institution-Établissement,Name-Nom,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
0,20000,187,CANADA,Biology,2014,Nipissing University,"(Mehes)Smith, Melanie",44,,Postdoctoral Fellowships,Ontario,Molecular biology
1,22000,1510,CANADA,Electrical & Computer Engineering,2014,University of Waterloo,"Aagaard, Mark",33,,Discovery Grants Program - Individual,Ontario,Computer hardware
2,32000,1510,CANADA,Electrical and Computer Engineering,2014,University of British Columbia,"Aamodt, Tor",2,,Discovery Grants Program - Individual,British Columbia,Computer architecture and design
3,22000,1510,CANADA,Electrical & Computer Engineering,2014,University of Toronto,"Aarabi, Parham",31,,Discovery Grants Program - Individual,Ontario,Digital signal processing
4,4500,996,CANADA,Chemistry,2014,The King's University College,"Aarbo, Alyxandra",3333,,University Undergraduate Student Research Awards,Alberta,Physical chemistry
...,...,...,...,...,...,...,...,...,...,...,...,...
25546,122000,96,CANADA,Renewable Resources,2017,University of Alberta,"Zwiazek, Janusz",9,Collaborative Research and Development Grants,,Alberta,Plant and treebiology
25547,100000,96,CANADA,Renewable Resources,2017,University of Alberta,"Zwiazek, Janusz",9,Collaborative Research and Development Grants,,Alberta,Plant and treebiology
25548,103600,96,CANADA,Renewable Resources,2017,University of Alberta,"Zwiazek, Janusz",9,Collaborative Research and Development Grants,,Alberta,Not available
25549,4500,996,CANADA,Botany,2017,University of British Columbia,"Zwimpfer, Charlotte",2,University Undergraduate Student Research Awards,,British Columbia,Evolution and ecology


In [50]:
df.columns

Index(['AwardAmount', 'CommitteeCode', 'CountryEN', 'Department-Département',
       'FiscalYear-Exercice financier', 'Institution-Établissement',
       'Name-Nom', 'OrganizationID', 'ProgramNameEN', 'ProgramNaneEN',
       'ProvinceEN', 'ResearchSubjectEN'],
      dtype='object')

In [39]:
## Rename columns for better readability.
df.columns = ['Name', 'Department', 'OrganizationID',
       'Institution', 'ProvinceEN', 'CountryEN',
       'FiscalYear', 'AwardAmount', 'ProgramNaneEN',
       'Committee', 'ResearchSubjectEN']

In [51]:
df.columns = ['AwardAmount', 'Committee', 'CountryEN', 'Department',
       'FiscalYear', 'Institution',
       'Name', 'OrganizationID', 'ProgramNameEN', 'ProgramNaneEN',
       'ProvinceEN', 'ResearchSubjectEN']

In [52]:
selectedData = df
selectedData = selectedData.loc[(selectedData['Committee'] == 1508)]

In [53]:
# remove people in the math department
subject = 'Math'
selectedData = selectedData[selectedData['Department'].str.contains(subject)==False]

# remove people in the statistics department
subject = 'Stat'
selectedData = selectedData[selectedData['Department'].str.contains(subject)==False]

In [54]:
selectedData = selectedData.drop('Committee', axis=1)
selectedData

Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
80,28000,CANADA,"Epidemiology, Biostatistics and Occupational H...",2014,McGill University,"Abrahamowicz, Michal",61,,Discovery Grants Program - Individual,Québec,Statistics and probability
289,28000,CANADA,No Department/Division,2014,McMaster University,"Alama, Stanley",27,,Discovery Grants Program - Individual,Ontario,Asymptotics and applied classical analysis
1922,14000,CANADA,Ottawa Institute of Systems Biology; Departmen...,2014,University of Ottawa,"Bickel, David",28,,Discovery Grants Program - Individual,Ontario,Statistical theory
2194,11000,CANADA,Faculty of Science,2014,University of Ontario Institute of Technology,"Bohun, Christopher",19222,,Discovery Grants Program - Individual,Ontario,Mathematical modelling
2552,23000,CANADA,mathematics department,2014,University of Waterloo,"Boyle, Phelim",33,,Discovery Grants Program - Individual,Ontario,Applied probability
...,...,...,...,...,...,...,...,...,...,...,...
24509,18000,CANADA,"Pharmacy, Leslie Dan Faculty of - Pharmacy, Le...",2017,University of Waterloo,"Wong, WilliamWaiLun",33,Discovery Grants Program - Individual,,Ontario,Applied statistics
24539,14000,CANADA,"Research, Office of - Research, Office of",2017,University of Western Ontario,"Woolford, Douglas",36,Discovery Grants Program - Individual,,Ontario,Applied statistics
24938,18000,CANADA,"Medicine, Faculty of - Medicine, Faculty of",2017,Memorial University of Newfoundland,"Yi, Yanqing",89,Discovery Grants Program - Individual,,Newfoundland and Labrador,Applied statistics
25416,11000,CANADA,"Science and Engineering , Faculty of",2017,York University,"Zhu, Hongmei",38,Discovery Grants Program - Individual,,Ontario,Algorithms


## Institution Orphans

In [55]:
pims_sites = ['SFU', 'UA', 'UBC', 'UC', 'UL', 'UM', 'UR', 'US', 'UV']

In [56]:
org_id = {'SFU': 5,
          'UA': 9,
          'UBC': 2,
          'UC': 11,
          'UL': 12,
          'UM': 19,
          'UR': 17,
          'US': 16,
          'UV': 7}

In [57]:
## Localize to single institution (e.g. UA)
df = selectedData.loc[(selectedData['OrganizationID'] == org_id['UC'])]
df.drop(['ProvinceEN', 'CountryEN', 'OrganizationID', 'Institution'], axis = 1)

Unnamed: 0,AwardAmount,Department,FiscalYear,Name,ProgramNameEN,ProgramNaneEN,ResearchSubjectEN
5129,25000,"Veterinary Medicine, Faculty of - Veterinary M...",2015,"Deardon, Rob",,Discovery Grants Program - Individual,Biostatistics
12942,13000,Oncology - Oncology,2015,"Li, Haocheng",,Discovery Grants Program - Individual,Nonparametric inference
5205,25000,"Veterinary Medicine, Faculty of - Veterinary M...",2016,"Deardon, Rob",,Discovery Grants Program - Individual,Biostatistics
13184,13000,Oncology - Oncology,2016,"Li, Haocheng",,Discovery Grants Program - Individual,Nonparametric inference
5369,25000,"Veterinary Medicine, Faculty of - Veterinary M...",2017,"Deardon, Rob",Discovery Grants Program - Individual,,Applied statistics
13568,13000,Oncology - Oncology,2017,"Li, Haocheng",Discovery Grants Program - Individual,,Biostatistics
19995,14000,Community Health Sciences - Community Health S...,2017,"Sajobi, Tolulope",Discovery Grants Program - Individual,,Applied statistics


In [58]:
## List the records of orphans for each PIMS site
for inst in pims_sites:
    orph_inst = selectedData.loc[(selectedData['OrganizationID'] == org_id[inst])]
    orph_inst.drop(['ProvinceEN', 'CountryEN', 'OrganizationID', 'Institution'], axis = 1)
    display(orph_inst)

Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
7571,14000,CANADA,Economics,2014,Simon Fraser University,"Gencay, Ramazan",5,,Discovery Grants Program - Individual,British Columbia,Nonparametric inference
7614,14000,CANADA,Economics,2015,Simon Fraser University,"Gencay, Ramazan",5,,Discovery Grants Program - Individual,British Columbia,Nonparametric inference
14531,17000,CANADA,"Health Sciences, Faculty of - Health Sciences,...",2015,Simon Fraser University,"McCandless, Lawrence",5,,Discovery Grants Program - Individual,British Columbia,Biostatistics
7778,14000,CANADA,Economics,2016,Simon Fraser University,"Gencay, Ramazan",5,,Discovery Grants Program - Individual,British Columbia,Nonparametric inference
14786,17000,CANADA,"Health Sciences, Faculty of - Health Sciences,...",2016,Simon Fraser University,"McCandless, Lawrence",5,,Discovery Grants Program - Individual,British Columbia,Biostatistics
8115,14000,CANADA,Economics,2017,Simon Fraser University,"Gencay, Ramazan",5,Discovery Grants Program - Individual,,British Columbia,Nonparametric inference
15198,17000,CANADA,"Health Sciences, Faculty of - Health Sciences,...",2017,Simon Fraser University,"McCandless, Lawrence",5,Discovery Grants Program - Individual,,British Columbia,Biostatistics


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
9248,12000,CANADA,Dentistry,2014,University of Alberta,"Heo, Giseon",9,,Discovery Grants Program - Individual,Alberta,Applied statistics
18587,12000,CANADA,Pediatrics,2014,University of Alberta,"Rosychuk, Rhonda",9,,Discovery Grants Program - Individual,Alberta,Statistics and probability
18875,15000,CANADA,Campus Saint-Jean,2014,University of Alberta,"Safouhi, Hassan",9,,Discovery Grants Program - Individual,Alberta,Numerical analysis
1611,11000,CANADA,Biomedical Engineering - Biomedical Engineering,2015,University of Alberta,"Belhamadia, Youssef",9,,Discovery Grants Program - Individual,Alberta,Mathematical biology andphysiology
9317,12000,CANADA,Dentistry,2015,University of Alberta,"Heo, Giseon",9,,Discovery Grants Program - Individual,Alberta,Applied statistics
18781,12000,CANADA,Pediatrics,2015,University of Alberta,"Rosychuk, Rhonda",9,,Discovery Grants Program - Individual,Alberta,Statistics and probability
19053,15000,CANADA,Campus Saint-Jean,2015,University of Alberta,"Safouhi, Hassan",9,,Discovery Grants Program - Individual,Alberta,Numerical analysis
9507,18000,CANADA,Dentistry - Dentistry,2016,University of Alberta,"Heo, Giseon",9,,Discovery Grants Program - Individual,Alberta,Applied statistics
19226,33000,CANADA,Pediatrics - Pediatrics,2016,University of Alberta,"Rosychuk, Rhonda",9,,Discovery Grants Program - Individual,Alberta,Statistical theory
9811,18000,CANADA,Dentistry - Dentistry,2017,University of Alberta,"Heo, Giseon",9,Discovery Grants Program - Individual,,Alberta,Applied statistics


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
3703,38000,CANADA,statistics,2014,University of British Columbia,"Chen, Jiahua",2,,Discovery Grants Program - Individual,British Columbia,Nonparametric inference
10737,11000,CANADA,No Department/Division,2014,University of British Columbia,"Kasahara, Hiroyuki",2,,Discovery Grants Program - Individual,British Columbia,Statistical theory
13605,15000,CANADA,"Population and Public Health, School of",2014,University of British Columbia,"MacNab, Ying",2,,Discovery Grants Program - Individual,British Columbia,Biostatistics
18491,21000,CANADA,"Science, Faculty of",2014,University of British Columbia,"Rolfsen, Dale",2,,Discovery Grants Program - Individual,British Columbia,Algebra
6862,14000,CANADA,Sauder School of Business - Sauder School of B...,2015,University of British Columbia,"Fisher, Adlai",2,,Discovery Grants Program - Individual,British Columbia,Statistics and probability
10838,11000,CANADA,Economics,2015,University of British Columbia,"Kasahara, Hiroyuki",2,,Discovery Grants Program - Individual,British Columbia,Statistical theory
13361,20000,CANADA,Okanagan - Irving K Barber School of Arts and ...,2015,University of British Columbia,"Loeppky, Jason",2,,Discovery Grants Program - Individual,British Columbia,Applied statistics
13800,15000,CANADA,"Population and Public Health, School of",2015,University of British Columbia,"MacNab, Ying",2,,Discovery Grants Program - Individual,British Columbia,Biostatistics
18679,21000,CANADA,"Science, Faculty of",2015,University of British Columbia,"Rolfsen, Dale",2,,Discovery Grants Program - Individual,British Columbia,Algebra
19322,14000,CANADA,Psychology - Psychology,2015,University of British Columbia,"Savalei, Victoria",2,,Discovery Grants Program - Individual,British Columbia,Applied statistics


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
5129,25000,CANADA,"Veterinary Medicine, Faculty of - Veterinary M...",2015,University of Calgary,"Deardon, Rob",11,,Discovery Grants Program - Individual,Alberta,Biostatistics
12942,13000,CANADA,Oncology - Oncology,2015,University of Calgary,"Li, Haocheng",11,,Discovery Grants Program - Individual,Alberta,Nonparametric inference
5205,25000,CANADA,"Veterinary Medicine, Faculty of - Veterinary M...",2016,University of Calgary,"Deardon, Rob",11,,Discovery Grants Program - Individual,Alberta,Biostatistics
13184,13000,CANADA,Oncology - Oncology,2016,University of Calgary,"Li, Haocheng",11,,Discovery Grants Program - Individual,Alberta,Nonparametric inference
5369,25000,CANADA,"Veterinary Medicine, Faculty of - Veterinary M...",2017,University of Calgary,"Deardon, Rob",11,Discovery Grants Program - Individual,,Alberta,Applied statistics
13568,13000,CANADA,Oncology - Oncology,2017,University of Calgary,"Li, Haocheng",11,Discovery Grants Program - Individual,,Alberta,Biostatistics
19995,14000,CANADA,Community Health Sciences - Community Health S...,2017,University of Calgary,"Sajobi, Tolulope",11,Discovery Grants Program - Individual,,Alberta,Applied statistics


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
7098,17000,CANADA,Agribusiness & Agricultural Economics,2014,University of Manitoba,"Frank, Julieta",19,,Discovery Grants Program - Individual,Manitoba,Time series analysis
17276,12000,CANADA,Computer Science,2014,University of Manitoba,"Pizzi, Nicolino",19,,Discovery Grants Program - Individual,Manitoba,Mathematical modelling
21541,17000,CANADA,Community Health Sciences,2014,University of Manitoba,"Torabi, Mahmoud",19,,Discovery Grants Program - Individual,Manitoba,Survey methodology
17457,12000,CANADA,Computer Science,2015,University of Manitoba,"Pizzi, Nicolino",19,,Discovery Grants Program - Individual,Manitoba,Mathematical modelling
21769,17000,CANADA,Community Health Sciences,2015,University of Manitoba,"Torabi, Mahmoud",19,,Discovery Grants Program - Individual,Manitoba,Survey methodology
24366,13000,CANADA,Warren Centre for Actuarial Studies and Resear...,2015,University of Manitoba,"Zhou, Rui",19,,Discovery Grants Program - Individual,Manitoba,Applied statistics
7231,17000,CANADA,Agribusiness & Agricultural Economics,2016,University of Manitoba,"Frank, Julieta",19,,Discovery Grants Program - Individual,Manitoba,Time series analysis
9086,16700,CANADA,Warren Centre for Actuarial Studies and Research,2016,University of Manitoba,"Hao, Xuemiao",19,,Discovery Grants Program - Individual,Manitoba,Applied probability
22287,27000,CANADA,Community Health Sciences - Community Health S...,2016,University of Manitoba,"Torabi, Mahmoud",19,,Discovery Grants Program - Individual,Manitoba,Applied statistics
24975,13000,CANADA,Warren Centre for Actuarial Studies and Resear...,2016,University of Manitoba,"Zhou, Rui",19,,Discovery Grants Program - Individual,Manitoba,Applied statistics


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN
6712,15000,CANADA,"Public Health, School of",2014,University of Saskatchewan,"Feng, CindyXin",16,,Discovery Grants Program - Individual,Saskatchewan,Statistics and probability
6704,15000,CANADA,"Public Health, School of",2015,University of Saskatchewan,"Feng, CindyXin",16,,Discovery Grants Program - Individual,Saskatchewan,Statistics and probability
6810,15000,CANADA,"Public Health, School of",2016,University of Saskatchewan,"Feng, CindyXin",16,,Discovery Grants Program - Individual,Saskatchewan,Statistics and probability
7125,15000,CANADA,"Public Health, School of",2017,University of Saskatchewan,"Feng, CindyXin",16,Discovery Grants Program - Individual,,Saskatchewan,Statistics and probability


Unnamed: 0,AwardAmount,CountryEN,Department,FiscalYear,Institution,Name,OrganizationID,ProgramNameEN,ProgramNaneEN,ProvinceEN,ResearchSubjectEN


In [59]:
## List the names of orphans for each PIMS site
for inst in pims_sites:
    orph_inst = selectedData.loc[(selectedData['OrganizationID'] == org_id[inst])]
    orph_inst.drop(['ProvinceEN', 'CountryEN', 'OrganizationID', 'Institution'], axis = 1)
    print(inst,orph_inst['Name'].unique())

SFU ['Gencay, Ramazan' 'McCandless, Lawrence']
UA ['Heo, Giseon' 'Rosychuk, Rhonda' 'Safouhi, Hassan' 'Belhamadia, Youssef']
UBC ['Chen, Jiahua' 'Kasahara, Hiroyuki' 'MacNab, Ying' 'Rolfsen, Dale'
 'Fisher, Adlai' 'Loeppky, Jason' 'Savalei, Victoria' 'Shizgal, Bernard']
UC ['Deardon, Rob' 'Li, Haocheng' 'Sajobi, Tolulope']
UL []
UM ['Frank, Julieta' 'Pizzi, Nicolino' 'Torabi, Mahmoud' 'Zhou, Rui'
 'Hao, Xuemiao' 'Shamseddine, Khodr']
UR []
US ['Feng, CindyXin']
UV []
