In [36]:
import glob
import os
import pandas as pd

In [37]:
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import shapely.geometry as sgeom
import matplotlib.colors as mcolors
import matplotlib.cm as mcm
import matplotlib.ticker as mticker

import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader

In [38]:
# Load every CSV export into its own DataFrame.
# glob.glob() returns paths in arbitrary order, but later cells depend on the
# position of each frame inside `dfs` (zip against a list of decades, dfs[-2],
# dfs[-1]), so the paths are sorted to make that order reproducible.
# NOTE(review): assumes the file names sort chronologically by decade -- confirm.
dfs = [pd.read_csv(fn) for fn in sorted(glob.glob('NEH_CSV/*.csv'))]

# Concatenate all data into one DataFrame
big_frame = pd.concat(dfs, ignore_index=True)

In [39]:
import numpy as np
# Scratch check: base-10 logarithm of 740060692 (about 8.87, i.e. just under 1e9).
# NOTE(review): the origin of the literal is not shown in the notebook --
# presumably a total award amount used to pick colour-scale bounds; confirm.
np.log10(740060692)

8.86926733746051

In [42]:
# List the column labels of the combined frame.
big_frame.columns

Index(['AppNumber', 'ApplicantType', 'ApprovedMatching', 'ApprovedOutright',
       'AwardMatching', 'AwardOutright', 'BeginGrant', 'CouncilDate',
       'Discipline', 'DisciplineCount', 'Division', 'EndGrant', 'InstCity',
       'InstCountry', 'InstPostalCode', 'InstState', 'Institution',
       'OrganizationType', 'OriginalAmount', 'Participant', 'ParticipantCount',
       'PrimaryDiscipline', 'Program', 'ProjectDesc', 'ProjectTitle',
       'Supplement', 'SupplementAmount', 'SupplementCount', 'ToSupport',
       'Unnamed: 0', 'YearAwarded'],
      dtype='object')

In [61]:
%matplotlib inline
# Show the key award columns ordered from the smallest to the largest outright award.
# DataFrame.sort(columns=...) is deprecated and missing from newer pandas;
# sort_values(by=...) is the supported equivalent with the same ascending default.
big_frame[['Institution', 'ProjectTitle',
           'YearAwarded','Program','AwardOutright','AwardMatching']].sort_values(by='AwardOutright')


  app.launch_new_instance()


Unnamed: 0,Institution,ProjectTitle,YearAwarded,Program,AwardOutright,AwardMatching
28244,Princeton University Press,Purchase of Penta Tele/Media Interface for Tra...,1984,Scholarly Publications,-1041.77,11748.00
27061,University of Virginia,The Papers of George Washington,1981,Editions,-69.92,135000.00
27851,Unaffiliated Independent Scholar,Ancient Maya Writing,1984,Basic Research,-40.03,16070.00
31649,Pennsylvania State University - Brandywine,"New Lives, New Worlds: Utopian Novels by Women...",1988,"Travel to Collections, 11/85 - 2/95",0.00,0.00
15179,Massachusetts Institute of Technology,"Faith, Science, and the Future",1979,Special Projects,0.00,75000.00
40014,Harvard Semitic Museum,Publication of the Wadi Daliyeh Seal Impressions,1991,"Travel to Collections, 11/85 - 2/95",0.00,0.00
49095,Northwestern University,"An Excavation in the Theater of Pompey, Rome",2002,Collaborative Research,0.00,25000.00
49102,Alabama State University,Center for the Study of Civil Rights and Afric...,2000,Challenge Grants,0.00,500000.00
49103,American Antiquarian Society,Building Endowment for Acquisitions.,2000,Challenge Grants,0.00,450000.00
49104,American Council of Learned Societies,Endowment Campaign for ACLS Fellowship Program.,2000,Challenge Grants,0.00,500000.00


In [None]:
%matplotlib inline

def plot_percentange(yr_state, yr):
    """Draw a US choropleth of each state's percentage share of ``yr_state``.

    Parameters
    ----------
    yr_state : pandas.Series
        Amounts indexed by two-letter state postal code (the notebook builds
        it with ``df.groupby('InstState')['AwardOutright'].sum()``).
    yr : int or str
        Decade label; inserted into the title and into the output file name
        ``NEH_state_<yr>s``.

    Every state polygon in the Natural Earth shapefile is coloured by
    ``value / yr_state.sum() * 100`` on a linear 0-20 scale.  States that have
    no entry in ``yr_state`` are drawn in light gray.  The figure is written
    to disk and then shown.
    """
    fig = plt.figure()

    ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())
    ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())
    # NOTE(review): background_patch / outline_patch belong to older cartopy
    # releases; newer ones appear to expose ax.patch / ax.spines['geo'] instead --
    # confirm against the installed version.
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

    shapename = 'admin_1_states_provinces_lakes_shp'
    states_shp = shpreader.natural_earth(resolution='110m',
                                         category='cultural', name=shapename)
    states = shpreader.Reader(states_shp)

    cmap = mcm.viridis_r
    # Linear colour scale from 0 to 20 percent; the colorbar uses extend='both'
    # for values outside that range.
    norm = mcolors.Normalize(vmin=0, vmax=20)

    # Loop-invariant denominator for the percentage.
    total = yr_state.sum()

    for state, geo in zip(states.records(), states.geometries()):
        # Look the value up by postal code.  Pairing yr_state positionally with
        # the shapefile records would colour states with each other's values,
        # because the two sequences are not in the same order.
        sn = state.attributes['postal']
        val = yr_state.get(sn)
        if val is None:
            # No data for this state: draw it in a neutral colour.
            color = 'lightgray'
        else:
            # Guard against an all-zero input so the share is 0 rather than NaN.
            pct = ((val * 1.0) / total) * 100 if total else 0.0
            color = cmap(norm(pct))
        ax.add_geometries([geo], ccrs.PlateCarree(), facecolor=color, edgecolor='black')

    # Stand-alone mappable so a colorbar can be drawn for manually coloured shapes.
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])  # public API instead of assigning the private sm._A
    cb = plt.colorbar(sm, ax=ax, fraction=.035, pad=.0, aspect=15, extend='both', anchor=[-5,0])
    cb.ax.tick_params(labelsize=14)
    cb.set_label("% Awarded Outright", fontsize=16)
    ax.set_title("% Funded in the {}s".format(yr), fontsize=18)

    # Save before showing: some backends release the figure once show() returns.
    fig.savefig("NEH_state_{}s".format(yr))
    plt.show()

In [None]:
# Draw one choropleth per decade file.
# NOTE(review): frames are paired with decades by position, so this assumes
# `dfs` holds exactly these six decades in chronological order -- confirm.
for df, yr in zip(dfs, [1960,1970,1980,1990,2000,2010]):
    yr_state = df.groupby('InstState')['AwardOutright'].sum()
    # The visible notebook cells define no `plot`; the map function that takes
    # (yr_state, yr) is plot_percentange.
    plot_percentange(yr_state, yr)

In [None]:
# Keep only the grants whose institution name contains the CUNY Graduate Center string.
# na=False counts a missing institution name as "no match"; without it the mask
# contains NaN and boolean indexing fails.
CUNY = big_frame[big_frame['Institution'].str.contains(
    "CUNY Research Foundation, Graduate School and University Center", na=False)]

In [None]:
# Export the CUNY subset to gc.csv (path is relative to the working directory).
CUNY.to_csv("gc.csv")

In [None]:
# Quick look at the outright award amounts of the CUNY subset, in row order.
CUNY.loc[:, 'AwardOutright'].plot()

In [None]:
# List the column labels of the CUNY subset.
CUNY.columns

In [None]:
# Total outright award per primary discipline for the CUNY subset.
fig, ax = plt.subplots()
# Pass the plot kind by keyword: newer pandas rejects positional arguments to
# Series.plot(), and the keyword form works on every version.
CUNY.groupby('PrimaryDiscipline')['AwardOutright'].sum().plot(kind="bar", ax=ax)
ax.set_title("CUNY by Primary Discipline")
ax.set_ylabel("Award Outright")

In [None]:
# Pivot the CUNY grants into a table: one row per grant start date,
# one column per primary discipline, cells holding summed outright awards.
tab = (
    CUNY
    .groupby(['BeginGrant', 'PrimaryDiscipline'])['AwardOutright']
    .sum()
    .unstack()
)

In [None]:
# Inspect the raw 2-D array behind `tab` (rows: BeginGrant, columns: PrimaryDiscipline).
tab.values

In [None]:
# Heat map of outright awards: grant start dates across, disciplines down.
fig, ax = plt.subplots(figsize=(20, 10))

# Format the BeginGrant index as year-month strings for the x tick labels.
clean_time = (
    pd.to_datetime(tab.index.values, infer_datetime_format=True)
    .strftime("%Y-%m")
)

# Transpose so that every discipline (a column of `tab`) becomes one image row.
sm = ax.imshow(tab.T.values, cmap=mcm.viridis_r)

# One tick per start date on x, one tick per discipline on y.
ax.set_xticks(range(len(tab.index)))
ax.set_xticklabels(clean_time, rotation=90)
ax.set_yticks(range(len(tab.columns)))
ax.set_yticklabels(tab.columns, rotation=0)

cb = fig.colorbar(sm, ax=ax, fraction=.015)
cb.set_label("Award Outright ($)")

In [None]:
# List the discipline labels that form the columns of `tab`.
tab.columns

In [None]:
# Preview the BeginGrant index rendered as year/month/day strings.
(
    pd.to_datetime(tab.index.values, infer_datetime_format=True)
    .strftime("%Y/%m/%d")
)

In [None]:
# Per-state column sums for the second-to-last frame in `dfs`.
# NOTE(review): which file that is depends on the order in which the CSVs were
# loaded -- confirm it is the intended decade.
states = dfs[-2].groupby('InstState').sum()

In [None]:
# Share (in percent) of all approved funding, outright plus matching, that went
# to California and Kansas.
# Row totals are computed once and reused for both numerator and denominator.
approved_by_state = states[['ApprovedOutright', 'ApprovedMatching']].sum(axis=1)
# .ix is deprecated and missing from newer pandas; the index holds state codes,
# so label-based .loc is the direct replacement.
approved_by_state.loc[['CA', 'KS']] / approved_by_state.sum() * 100

In [None]:
# Display the last frame loaded into `dfs`.
dfs[-1]

In [None]:
# IPython help lookup for the Natural Earth shapefile helper
# (interactive aid; the trailing `?` is IPython syntax, not plain Python).
shpreader.natural_earth?

In [None]:
%matplotlib inline

def plot_total(yr_state, yr):
    """Draw a US choropleth coloured by each state's share of ``yr_state``.

    NOTE(review): despite its name this function plots percentage shares, not
    dollar totals; it performs the same computation and writes to the same
    file name as ``plot_percentange``.  A log-scaled totals map may have been
    the intent -- confirm before relying on the name.

    Parameters
    ----------
    yr_state : pandas.Series
        Amounts indexed by two-letter state postal code.
    yr : int or str
        Decade label; inserted into the title and into the output file name
        ``NEH_state_<yr>s``.

    Every state polygon in the Natural Earth shapefile is coloured by
    ``value / yr_state.sum() * 100`` on a linear 0-20 scale.  States with no
    entry in ``yr_state`` are drawn in light gray.  The figure is written to
    disk and then shown.
    """
    fig = plt.figure()

    ax = fig.add_axes([0, 0, 1, 1], projection=ccrs.LambertConformal())
    ax.set_extent([-125, -66.5, 20, 50], ccrs.Geodetic())
    # NOTE(review): background_patch / outline_patch belong to older cartopy
    # releases; newer ones appear to expose ax.patch / ax.spines['geo'] instead --
    # confirm against the installed version.
    ax.background_patch.set_visible(False)
    ax.outline_patch.set_visible(False)

    shapename = 'admin_1_states_provinces_lakes_shp'
    states_shp = shpreader.natural_earth(resolution='110m',
                                         category='cultural', name=shapename)
    states = shpreader.Reader(states_shp)

    cmap = mcm.viridis_r
    # Linear colour scale from 0 to 20 percent; the colorbar uses extend='both'
    # for values outside that range.
    norm = mcolors.Normalize(vmin=0, vmax=20)

    # Loop-invariant denominator for the percentage.
    grand_total = yr_state.sum()

    for record, geo in zip(states.records(), states.geometries()):
        # Match values to shapes by postal code.  Zipping yr_state against the
        # shapefile records positionally would assign values to the wrong
        # states, because the two sequences are ordered differently.
        sn = record.attributes['postal']
        val = yr_state.get(sn)
        if val is None:
            # No data for this state: draw it in a neutral colour.
            color = 'lightgray'
        else:
            # Guard against an all-zero input so the share is 0 rather than NaN.
            pct = ((val * 1.0) / grand_total) * 100 if grand_total else 0.0
            color = cmap(norm(pct))
        ax.add_geometries([geo], ccrs.PlateCarree(), facecolor=color, edgecolor='black')

    # Stand-alone mappable so a colorbar can be drawn for manually coloured shapes.
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])  # public API instead of assigning the private sm._A
    cb = plt.colorbar(sm, ax=ax, fraction=.035, pad=.0, aspect=15, extend='both', anchor=[-5,0])
    cb.ax.tick_params(labelsize=14)
    cb.set_label("% Awarded Outright", fontsize=16)
    ax.set_title("% Funded in the {}s".format(yr), fontsize=18)

    # Save before showing: some backends release the figure once show() returns.
    fig.savefig("NEH_state_{}s".format(yr))
    plt.show()