# Mortality Analysis
Analyze data from the National Notifiable Diseases Surveillance System on [deaths from pneumonia and influenza](https://www.healthdata.gov/dataset/nndss-table-ii-invasive-pneumococcal-diseases-all-ages) in 122 cities.

In [55]:
import numpy as np
from scipy import stats
import cntk as C

In [56]:
# Load every field of the CSV as a string; later cells compare the
# death-count column against "" to find blank entries and convert the
# numeric columns themselves.
dataPath = "source.csv"
data = np.genfromtxt(dataPath, dtype=str, delimiter=",")

In [57]:
# Take column 6 of every row after the first (the first row is skipped,
# presumably because it is the CSV header).
deaths = data[1:, 6]
print(deaths)

# Keep the positions of the non-empty entries so the other columns can be
# filtered down to exactly the same rows.
non_blanks = [row for row, count in enumerate(deaths) if count != ""]
deaths = deaths[non_blanks]

['11' '11' '5' ..., '2' '12' '11']


In [58]:
# Column 0, restricted to the rows that have a death count, as float32.
raw_years = data[1:, 0][non_blanks].astype(np.float32)

# Standardize to zero mean and unit variance.
years = stats.zscore(raw_years)
print(years)
print(years[15000:15005])

[-1.68896079 -1.68896079 -1.68896079 ...,  1.73466921  1.73466921
  1.73466921]
[-0.73795241 -0.73795241 -0.73795241 -0.73795241 -0.73795241]


In [59]:
# Flag each row as winter (1.0) when its week number, read from column 1,
# is below 7 or above 47; every other week gets 0.0.
weeks = data[1:, 1].astype(int)
winter = ((weeks < 7) | (weeks > 47)).astype(np.float32)
print(winter[:54])

# Drop the rows whose death count was blank.
winter = winter[non_blanks]

[ 1.  1.  1.  1.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  1.]


In [64]:
def shuffle_arrays(*arrays):
    """Shuffle every given array in place, starting each from the same RNG state.

    The global NumPy random state is captured once and restored before each
    shuffle, so arrays of equal length are reordered by the same permutation
    and corresponding rows stay aligned.

    Args:
        *arrays: Mutable sequences accepted by ``np.random.shuffle``; each one
            is modified in place.

    Returns:
        None. After the call the global random state is whatever the last
        shuffle left behind (unchanged if no arrays were passed).
    """
    snapshot = np.random.get_state()
    for target in arrays:
        # Rewind the generator so this shuffle draws the same random numbers
        # as the previous one.
        np.random.set_state(snapshot)
        np.random.shuffle(target)

# Shuffle the three columns in place. All three were filtered with the same
# non_blanks index, so they have equal length and receive the same
# permutation, keeping each row's year, winter flag and death count together.
shuffle_arrays(years, winter, deaths)

  deaths  winter     years
0     11     1.0 -1.688961
1     11     1.0 -1.688961
2      5     1.0 -1.688961
3     12     1.0 -1.688961
4      5     1.0 -1.688961
