In [1]:
import pandas as pd
import numpy as np
import os
from hmmlearn import hmm
from matplotlib import pyplot as plt
from sklearn.utils import check_random_state
import pickle
import simplejson

In [2]:
# Locate the enclosing git repository (searching parent directories of the
# current directory) so data paths can be built from the repository root.
import git
import sys
repo = git.Repo("./", search_parent_directories=True)
# Working-tree root; used as the prefix of the CSV paths read further down.
homedir = repo.working_dir

In [3]:
def loadHMM(name):
    """Unpickle and return the object stored in the file at ``name``.

    NOTE(review): ``pickle.load`` can run arbitrary code embedded in the file,
    so only use this on model files that come from a trusted source.
    """
    with open(name, "rb") as handle:
        return pickle.load(handle)

In [4]:
def loadHMMlist(colname, minclust, maxclust):
    """Load one pickled model per cluster index in ``minclust..maxclust`` (inclusive).

    Each file name is built as ``<colname>_<index>.0.pkl`` and handed to
    ``loadHMM`` unchanged (no directory is prepended here).

    Returns a pair ``(models, labels)`` of equal-length lists in which
    ``labels[k]`` is the cluster index whose model is ``models[k]``.
    An empty range yields two empty lists.
    """
    cluster_ids = list(range(minclust, maxclust + 1))
    models = [
        loadHMM(str(str(colname) + '_' + str(cluster) + '.0.pkl'))
        for cluster in cluster_ids
    ]
    return models, cluster_ids

In [5]:
def makeSample(HMM, length, initState = -1, smalltol = 2, largetol = 0.05, numSamples = 1000):
    """Average ``numSamples`` sequences of ``length`` steps sampled from ``HMM``.

    Parameters
    ----------
    HMM : object exposing ``sample(n_samples=...)`` that returns
        ``(observations, states)``; only the first feature of each
        observation is used.
    length : number of steps in every sampled sequence.
    initState : desired first emitted value. The sentinel ``-1`` (default)
        disables filtering. Otherwise sequences are re-drawn until the first
        emitted value is close enough to ``initState``.
    smalltol : absolute tolerance, applied when ``initState <= 50``.
    largetol : relative tolerance ``|first - initState| / (initState + 1)``,
        applied when ``initState > 50``.
    numSamples : how many accepted sequences are averaged.

    Returns
    -------
    (values, states) : two NumPy arrays of shape ``(length,)`` holding the
        per-step mean of the first emitted feature and of the state index.

    Raises
    ------
    ValueError if ``numSamples`` is not positive.

    Notes
    -----
    The rejection loop has no attempt limit: it does not terminate if the
    model never emits a first value within tolerance.
    NOTE(review): if ``HMM.random_state`` is a fixed integer, hmmlearn's
    ``sample`` presumably re-seeds on every call and returns the same sequence
    each time -- confirm, and use a ``RandomState`` instance in that case.
    """
    if numSamples <= 0:
        raise ValueError("numSamples must be a positive integer")

    # NumPy accumulators: with a plain list, ``+=`` would extend the list with
    # each state sequence instead of adding element-wise, and the final
    # division would fail.
    value_sum = np.zeros(length)
    state_sum = np.zeros(length)

    for _ in range(numSamples):
        while True:
            observations, hidden_states = HMM.sample(n_samples=length)
            if initState == -1:
                break
            first_value = observations[0][0]
            if initState > 50:
                accepted = np.abs(first_value - initState)/(initState + 1) <= largetol
            else:
                accepted = np.abs(first_value - initState) <= smalltol
            if accepted:
                break
        value_sum += [step[0] for step in observations]
        state_sum += hidden_states

    return value_sum/numSamples, state_sum/numSamples
    

In [6]:
def makeHMMUnSupData(Input, colname='Deaths', fipsname='FIPS'):
    """Split ``Input[colname]`` into one list per distinct ``fipsname`` value.

    The outer list follows the order in which each code first appears in the
    frame (the order produced by ``Series.unique``); values inside each list
    keep their row order. This list-of-lists is the format the HMM code uses.
    """
    codes = Input[fipsname]
    return [list(Input.loc[codes == code, colname]) for code in codes.unique()]

In [7]:
def makeFipsPrediction(HMM, Data, fipscode, length=14, n_iters=10):
    """Simulate ``length`` future steps for one FIPS code and average the runs.

    Takes an HMM, a dataset (either JHU, NYT_F, or NYT_W) and a FIPS code.
    The rows of ``Data`` with that code are decoded with ``HMM.predict``, and
    the last decoded state is the starting point. From there the chain is
    rolled forward ``length`` steps: each next state is drawn from the current
    state's row of the transition matrix, and an observation is drawn from it.
    This is repeated ``n_iters`` times.

    Parameters
    ----------
    HMM : fitted hmmlearn model (uses ``predict``, ``transmat_``,
        ``random_state`` and ``_generate_sample_from_state``).
    Data : DataFrame with 'FIPS' and 'Deaths' columns.
    fipscode : value looked up in ``Data['FIPS']``.
    length : number of steps to simulate.
    n_iters : number of simulated runs averaged together.

    Returns
    -------
    (States, Emissions) : two lists of ``length`` floats holding the per-step
        mean state index and the per-step mean of the first emitted feature.

    Raises
    ------
    IndexError if ``Data`` has no rows for ``fipscode``.
    """
    X = makeHMMUnSupData(Data[Data['FIPS']==fipscode])[0]
    last_state = HMM.predict(np.array(X).reshape(-1,1))[-1]
    transmat_cdf = np.cumsum(HMM.transmat_, axis=1)

    # Build the generator once. Doing this inside the loops would, for an
    # integer ``HMM.random_state``, restart the stream before every draw so
    # all steps and all iterations would see the same random numbers.
    random_state = check_random_state(HMM.random_state)

    Emissions = [0.0] * length
    States = [0.0] * length

    for _ in range(n_iters):
        # Every run starts from the state decoded for the last observed row.
        current_state = last_state
        for step in range(length):
            # Inverse-CDF draw: first column whose cumulative probability
            # exceeds a uniform random number.
            current_state = (transmat_cdf[current_state] > random_state.rand()).argmax()
            next_obs = HMM._generate_sample_from_state(current_state, random_state)

            Emissions[step] += next_obs[0]/n_iters
            States[step] += current_state/n_iters

    return States, Emissions

In [8]:
def makeHMMListPrediction(HMMList, Data, colname, DTW, length=14, n_iters=10):
    """Predict ``length`` steps for every FIPS code that has a cluster label.

    Parameters
    ----------
    HMMList : pair ``(models, labels)`` as returned by ``loadHMMlist``;
        ``models[k]`` is used for the codes whose cluster label is ``labels[k]``.
    Data : deaths DataFrame passed through to ``makeFipsPrediction``.
    colname : column of ``DTW`` holding the cluster label of each FIPS code.
    DTW : DataFrame with a 'FIPS' column and the ``colname`` column.
    length : number of steps to predict (one output column per step).
    n_iters : number of simulated runs averaged per FIPS code.

    Returns
    -------
    DataFrame with the 'FIPS' values of all rows of ``DTW`` whose ``colname``
    is not NaN, plus float columns '1' .. str(length) holding the averaged
    emission predictions. Rows whose label is not in ``labels`` keep 0.0.
    """
    HMMs = HMMList[0]
    labels = HMMList[1]

    # Copy so the prediction columns go into an independent frame rather than
    # a slice of DTW (avoids chained-assignment warnings and aliasing).
    PredictionFrame = DTW.loc[~DTW[colname].isna(), ['FIPS']].copy()

    # Float zeros: predictions are floats, and writing floats into an integer
    # column relies on silent upcasting that pandas has deprecated.
    day_columns = [str(day) for day in range(1, length + 1)]
    for column in day_columns:
        PredictionFrame[column] = 0.0

    for position, label in enumerate(labels):
        HMM = HMMs[position]
        codes = DTW[DTW[colname] == label]['FIPS'].unique().tolist()
        for code in codes:
            Prediction = makeFipsPrediction(HMM, Data, code, length, n_iters)[1]
            rows = PredictionFrame['FIPS'] == code
            for column, value in zip(day_columns, Prediction):
                PredictionFrame.loc[rows, column] = value
    return PredictionFrame
            
            

In [9]:
#Dataframes of deaths, read from models/HMM_Work under the repository root
NYT_F = pd.read_csv(f"{homedir}/models/HMM_Work/NYT_daily_Filled.csv", index_col=0)
NYT_W = pd.read_csv(f"{homedir}/models/HMM_Work/NYT_daily_Warp.csv", index_col=0)
#Rename the NYT columns to 'FIPS'/'Deaths', the names the prediction helpers look up
NYT_F = NYT_F.rename(columns={'fips':'FIPS','deaths':'Deaths'})
NYT_W = NYT_W.rename(columns={'fips':'FIPS','deaths':'Deaths'})
#JHU is used as loaded (presumably it already has 'FIPS'/'Deaths' columns -- confirm)
JHU = pd.read_csv(f"{homedir}/models/HMM_Work/JHU_daily.csv", index_col=0)
#List of lists of deaths data, stored as JSON text
#NOTE(review): these three paths are relative to the current working directory, unlike
#the homedir-based CSV paths -- confirm the notebook is run from the folder holding them
with open('NYT_daily_Warp_Death.txt') as f:
    NYT_daily_Warp_Death = simplejson.load(f)
with open('NYT_daily_Death_Filled.txt') as g:
    NYT_daily_Death_Filled = simplejson.load(g)
with open('JHU_daily_death.txt') as h:
    JHU_daily_death = simplejson.load(h)
#DTW based clusters: one row per FIPS code, one column per clustering variant
#(column layout inferred from how makeHMMListPrediction indexes this frame)
DTW_Clusters = pd.read_csv(f"{homedir}/models/HMM_Work/DTW_Clustering.csv", index_col=0)

In [10]:
#Load the pickled HMMs for each JHU clustering variant. The two integers are the
#lowest and highest cluster index whose '<name>_<index>.0.pkl' file is read;
#each result is the (HMMlist, labels) pair returned by loadHMMlist.
JHU_Z_T_HMMs = loadHMMlist('JHU_Z_T', 1, 2)
JHU_Z_L_HMMs = loadHMMlist('JHU_Z_L', 1, 6)
JHU_N_T_HMMs = loadHMMlist('JHU_N_T', 0, 2)
JHU_N_L_HMMs = loadHMMlist('JHU_N_L', 0, 6)


In [11]:
#14-step predictions from the JHU deaths frame for every FIPS code that has a label
#in the named DTW_Clusters column; each prediction averages 10 simulated runs.
JHU_Z_T_Pred = makeHMMListPrediction(JHU_Z_T_HMMs, JHU, 'JHU_Z_T', DTW_Clusters, length=14, n_iters=10)
JHU_Z_L_Pred = makeHMMListPrediction(JHU_Z_L_HMMs, JHU, 'JHU_Z_L', DTW_Clusters, length=14, n_iters=10)
JHU_N_T_Pred = makeHMMListPrediction(JHU_N_T_HMMs, JHU, 'JHU_N_T', DTW_Clusters, length=14, n_iters=10)
JHU_N_L_Pred = makeHMMListPrediction(JHU_N_L_HMMs, JHU, 'JHU_N_L', DTW_Clusters, length=14, n_iters=10)

In [12]:
#Load the pickled HMMs for each NYT_F clustering variant; the integers are the
#lowest and highest cluster index read, and each result is a (HMMlist, labels) pair.
NYT_F_Z_T_HMMs = loadHMMlist('NYT_F_Z_T', 1, 2)
NYT_F_Z_L_HMMs = loadHMMlist('NYT_F_Z_L', 1, 5)
NYT_F_N_T_HMMs = loadHMMlist('NYT_F_N_T', 0, 2)
NYT_F_N_L_HMMs = loadHMMlist('NYT_F_N_L', 0, 5)
NYT_F_N_L_L_HMMs = loadHMMlist('NYT_F_N_L_L', 0, 9)

In [13]:
#14-step predictions from the NYT_F deaths frame for every FIPS code that has a label
#in the named DTW_Clusters column; each prediction averages 10 simulated runs.
NYT_F_Z_T_Pred = makeHMMListPrediction(NYT_F_Z_T_HMMs, NYT_F, 'NYT_F_Z_T', DTW_Clusters, length=14, n_iters=10)
NYT_F_Z_L_Pred = makeHMMListPrediction(NYT_F_Z_L_HMMs, NYT_F, 'NYT_F_Z_L', DTW_Clusters, length=14, n_iters=10)
NYT_F_N_T_Pred = makeHMMListPrediction(NYT_F_N_T_HMMs, NYT_F, 'NYT_F_N_T', DTW_Clusters, length=14, n_iters=10)
NYT_F_N_L_Pred = makeHMMListPrediction(NYT_F_N_L_HMMs, NYT_F, 'NYT_F_N_L', DTW_Clusters, length=14, n_iters=10)
NYT_F_N_L_L_Pred = makeHMMListPrediction(NYT_F_N_L_L_HMMs, NYT_F, 'NYT_F_N_L_L', DTW_Clusters, length=14, n_iters=10)

In [14]:
#Load the pickled HMMs for each NYT_W clustering variant; the integers are the
#lowest and highest cluster index read, and each result is a (HMMlist, labels) pair.
NYT_W_Z_T_HMMs = loadHMMlist('NYT_W_Z_T', 1, 2)
NYT_W_Z_L_HMMs = loadHMMlist('NYT_W_Z_L', 1, 7)
NYT_W_N_T_HMMs = loadHMMlist('NYT_W_N_T', 0, 2)
NYT_W_N_L_HMMs = loadHMMlist('NYT_W_N_L', 0, 7)


In [15]:
#14-step predictions from the NYT_W deaths frame for every FIPS code that has a label
#in the named DTW_Clusters column; each prediction averages 10 simulated runs.
NYT_W_Z_T_Pred = makeHMMListPrediction(NYT_W_Z_T_HMMs, NYT_W, 'NYT_W_Z_T', DTW_Clusters, length=14, n_iters=10)
NYT_W_Z_L_Pred = makeHMMListPrediction(NYT_W_Z_L_HMMs, NYT_W, 'NYT_W_Z_L', DTW_Clusters, length=14, n_iters=10)
NYT_W_N_T_Pred = makeHMMListPrediction(NYT_W_N_T_HMMs, NYT_W, 'NYT_W_N_T', DTW_Clusters, length=14, n_iters=10)
NYT_W_N_L_Pred = makeHMMListPrediction(NYT_W_N_L_HMMs, NYT_W, 'NYT_W_N_L', DTW_Clusters, length=14, n_iters=10)


In [17]:
#Spot check: show the 14 predicted values for the row whose FIPS code is 36061
NYT_W_Z_T_Pred[NYT_W_Z_T_Pred['FIPS'] == 36061]

Unnamed: 0,FIPS,1,2,3,4,5,6,7,8,9,10,11,12,13,14
1717,36061.0,60.357464,47.047603,36.964599,49.382815,33.915387,45.954971,34.829428,31.63624,33.863018,48.878625,22.972845,22.124733,14.602803,27.91129
