In [2]:
import re
import math
import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly
from plotly.offline import iplot, init_notebook_mode

# Initialise plotly's offline notebook mode before any figure is drawn with iplot.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook -- confirm against the plotly version in use.
init_notebook_mode(connected=True)

def plot(figure):
    """Draw ``figure`` inline in the notebook using plotly's offline ``iplot``."""
    iplot(figure)

In [3]:
# Two-column, header-less CSV: state postal abbreviation and its numeric code.
states = pd.read_csv('state_alpha_to_numeric.csv', header=None, names=['alpha', 'num'])
alphas = list(states['alpha'])
nums = list(states['num'])

# Map each abbreviation to its numeric code as a zero-padded two-character string.
statenum = {alpha: str(num).zfill(2) for alpha, num in zip(alphas, nums)}

In [4]:
# Load the full census dump once; later cells filter this frame rather than re-reading it.
agCensus = pd.read_csv('2017_cdqt_data.txt', sep='\t', header=0, low_memory=False)

# County-level "ANIMALS & PRODUCTS" rows from chapter 2, excluding table 1.
is_county_animal_row = (
    (agCensus['CENSUS_CHAPTER'] == 2) &
    (agCensus['SECTOR_DESC'] == 'ANIMALS & PRODUCTS') &
    (agCensus['AGG_LEVEL_DESC'] == 'COUNTY') &
    (agCensus['CENSUS_TABLE'] != 1)
)

# The filter columns are constant (or irrelevant) after selection, so drop them.
# Chaining a non-inplace drop builds a fresh frame and avoids the
# SettingWithCopyWarning that inplace=True raises on a boolean-filtered slice.
agCensusAnimals = agCensus[is_county_animal_row].drop(
    columns=['CENSUS_CHAPTER', 'CENSUS_TABLE', 'SECTOR_DESC', 'AGG_LEVEL_DESC']
)
agCensusAnimals.columns

Index(['CENSUS_ROW', 'CENSUS_COLUMN', 'SHORT_DESC', 'COMMODITY_DESC',
       'STATE_FIPS_CODE', 'STATE_ALPHA', 'STATE_NAME', 'COUNTY_CODE',
       'COUNTY_NAME', 'DOMAINCAT_DESC', 'VALUE'],
      dtype='object')

In [5]:
# Rows whose COUNTY_NAME is a non-empty string. The mask does not depend on the
# state, so compute it once instead of re-running the regex for every state.
has_county_name = agCensusAnimals['COUNTY_NAME'].str.contains('.+', na=False)

# State abbreviation -> that state's county-level animal rows.
animalCensus = {
    state: agCensusAnimals[(agCensusAnimals['STATE_ALPHA'] == state) & has_county_name]
    for state in statenum.keys()
}

In [7]:
# Every distinct SHORT_DESC seen in the Texas rows; Texas is used as the reference
# state for the list of inventory items.
shortdescs = list(animalCensus['TX']['SHORT_DESC'].unique())

# Keep only the descriptions that contain " - INVENTORY".
inventory_filter = re.compile('^.* - INVENTORY')
animal_keys = [desc for desc in shortdescs if inventory_filter.match(desc)]
animal_keys

['CATTLE, INCL CALVES - INVENTORY',
 'CATTLE, COWS - INVENTORY',
 'CATTLE, COWS, BEEF - INVENTORY',
 'CATTLE, COWS, MILK - INVENTORY',
 'CATTLE, (EXCL COWS) - INVENTORY',
 'CATTLE, ON FEED - INVENTORY',
 'HOGS - INVENTORY',
 'SHEEP, INCL LAMBS - INVENTORY',
 'GOATS - INVENTORY',
 'GOATS, MILK - INVENTORY',
 'GOATS, ANGORA - INVENTORY',
 'GOATS, MEAT & OTHER - INVENTORY',
 'EQUINE, HORSES & PONIES - INVENTORY',
 'EQUINE, MULES & BURROS & DONKEYS - INVENTORY',
 'CHICKENS, LAYERS - INVENTORY',
 'CHICKENS, PULLETS, REPLACEMENT - INVENTORY',
 'CHICKENS, BROILERS - INVENTORY',
 'TURKEYS - INVENTORY',
 'CHUKARS - INVENTORY',
 'DUCKS - INVENTORY',
 'EMUS - INVENTORY',
 'GEESE - INVENTORY',
 'GUINEAS - INVENTORY',
 'PARTRIDGES, HUNGARIAN - INVENTORY',
 'OSTRICHES - INVENTORY',
 'PEAFOWL, HENS & COCKS - INVENTORY',
 'PHEASANTS - INVENTORY',
 'PIGEONS & SQUAB - INVENTORY',
 'QUAIL - INVENTORY',
 'RHEAS - INVENTORY',
 'CHICKENS, ROOSTERS - INVENTORY',
 'POULTRY, OTHER - INVENTORY',
 'HONEY, BEE CO

In [42]:
# Columns that are constant (or unused) once a single state and SHORT_DESC are selected.
_INVENTORY_DROP_COLUMNS = ['CENSUS_ROW', 'CENSUS_COLUMN', 'DOMAINCAT_DESC', 'STATE_ALPHA',
                           'STATE_FIPS_CODE', 'STATE_NAME', 'SHORT_DESC', 'COMMODITY_DESC']


def _county_inventory(state_rows, short_desc):
    """Return the cleaned county table for one inventory item of one state.

    Keeps the rows of ``state_rows`` whose SHORT_DESC equals ``short_desc`` and whose
    DOMAINCAT_DESC is null, removes rows whose VALUE contains 'D' (e.g. '(D)'),
    converts VALUE from a comma-grouped string to int, and drops the
    bookkeeping columns. A new frame is returned; ``state_rows`` is not modified.
    """
    selected = state_rows[
        (state_rows['SHORT_DESC'] == short_desc) &
        state_rows['DOMAINCAT_DESC'].isnull()
    ]
    # na=False keeps the mask boolean even if a VALUE is missing.
    selected = selected[~selected['VALUE'].str.contains('D', na=False)]
    counts = selected['VALUE'].apply(lambda s: int(s.replace(',', '')))
    # assign/drop build a fresh frame instead of writing into a slice, which is
    # what produced SettingWithCopyWarning in the loop-based version.
    return selected.assign(VALUE=counts).drop(columns=_INVENTORY_DROP_COLUMNS)


# state abbreviation -> inventory SHORT_DESC -> cleaned county table
animalInventories = {
    state: {key: _county_inventory(animalCensus[state], key) for key in animal_keys}
    for state in statenum.keys()
}

In [60]:
# Index every inventory table by integer COUNTY_CODE.
for state in statenum.keys():
    for key in animal_keys:
        table = animalInventories[state][key]
        if 'COUNTY_CODE' not in table.columns:
            # Already indexed (cell re-run); leave the table as it is.
            continue
        # Build a new frame rather than writing into a filtered slice.
        animalInventories[state][key] = (
            table
            .assign(COUNTY_CODE=table['COUNTY_CODE'].astype(int))
            .set_index('COUNTY_CODE')
        )

In [61]:
# Display the New York table for total cattle inventory as a spot check.
animalInventories['NY']['CATTLE, INCL CALVES - INVENTORY']

# TODO: Should now try to reindex by county code here.
# NOTE(review): the preceding cell already sets COUNTY_CODE as the index --
# confirm what additional reindexing was intended.

Unnamed: 0_level_0,COUNTY_NAME,VALUE
COUNTY_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
1,ALBANY,6814
3,ALLEGANY,29319
7,BROOME,12782
9,CATTARAUGUS,36651
11,CAYUGA,94427
13,CHAUTAUQUA,43922
15,CHEMUNG,6384
17,CHENANGO,27474
19,CLINTON,33373
21,COLUMBIA,16374


In [90]:
# Inventory SHORT_DESC keys grouped by species.
cow_keys = [
    'CATTLE, COWS, BEEF - INVENTORY',
    'CATTLE, COWS, MILK - INVENTORY',
    'CATTLE, (EXCL COWS) - INVENTORY',
]

pig_keys = ['HOGS - INVENTORY']

chicken_keys = [
    'CHICKENS, LAYERS - INVENTORY',
    'CHICKENS, PULLETS, REPLACEMENT - INVENTORY',
    'CHICKENS, BROILERS - INVENTORY',
    'CHICKENS, ROOSTERS - INVENTORY',
]

turkey_keys = ['TURKEYS - INVENTORY']

# Short aliases for the cattle series.
s1 = 'CATTLE, COWS - INVENTORY'
s2 = 'CATTLE, COWS, BEEF - INVENTORY'
s3 = 'CATTLE, COWS, MILK - INVENTORY'
s4 = 'CATTLE, (EXCL COWS) - INVENTORY'
s5 = 'CATTLE, INCL CALVES - INVENTORY'
s6 = 'CATTLE, ON FEED - INVENTORY'

d = animalInventories['NY']

# Consistency expression across the cattle series. It is not the last statement
# of the cell, so its value is computed and then discarded.
d[s1]['VALUE'] + d[s2]['VALUE'] + d[s3]['VALUE'] + d[s4]['VALUE'] - d[s5]['VALUE']

things = [s1, s2, s3, s4, s5, s6]

ck = chicken_keys

layers = 0
broilers = 0

county_names = d[s1]['COUNTY_NAME']
layer_counts = d[ck[0]]['VALUE']      # 'CHICKENS, LAYERS - INVENTORY'
broiler_counts = d[ck[2]]['VALUE']    # 'CHICKENS, BROILERS - INVENTORY'

# Walk the counties of the cattle table; tally chickens only for counties that
# appear in BOTH the layer and the broiler tables.
for c in county_names.index:
    print(county_names[c])
    has_both = (c in layer_counts.index) and (c in broiler_counts.index)
    if has_both:
        layers += layer_counts[c]
        broilers += broiler_counts[c]
        print('LAYERS:', layer_counts[c], 'BROILERS:', broiler_counts[c])

print('TOTAL LAYERS:', layers, 'TOTAL BROILERS:', broilers)

ALBANY
LAYERS: 3557 BROILERS: 406
ALLEGANY
LAYERS: 18772 BROILERS: 744
BROOME
LAYERS: 3025 BROILERS: 296
CATTARAUGUS
LAYERS: 3539 BROILERS: 1058
CAYUGA
LAYERS: 34492 BROILERS: 462
CHAUTAUQUA
LAYERS: 5887 BROILERS: 2725
CHEMUNG
CHENANGO
CLINTON
COLUMBIA
LAYERS: 9359 BROILERS: 2558
CORTLAND
DELAWARE
LAYERS: 5905 BROILERS: 1254
DUTCHESS
LAYERS: 4542 BROILERS: 14301
ERIE
ESSEX
LAYERS: 6317 BROILERS: 3814
FRANKLIN
LAYERS: 84234 BROILERS: 636
FULTON
LAYERS: 5228 BROILERS: 126
GENESEE
GREENE
HERKIMER
LAYERS: 10079 BROILERS: 4372
JEFFERSON
LEWIS
LAYERS: 2772 BROILERS: 360
LIVINGSTON
LAYERS: 2577 BROILERS: 282
MADISON
LAYERS: 2902 BROILERS: 1288
MONROE
LAYERS: 1902 BROILERS: 723
MONTGOMERY
LAYERS: 5034 BROILERS: 815
NIAGARA
LAYERS: 4067 BROILERS: 1881
ONEIDA
LAYERS: 5744 BROILERS: 1142
ONONDAGA
LAYERS: 703150 BROILERS: 751
ONTARIO
LAYERS: 40723 BROILERS: 643
ORANGE
ORLEANS
LAYERS: 2832 BROILERS: 310
OSWEGO
LAYERS: 2607 BROILERS: 832
OTSEGO
LAYERS: 8161 BROILERS: 1005
PUTNAM
LAYERS: 899 BROILERS

In [41]:
# Sanity check: each inventory SHORT_DESC should map to at most one COMMODITY_DESC
# per state (0 = no rows for that state, 1 = consistent).
#
# COMMODITY_DESC is dropped from the animalInventories tables when they are built,
# so reading it from there raises KeyError on a top-to-bottom run. The check reads
# the un-dropped rows in animalCensus instead, restricted to null DOMAINCAT_DESC.
# NOTE(review): rows whose VALUE is withheld ('(D)') are still included here -- confirm
# this matches the state the recorded output was produced from.
for state in statenum.keys():
    state_rows = animalCensus[state]
    total_rows = state_rows[state_rows['DOMAINCAT_DESC'].isnull()]
    for key in animal_keys:
        commodities = total_rows.loc[total_rows['SHORT_DESC'] == key, 'COMMODITY_DESC']
        print(commodities.unique().size)

1
1
1
1
1
0
1
1
1
1
0
1
1
1
1
1
1
1
0
1
0
1
0
0
0
0
1
0
0
0
1
0
1
1
1
0
0
0
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
0
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
0
1
0
1
1
0
0
1
1
1
1
0
1
1
1
1
0
1
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
0
1
1
0
1
1
0
1
1
1
1
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
1
1
1
1
0
1
1
1
1
1
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
1
1
1
1
0
1
1
1
1
0
1
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
0
0
1
0
0
0
0
1
0
1
1
1
0
0
0
0
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
0
1
1
1
1
0
1
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
0
1
1


In [28]:
# For every inventory item in Texas, report how many distinct CENSUS_ROW and
# CENSUS_COLUMN values its rows use, then list the distinct rows.
#
# CENSUS_ROW / CENSUS_COLUMN are dropped from the animalInventories tables when
# they are built, so this diagnostic reads the un-dropped rows in animalCensus
# (restricted to null DOMAINCAT_DESC) to stay runnable top to bottom.
tx_rows = animalCensus['TX']
tx_totals = tx_rows[tx_rows['DOMAINCAT_DESC'].isnull()]

for key in animal_keys:
    key_rows = tx_totals[tx_totals['SHORT_DESC'] == key]
    census_rows = key_rows['CENSUS_ROW'].unique()
    print(key, census_rows.size, key_rows['CENSUS_COLUMN'].unique().size)
    print(census_rows)

CATTLE, INCL CALVES - INVENTORY 1 254
[3]
CATTLE, COWS - INVENTORY 1 254
[35]
CATTLE, COWS, BEEF - INVENTORY 1 197
[39]
CATTLE, COWS, MILK - INVENTORY 1 39
[57]
CATTLE, (EXCL COWS) - INVENTORY 1 254
[75]
CATTLE, ON FEED - INVENTORY 1 40
[93]
HOGS - INVENTORY 1 202
[3]
SHEEP, INCL LAMBS - INVENTORY 1 221
[3]
GOATS - INVENTORY 243 1
[  3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20
  21  22  23  24  26  27  28  29  30  31  32  33  34  35  36  37  38  39
  40  41  43  44  45  47  48  49  50  51  52  54  55  56  57  58  59  60
  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78
  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96
  97  98  99 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 134
 135 136 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 154
 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
 173 174 1

In [29]:
# Print the per-column non-null counts of every Texas inventory table.
tx_inventories = animalInventories['TX']
for key in animal_keys:
    print(key, tx_inventories[key].count())

CATTLE, INCL CALVES - INVENTORY CENSUS_ROW         254
CENSUS_COLUMN      254
SHORT_DESC         254
COMMODITY_DESC     254
STATE_FIPS_CODE    254
STATE_ALPHA        254
STATE_NAME         254
COUNTY_CODE        254
COUNTY_NAME        254
DOMAINCAT_DESC       0
VALUE              254
dtype: int64
CATTLE, COWS - INVENTORY CENSUS_ROW         254
CENSUS_COLUMN      254
SHORT_DESC         254
COMMODITY_DESC     254
STATE_FIPS_CODE    254
STATE_ALPHA        254
STATE_NAME         254
COUNTY_CODE        254
COUNTY_NAME        254
DOMAINCAT_DESC       0
VALUE              254
dtype: int64
CATTLE, COWS, BEEF - INVENTORY CENSUS_ROW         197
CENSUS_COLUMN      197
SHORT_DESC         197
COMMODITY_DESC     197
STATE_FIPS_CODE    197
STATE_ALPHA        197
STATE_NAME         197
COUNTY_CODE        197
COUNTY_NAME        197
DOMAINCAT_DESC       0
VALUE              197
dtype: int64
CATTLE, COWS, MILK - INVENTORY CENSUS_ROW         39
CENSUS_COLUMN      39
SHORT_DESC         39
COMMODITY_DESC   

In [31]:
def print_stuff(state_alpha, key):
    """Print the distinct CENSUS_COLUMN values used by one inventory item in one state.

    Parameters
    ----------
    state_alpha : str
        State abbreviation; must be a key of ``animalCensus``.
    key : str
        Inventory SHORT_DESC, e.g. 'HOGS - INVENTORY'.

    CENSUS_COLUMN is dropped from the ``animalInventories`` tables when they are
    built, so the rows are taken from ``animalCensus`` (null DOMAINCAT_DESC only)
    to avoid a KeyError on a top-to-bottom run.
    """
    state_rows = animalCensus[state_alpha]
    key_rows = state_rows[
        (state_rows['SHORT_DESC'] == key) & state_rows['DOMAINCAT_DESC'].isnull()
    ]
    print(key_rows['CENSUS_COLUMN'].unique())
    
def printy(key):
    """Shortcut for ``print_stuff('TX', key)``: run the report for Texas only."""
    print_stuff('TX', key)

# For each inventory item, print a ':::<state>' header followed by that state's report.
for key in animal_keys:
    print(key)
    for state in statenum:
        print(f':::{state}')
        print_stuff(state, key)

CATTLE, INCL CALVES - INVENTORY
:::AK
[2 3 4]
:::AL
[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68]
:::AR
[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
 74 75 76]
:::AS
[]
:::AZ
[ 2  3  4  5  6  7  9 10 11 12 13 14 15]
:::CA
[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 40 41 42 43 44 45 46 47 48 49 50
 51 52 53 54 55 56 57 58 59]
:::CO
[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 19 20 21 22 23 24 25 27
 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
 52 53 54 55 56 57 59 60 61 62 63 64 65]
:::CT
[2 3 4 5 6 7 8 9]
:::DC
[]
:::DE
[2 3 4]
:::FL
[ 2  3  4  5  6  7  8  9

 54 55 56 57 61 62 66 67 70 75 76 79 80 81 82 83 84 86 90 91 92 95 98]
:::VI
[]
:::VT
[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15]
:::WA
[ 2  3  4  5  7 10 12 13 14 15 18 19 20 21 22 24 25 26 27 28 29 30 32 33
 34 35 36 38 40]
:::WI
[ 3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 21 23 24 25 26 28 29
 30 31 32 33 34 35 36 37 38 39 40 43 44 45 46 47 48 49 50 51 52 53 54 55
 56 57 58 59 60 61 62 63 64 66 67 68 69 70 71 72 73]
:::WV
[ 2  3  5  7  8 10 11 12 14 15 17 18 19 20 21 23 26 27 28 29 32 33 35 36
 37 39 40 41 42 43 44 46 47 48 50 51 52 53 54 55]
:::WY
[ 3  4  5  6  7  8 11 13 14 15 18 19 20 23 24]
CATTLE, (EXCL COWS) - INVENTORY
:::AK
[2 3 4]
:::AL
[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68]
:::AR
[ 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42

 55]
:::WY
[ 2  3  4  5  6  7  8  9 10 11 13 14 16 18 20 22 23 24]
SHEEP, INCL LAMBS - INVENTORY
:::AK
[3 4]
:::AL
[ 3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
 27 28 29 31 32 34 36 37 38 39 40 41 42 43 44 46 47 48 49 50 51 52 53 54
 55 56 57 58 59 60 62 63 64 65 66 67 68]
:::AR
[ 3  4  5  6  7  9 11 12 13 15 16 17 18 19 21 23 24 25 26 27 28 29 30 31
 32 33 34 37 39 41 42 43 44 45 46 50 51 52 53 54 57 58 59 61 62 63 64 65
 66 67 68 69 70 71 72 73 74 76]
:::AS
[]
:::AZ
[ 2  3  4  5  6  7  8  9 11 12 14 15]
:::CA
[ 2  4  5  6  7  8  9 10 11 12 13 14 15 16 19 20 21 22 23 24 25 26 28 29
 30 31 32 33 34 35 36 37 38 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
 56 57 58 59]
:::CO
[ 2  3  4  5  7  8 10 13 14 15 16 17 19 20 21 22 23 24 25 27 28 30 32 33
 34 36 37 38 39 40 41 43 44 45 46 47 49 51 52 53 54 55 56 57 59 62 64 65]
:::CT
[2 3 4 5 6 7 8 9]
:::DC
[]
:::DE
[2 3 4]
:::FL
[ 2  3  4  5  6  7  9 10 11 12 13 14 15 16 17 18 20 21 22 24 25 26 27 28
 29 30 31 32 33 

[2]
:::MN
[2]
:::MO
[2]
:::MP
[]
:::MS
[2]
:::MT
[2]
:::NC
[2]
:::ND
[2]
:::NE
[2]
:::NH
[2]
:::NJ
[2]
:::NM
[2]
:::NV
[2]
:::NY
[2]
:::OH
[2]
:::OK
[2]
:::OR
[2]
:::PA
[2]
:::PR
[]
:::PW
[]
:::RI
[2]
:::SC
[2]
:::SD
[2]
:::TN
[2]
:::TX
[2]
:::UM
[]
:::UT
[2]
:::VA
[2]
:::VI
[]
:::VT
[2]
:::WA
[2]
:::WI
[2]
:::WV
[2]
:::WY
[2]
EQUINE, HORSES & PONIES - INVENTORY
:::AK
[2]
:::AL
[2]
:::AR
[2]
:::AS
[]
:::AZ
[2]
:::CA
[2]
:::CO
[2]
:::CT
[2]
:::DC
[]
:::DE
[2]
:::FL
[2]
:::FM
[]
:::GA
[2]
:::GU
[]
:::HI
[2]
:::IA
[2]
:::ID
[2]
:::IL
[2]
:::IN
[2]
:::KS
[2]
:::KY
[2]
:::LA
[2]
:::MA
[2]
:::MD
[2]
:::ME
[2]
:::MH
[]
:::MI
[2]
:::MN
[2]
:::MO
[2]
:::MP
[]
:::MS
[2]
:::MT
[2]
:::NC
[2]
:::ND
[2]
:::NE
[2]
:::NH
[2]
:::NJ
[2]
:::NM
[2]
:::NV
[2]
:::NY
[2]
:::OH
[2]
:::OK
[2]
:::OR
[2]
:::PA
[2]
:::PR
[]
:::PW
[]
:::RI
[2]
:::SC
[2]
:::SD
[2]
:::TN
[2]
:::TX
[2]
:::UM
[]
:::UT
[2]
:::VA
[2]
:::VI
[]
:::VT
[2]
:::WA
[2]
:::WI
[2]
:::WV
[2]
:::WY
[2]
EQUINE, MULES & BURROS & DONKEYS - INVENTORY


[ 3  4  5  6  7  8  9 11 12 13 14 15 16 17]
:::MH
[]
:::MI
[ 2  3  4  5  6  7  8  9 10 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
 27 28 29 31 32 34 37 38 39 40 41 42 45 46 47 48 49 51 52 53 54 55 56 57
 58 59 60 62 63 64 65 66 67 68 69 70 72 74 75 76 77 79 81 82 83 84]
:::MN
[ 2  3  4  5  6  8 10 12 13 14 16 17 19 20 21 22 23 24 26 29 30 31 34 35
 37 38 39 40 41 43 44 47 49 50 51 52 53 54 56 57 59 61 67 69 70 74 75 77
 78 80 82 83]
:::MO
[  2   3   6   8   9  10  11  12  13  14  15  16  17  19  20  25  26  27
  29  30  31  34  35  37  38  39  40  41  42  43  44  46  47  49  51  53
  54  55  56  57  58  59  60  62  63  64  65  66  67  69  70  71  72  74
  75  77  78  80  81  82  83  84  85  86  87  88  89  90  92  93  94  95
  96  98 102 103 105 107 108 109 110 111 112 113 115]
:::MP
[]
:::MS
[ 3  4  5  6 11 13 16 19 20 21 22 23 24 25 27 31 34 35 37 38 39 42 44 45
 46 47 48 49 51 52 55 56 57 58 59 60 62 63 65 67 68 70 72 76]
:::MT
[ 2  5  6  7  8 12 15 16 17 19 23 25 28 33 34 35 38 3

:::KY
[2]
:::LA
[2]
:::MA
[2]
:::MD
[2]
:::ME
[2]
:::MH
[]
:::MI
[2]
:::MN
[2]
:::MO
[2]
:::MP
[]
:::MS
[2]
:::MT
[2]
:::NC
[2]
:::ND
[2]
:::NE
[2]
:::NH
[2]
:::NJ
[2]
:::NM
[2]
:::NV
[2]
:::NY
[2]
:::OH
[2]
:::OK
[2]
:::OR
[2]
:::PA
[2]
:::PR
[]
:::PW
[]
:::RI
[2]
:::SC
[2]
:::SD
[2]
:::TN
[2]
:::TX
[2]
:::UM
[]
:::UT
[2]
:::VA
[2]
:::VI
[]
:::VT
[2]
:::WA
[2]
:::WI
[2]
:::WV
[2]
:::WY
[2]
EMUS - INVENTORY
:::AK
[]
:::AL
[2]
:::AR
[]
:::AS
[]
:::AZ
[2]
:::CA
[2]
:::CO
[2]
:::CT
[2]
:::DC
[]
:::DE
[2]
:::FL
[2]
:::FM
[]
:::GA
[2]
:::GU
[]
:::HI
[]
:::IA
[2]
:::ID
[2]
:::IL
[2]
:::IN
[2]
:::KS
[2]
:::KY
[2]
:::LA
[2]
:::MA
[2]
:::MD
[2]
:::ME
[]
:::MH
[]
:::MI
[2]
:::MN
[2]
:::MO
[2]
:::MP
[]
:::MS
[2]
:::MT
[2]
:::NC
[2]
:::ND
[]
:::NE
[]
:::NH
[2]
:::NJ
[2]
:::NM
[]
:::NV
[2]
:::NY
[2]
:::OH
[2]
:::OK
[]
:::OR
[2]
:::PA
[2]
:::PR
[]
:::PW
[]
:::RI
[]
:::SC
[2]
:::SD
[]
:::TN
[2]
:::TX
[2]
:::UM
[]
:::UT
[2]
:::VA
[2]
:::VI
[]
:::VT
[]
:::WA
[2]
:::WI
[2]
:::WV
[]
:::WY
[]
GEESE - INVE

[2]
:::OK
[2]
:::OR
[2]
:::PA
[2]
:::PR
[]
:::PW
[]
:::RI
[]
:::SC
[2]
:::SD
[2]
:::TN
[2]
:::TX
[2]
:::UM
[]
:::UT
[2]
:::VA
[]
:::VI
[]
:::VT
[]
:::WA
[2]
:::WI
[2]
:::WV
[]
:::WY
[2]
DEER - INVENTORY
:::AK
[]
:::AL
[2]
:::AR
[2]
:::AS
[]
:::AZ
[]
:::CA
[]
:::CO
[2]
:::CT
[2]
:::DC
[]
:::DE
[]
:::FL
[2]
:::FM
[]
:::GA
[]
:::GU
[]
:::HI
[]
:::IA
[2]
:::ID
[]
:::IL
[2]
:::IN
[2]
:::KS
[2]
:::KY
[2]
:::LA
[2]
:::MA
[2]
:::MD
[]
:::ME
[2]
:::MH
[]
:::MI
[2]
:::MN
[2]
:::MO
[2]
:::MP
[]
:::MS
[2]
:::MT
[]
:::NC
[]
:::ND
[]
:::NE
[2]
:::NH
[]
:::NJ
[2]
:::NM
[]
:::NV
[]
:::NY
[2]
:::OH
[2]
:::OK
[2]
:::OR
[2]
:::PA
[2]
:::PR
[]
:::PW
[]
:::RI
[]
:::SC
[2]
:::SD
[]
:::TN
[2]
:::TX
[2]
:::UM
[]
:::UT
[]
:::VA
[]
:::VI
[]
:::VT
[]
:::WA
[2]
:::WI
[2]
:::WV
[2]
:::WY
[]
ELK - INVENTORY
:::AK
[]
:::AL
[2]
:::AR
[]
:::AS
[]
:::AZ
[]
:::CA
[]
:::CO
[2]
:::CT
[]
:::DC
[]
:::DE
[]
:::FL
[]
:::FM
[]
:::GA
[]
:::GU
[]
:::HI
[]
:::IA
[2]
:::ID
[]
:::IL
[]
:::IN
[]
:::KS
[2]
:::KY
[]
:::LA
[]
:::MA
[]


In [21]:
# agCensus is already loaded (unfiltered and unmodified) by the first data cell;
# re-reading the full census file here only repeated an expensive load.
texasAnimals_pre1 = agCensus[agCensus['STATE_ALPHA'] == "TX"]

# Keep rows with a non-empty COUNTY_NAME.
texasAnimals_pre2 = texasAnimals_pre1[texasAnimals_pre1['COUNTY_NAME'].str.contains('.+', na=False)]

texasAnimals = texasAnimals_pre2[texasAnimals_pre2['SECTOR_DESC'] == 'ANIMALS & PRODUCTS']
# NOTE(review): rows are not restricted to a census chapter/table at this point;
# downstream cells that need a single table must apply that filter themselves.

In [None]:
# Texas hog inventory rows; .copy() so the VALUE column can be rewritten without
# touching (or warning about) the texasAnimals frame it was sliced from.
hogs_pre1 = texasAnimals[texasAnimals['SHORT_DESC'] == 'HOGS - INVENTORY'].copy()

# VALUE is a string with thousands separators (e.g. '1,234'). Strip the commas
# BEFORE parsing: otherwise to_numeric(errors='coerce') turns every count of
# 1,000 or more into NaN and those counties are silently dropped below.
# Entries that still fail to parse (such as '(D)') become NaN.
hogs_pre1['VALUE'] = pd.to_numeric(
    hogs_pre1['VALUE'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)

# Keep only rows with a parsed count. (The earlier isinstance(x, float) filter was
# removed: it kept everything for float data and discarded everything for int data.)
hogs_pre2 = hogs_pre1[hogs_pre1['VALUE'].notnull()]
hogs = hogs_pre2.assign(VALUE=hogs_pre2['VALUE'].astype(int))
hogs

In [None]:
# Texas chicken rows.
# NOTE(review): the original definition of texasChickenInventory was commented out and
# referred to `agCensusTexas3`, which is not defined anywhere in this notebook, so the
# cell raised NameError on a fresh kernel. texasAnimals (Texas county rows of the
# 'ANIMALS & PRODUCTS' sector) is assumed to be the intended source -- confirm.
texasChickenInventory = texasAnimals[texasAnimals['COMMODITY_DESC'] == 'CHICKENS']

# Restrict to census chapter 2, table 19.
in_chapter2_table19 = (
    (texasChickenInventory['CENSUS_CHAPTER'] == 2) &
    (texasChickenInventory['CENSUS_TABLE'] == 19)
)
texasChickens = texasChickenInventory[in_chapter2_table19]

# .copy(): later cells add and rewrite columns on these two frames.
texasBroilers = texasChickens[texasChickens['SHORT_DESC'] == 'CHICKENS, BROILERS - INVENTORY'].copy()
texasLayers = texasChickens[texasChickens['SHORT_DESC'] == 'CHICKENS, LAYERS - INVENTORY'].copy()

In [None]:
# Build the cleaned layer table as a new frame instead of assigning columns into a
# filtered slice (which raised SettingWithCopyWarning):
#   * COUNTY_CODE as int
#   * FIPS = 48000 + county code as a string (48 is the state prefix used for Texas)
#   * VALUE with thousands separators removed
#   * indexed by FIPS, keeping FIPS as a column too
texasLayers = (
    texasLayers
    .assign(COUNTY_CODE=lambda rows: rows['COUNTY_CODE'].astype(int))
    .assign(
        FIPS=lambda rows: (rows['COUNTY_CODE'] + 1000*48).apply(str),
        VALUE=lambda rows: rows['VALUE'].str.replace(',', '', regex=False),
    )
    .set_index('FIPS', drop=False)
)

# Drop withheld values, then make the counts numeric.
texasLayers2 = (
    texasLayers[texasLayers['VALUE'] != '(D)']
    .assign(VALUE=lambda rows: rows['VALUE'].astype(int))
)

In [None]:
# Build the cleaned broiler table as a new frame instead of assigning columns into a
# filtered slice (which raised SettingWithCopyWarning):
#   * COUNTY_CODE as int
#   * FIPS = 48000 + county code as a string (48 is the state prefix used for Texas)
#   * VALUE with thousands separators removed
#   * indexed by FIPS, keeping FIPS as a column too
texasBroilers = (
    texasBroilers
    .assign(COUNTY_CODE=lambda rows: rows['COUNTY_CODE'].astype(int))
    .assign(
        FIPS=lambda rows: (rows['COUNTY_CODE'] + 1000*48).apply(str),
        VALUE=lambda rows: rows['VALUE'].str.replace(',', '', regex=False),
    )
    .set_index('FIPS', drop=False)
)

# Drop withheld values, then make the counts numeric.
texasBroilers2 = (
    texasBroilers[texasBroilers['VALUE'] != '(D)']
    .assign(VALUE=lambda rows: rows['VALUE'].astype(int))
)

In [None]:
def valsToColors(values, start, end):
    """Map an increasing list of values to linearly interpolated RGB colour strings.

    Parameters
    ----------
    values : sequence of numbers
        Assumed to be in increasing order: the first element is treated as the
        minimum and the last as the maximum.
    start : sequence of three numbers
        The r, g, b components assigned to the first value.
    end : sequence of three numbers
        The r, g, b components assigned to the last value.

    Returns
    -------
    list of str
        One ``'rgb(r, g, b)'`` string per input value. An empty input gives an
        empty list; if the first and last values are equal, every value gets
        the ``start`` colour (instead of dividing by zero).
    """
    if len(values) == 0:
        return []

    minValue = values[0]
    span = values[-1] - minValue
    if span == 0:
        # Degenerate range: no interpolation is possible.
        factors = [0.0] * len(values)
    else:
        factors = [(value - minValue) / span for value in values]

    def channel(i):
        # Interpolate a single colour component for every factor.
        return [int(round((1 - t)*start[i] + t*end[i])) for t in factors]

    return [f'rgb({r}, {g}, {b})' for (r, g, b) in zip(channel(0), channel(1), channel(2))]

In [None]:
# County FIPS codes and the broiler count to colour each county by.
fips = list(texasBroilers2['FIPS'])
values = list(texasBroilers2['VALUE'])

# Seven evenly spaced points from the smallest to the largest count. All seven are
# used to generate the colour scale; only the five interior points are passed on
# as bin boundaries.
all_endpts = list(np.mgrid[min(values):max(values):7j])
colorscale = valsToColors(all_endpts, [200,255,0], [255,50,0])
endpts = all_endpts[1:-1]

choropleth_options = dict(
    binning_endpoints=endpts,
    colorscale=colorscale,
    county_outline={'color': 'rgb(0,0,0)', 'width': 0.5},
    legend_title='Number of Broiler Chickens',
)
fig = ff.create_choropleth(fips=fips, values=values, scope=['TX'], **choropleth_options)

fig.layout.template = None
fig.show()

In [None]:
# Per-county STEC table for Texas (comma-separated, first row is the header).
texasSTEC = pd.read_csv('Texas_STEC_By_County.csv')

In [None]:
# County reference table (comma-separated, first row is the header).
countyInfo = pd.read_csv('US_County_Info.csv')

In [None]:
# Index by FIPS while keeping FIPS available as an ordinary column.
countyInfo = countyInfo.set_index('FIPS', drop=False)

In [None]:
def tx_fips(county):
    """Return the FIPS code, as a string, of the Texas county called ``county``.

    The name is compared case-insensitively against the 'County' column of
    ``countyInfo``, restricted to rows whose 'State' is 'TX'. If several rows
    match, the first one is used.

    Raises
    ------
    IndexError
        If no Texas county with that name exists in ``countyInfo``. The same
        exception type was raised before, but with an opaque
        "list index out of range" message.
    """
    is_match = (countyInfo['County'].str.upper() == county.upper()) & (countyInfo['State'] == 'TX')
    matches = countyInfo.loc[is_match, 'FIPS']
    if matches.empty:
        raise IndexError(f'no TX county named {county!r} found in countyInfo')
    return str(matches.iloc[0])

In [None]:
# Look up the FIPS code of every county name, then index the table by it
# (FIPS stays available as a column as well).
texasSTEC['FIPS'] = texasSTEC['County'].map(tx_fips)
texasSTEC = texasSTEC.set_index('FIPS', drop=False)

In [None]:
def agg_rate(fips):
    """Return the sum of the '<year> IR' columns of ``texasSTEC`` for 2008 through 2017.

    ``fips`` is the index label of the county's row in ``texasSTEC``.
    """
    return sum(texasSTEC[str(year) + ' IR'][fips] for year in range(2008, 2018))

In [None]:
# One aggregate rate per row, computed from the row's index label.
texasSTEC['AGG_RATE'] = [agg_rate(fips) for fips in texasSTEC.index]

In [None]:
# Intersect the two frames so that only counties present in both remain.
texasSTEC3 = texasSTEC[texasSTEC['FIPS'].isin(texasBroilers2['FIPS'])]
texasBroilers3 = texasBroilers2[texasBroilers2['FIPS'].isin(texasSTEC3['FIPS'])]

# x: broiler counts in the broiler table's own row order.
# y: aggregate rates looked up BY THE SAME FIPS SEQUENCE, so that x[i] and y[i]
# always belong to the same county. (Previously each array was taken in its own
# frame's row order, which pairs values correctly only if both frames happen to
# be sorted identically.) Both frames are indexed by FIPS.
shared_fips = list(texasBroilers3['FIPS'])
x_vals = np.asarray(texasBroilers3['VALUE'], dtype='float64')
y_vals = np.asarray(texasSTEC3.loc[shared_fips, 'AGG_RATE'], dtype='float64')
assert x_vals.shape == y_vals.shape, 'broiler counts and STEC rates are not paired one-to-one'

In [None]:
def errors(m, b, xs, ys):
    """Return the residuals ``ys - (m*xs + b)`` of the line y = m*x + b."""
    return ys - (m*xs + b)

def MSE(m, b, xs, ys):
    """Return the mean squared residual of the line (sum of squares / ``xs.size``)."""
    # np.sum is vectorised; the builtin sum() iterated the array element by
    # element in Python, which matters inside a 100000-step descent loop.
    return np.sum(errors(m, b, xs, ys)**2)/xs.size

def dMSE_dm(m, b, xs, ys):
    """Return the partial derivative of the MSE with respect to the slope ``m``."""
    return -2*np.sum(xs*errors(m, b, xs, ys))/xs.size

def dMSE_db(m, b, xs, ys):
    """Return the partial derivative of the MSE with respect to the intercept ``b``."""
    return -2*np.sum(errors(m, b, xs, ys))/xs.size

def RMSE(m, b, xs, ys):
    """Return the root mean squared residual of the line, as a Python float."""
    return math.sqrt(MSE(m, b, xs, ys))

In [None]:
def step(m, b, x_values, y_values, learn_rate):
    """Take one gradient-descent step on the MSE and return the updated ``(m, b)``."""
    grad_m = dMSE_dm(m, b, x_values, y_values)
    grad_b = dMSE_db(m, b, x_values, y_values)
    return (m - learn_rate * grad_m, b - learn_rate * grad_b)

def learn(m_initial, b_initial, x_values, y_values, learn_rate, steps):
    """Run ``steps`` gradient-descent steps from ``(m_initial, b_initial)``.

    Returns the final ``(m, b)`` tuple; with ``steps == 0`` the initial values
    are returned unchanged.
    """
    params = (m_initial, b_initial)
    for _ in range(steps):
        params = step(*params, x_values, y_values, learn_rate)
    return params

In [None]:
# Step-size scale: this is basically the inverse of a bound on the Laplacian of the error.
largest_x = max(x_vals)
scale = 1/(2*largest_x**2)

# Start from slope 1, intercept 25 and run 100000 descent steps.
(m, b) = learn(1, 25, x_vals, y_vals, learn_rate=scale*0.1, steps=100000)

In [None]:
# Display the fitted slope and intercept of the broiler regression.
(m,b)

In [None]:
def line_trace(m,b, max_val):
    """Return a plotly 'lines' trace dict for y = m*x + b sampled on [0, max_val].

    The line is evaluated at 100 evenly spaced x values and the trace is named
    'Best Fit Line'.
    """
    sample_x = np.linspace(0.0, max_val, 100)
    sample_y = m*sample_x + b
    return dict(x=list(sample_x), y=list(sample_y), mode='lines', name='Best Fit Line')

In [None]:
# Scatter of counties (broiler count vs. aggregate STEC rate) with the fitted line.
trace = {'x' : x_vals, 'y' : y_vals, 'mode' : 'markers', 'name' : 'Counties'}
figure2 = {
    'data': [trace, line_trace(m,b, max(x_vals))],
    'layout': {
        'title': 'STEC Incidence Rate versus Broiler Chicken Population',
        # Axis labels so the figure can be read on its own.
        'xaxis': {'title': 'Broiler chicken inventory'},
        'yaxis': {'title': 'Aggregate STEC incidence rate (sum of 2008-2017 IR)'},
    },
}
plot(figure2)

This is surprising! STEC cases seem to be *negatively* correlated with broiler chicken population. To check that this relationship is not merely a weak one, we compute the normalized root-mean-square error of the fit (the RMSE divided by the range of the observed rates):

In [None]:
# Normalized RMSE: RMSE of the broiler fit divided by the range (max - min) of y_vals.
RMSE(m, b, x_vals, y_vals) / (max(y_vals) - min(y_vals))

## What is going on here?

It appears as though the correlation between STEC and broiler chicken population runs in the opposite direction to the correlation between STEC and cattle population. This is not what we expected, so we should try to find an explanation. There might be some third factor which influences both cattle and chicken populations. It may be that chicken population is negatively correlated with the presence of another factor which more strongly influences STEC rates (for example, the presence of another type of animal which is a more favorable carrier of STEC). Ultimately, this indicates a need for multivariate regression. It is hard to draw a meaningful conclusion about the effect chickens have on STEC incidence without controlling for other variables.

We can also check to see if there is a relationship between STEC rates and layer chickens:

In [None]:
# Intersect the two frames so that only counties present in both remain.
texasSTEC4 = texasSTEC[texasSTEC['FIPS'].isin(texasLayers2['FIPS'])]
texasLayers4 = texasLayers2[texasLayers2['FIPS'].isin(texasSTEC4['FIPS'])]

# x: layer counts in the layer table's own row order.
# y: aggregate rates looked up BY THE SAME FIPS SEQUENCE, so that x[i] and y[i]
# always belong to the same county. (Previously each array was taken in its own
# frame's row order, which pairs values correctly only if both frames happen to
# be sorted identically.) Both frames are indexed by FIPS.
shared_fips2 = list(texasLayers4['FIPS'])
x_vals2 = np.asarray(texasLayers4['VALUE'], dtype='float64')
y_vals2 = np.asarray(texasSTEC4.loc[shared_fips2, 'AGG_RATE'], dtype='float64')
assert x_vals2.shape == y_vals2.shape, 'layer counts and STEC rates are not paired one-to-one'

In [None]:
# Step-size scale for the layer data: this is basically the inverse of a bound on
# the Laplacian of the error.
scale2 = 1/(2*max(x_vals2)**2)

# Use scale2 here. The call previously passed `scale`, the step size derived from
# the BROILER data, so the layer fit ran with a learning rate unrelated to its
# own x range.
(m2,b2) = learn(1, 25, x_vals2, y_vals2, scale2*0.1, 100000)

In [None]:
# Scatter of counties (layer count vs. aggregate STEC rate) with the fitted line.
trace2 = {'x' : x_vals2, 'y' : y_vals2, 'mode' : 'markers', 'name' : 'Counties'}
figure3 = {
    'data': [trace2, line_trace(m2,b2, max(x_vals2))],
    'layout': {
        'title': 'STEC Incidence Rate versus Layer Chicken Population',
        # Axis labels so the figure can be read on its own.
        'xaxis': {'title': 'Layer chicken inventory'},
        'yaxis': {'title': 'Aggregate STEC incidence rate (sum of 2008-2017 IR)'},
    },
}
plot(figure3)

In [None]:
# Normalized RMSE: RMSE of the layer fit divided by the range (max - min) of y_vals2.
RMSE(m2, b2, x_vals2, y_vals2) / (max(y_vals2) - min(y_vals2))

## Conclusion

We see here a somewhat weaker correlation, this time positive. We would expect, all else being equal, that the direction of the relationship between chickens and STEC rate would not depend on whether those chickens were raised for meat or for eggs, but our simple model suggests that it does. This indicates that our model is inadequate and that there are hidden variables it does not account for.