In [1]:
import addfips
import requests
import matplotlib.pyplot as plt
from IPython.display import Image
import folium
import pickle
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
import pyproj
import collections

%matplotlib inline

In [2]:
counties = gpd.read_file('../data/counties/tncounty.shp')
print(counties.crs)
counties.head()

epsg:2274


Unnamed: 0,OBJECTID,NAME,KEY,SHAPE_AREA,SHAPE_LEN,geometry
0,76,Chester,47023,8049024000.0,520461.080124,"POLYGON ((1137985.762 344601.643, 1137965.070 ..."
1,77,Wayne,47181,20507410000.0,666520.678598,"POLYGON ((1365052.057 391716.806, 1365746.554 ..."
2,78,Tipton,47167,13191250000.0,865093.887634,"MULTIPOLYGON (((886814.330 400456.525, 886774...."
3,79,Hamilton,47065,16047760000.0,652926.001078,"POLYGON ((2274954.438 239788.911, 2274090.610 ..."
4,80,Stewart,47161,13750030000.0,490090.33618,"POLYGON ((1382472.783 743972.302, 1382445.171 ..."


In [3]:
spec = pd.read_csv('../data/rare_species_by_county.csv')
spec.head()

Unnamed: 0,County,Type,Category,Scientific Name,Common Name,Global Rank,State Rank,Fed. Status,State Status,Habitat,Wet Habitat Flag
0,Anderson,Invertebrate Animal,Arachnid,Hesperochernes mirabilis,Southeastern Cave Pseudoscorpion,G5,S3,--,"Rare, Not State Listed",Terrestrial cave obligate; woodrat debris in c...,Upland
1,Anderson,Invertebrate Animal,Mollusc,Cyprogenia stegaria,Fanshell,G1,S1,"LE, XN",E,Medium to large streams and rivers with coarse...,Aquatic
2,Anderson,Vertebrate Animal,Bird,Limnothlypis swainsonii,Swainson's Warbler,G4,S3,--,D,"Mature, rich, damp, deciduous floodplain and s...",Possible
3,Anderson,Vertebrate Animal,Reptile,Ophisaurus attenuatus longicaudus,Eastern Slender Glass Lizard,G5T5,S3,--,D,"Dry upland areas including brushy, cut-over wo...",Upland
4,Anderson,Vertebrate Animal,Bird,Vermivora chrysoptera,Golden-winged Warbler,G4,S3B,--,T,Early successional habitats in foothill region...,Upland


In [4]:
spec.columns = ['County', 'Type', 'Category', 'Scientific_Name', 'Common_Name', 
               'Global_Rank', 'State_Rank', 'Fed_Status', 'State_Status', 'Habitat', 'Wet_Habitat_Flag']

In [5]:
spec['summer'] = 1
spec['zero'] = 0

Create 1's for global rank columns:

In [6]:
spec['G5BOOL'] = spec['Global_Rank'].isin(['G5'])
spec['G4BOOL'] = spec['Global_Rank'].isin(['G4'])
spec['G3BOOL'] = spec['Global_Rank'].isin(['G3'])
spec['G2BOOL'] = spec['Global_Rank'].isin(['G2'])
spec['G1BOOL'] = spec['Global_Rank'].isin(['G1'])

In [7]:
spec['G5'] = spec['G5BOOL'].astype(int)
spec['G4'] = spec['G4BOOL'].astype(int)
spec['G3'] = spec['G3BOOL'].astype(int)
spec['G2'] = spec['G2BOOL'].astype(int)
spec['G1'] = spec['G1BOOL'].astype(int)

In [8]:
spec = spec.drop(columns = ['G5BOOL', 'G4BOOL', 'G3BOOL', 'G2BOOL', 'G1BOOL'])

Now do the same for State ranks:

In [9]:
spec['S5BOOL'] = spec['State_Rank'].isin(['S5'])
spec['S4BOOL'] = spec['State_Rank'].isin(['S4'])
spec['S3BOOL'] = spec['State_Rank'].isin(['S3'])
spec['S2BOOL'] = spec['State_Rank'].isin(['S2'])
spec['S1BOOL'] = spec['State_Rank'].isin(['S1'])

In [10]:
spec['S5'] = spec['S5BOOL'].astype(int)
spec['S4'] = spec['S4BOOL'].astype(int)
spec['S3'] = spec['S3BOOL'].astype(int)
spec['S2'] = spec['S2BOOL'].astype(int)
spec['S1'] = spec['S1BOOL'].astype(int)

In [11]:
spec = spec.drop(columns = ['S5BOOL', 'S4BOOL', 'S3BOOL', 'S2BOOL', 'S1BOOL'])

Now do the same for Federal Status:

In [12]:
spec['LEBOOL'] = spec['Fed_Status'].isin(['LE'])
spec['LTBOOL'] = spec['Fed_Status'].isin(['LT'])

In [13]:
spec['LE'] = spec['LEBOOL'].astype(int)
spec['LT'] = spec['LTBOOL'].astype(int)

In [14]:
spec = spec.drop(columns = ['LEBOOL', 'LTBOOL'])

Now do this for Type:

In [45]:
spec['fungusBOOL'] = spec['Type'].isin(['Fungus'])
spec['ITESCBOOL'] = spec['Type'].isin(['International Terrestrial Ecological System Classification'])
spec['An_AssBOOL'] = spec['Type'].isin(['Animal Assemblage'])
spec['IVCNBOOL'] = spec['Type'].isin(['International Vegetation Classification - Natural'])
spec['NV_PlantBOOL'] = spec['Type'].isin(['Nonvascular Plant'])
spec['IN_AnimBOOL'] = spec['Type'].isin(['Invertebrate Animal'])
spec['VB_AnimBOOL'] = spec['Type'].isin(['Vertebrate Animal'])
spec['V_PlantBOOL'] = spec['Type'].isin(['Vascular Plant'])

In [46]:
spec['T_Fungus'] = spec['fungusBOOL'].astype(int)
spec['T_IntnlTer_EcoSystem'] = spec['ITESCBOOL'].astype(int)
spec['T_An_Asslg'] = spec['An_AssBOOL'].astype(int)
spec['T_IntnlVegClass'] = spec['IVCNBOOL'].astype(int)
spec['T_NV_Plant'] = spec['NV_PlantBOOL'].astype(int)
spec['T_INV_Animal'] = spec['IN_AnimBOOL'].astype(int)
spec['T_VBRT_Animal'] = spec['VB_AnimBOOL'].astype(int)
spec['T_VASC_Plant'] = spec['V_PlantBOOL'].astype(int)

In [47]:
spec = spec.drop(columns = ['fungusBOOL', 'ITESCBOOL', 'An_AssBOOL', 'IVCNBOOL', 'NV_PlantBOOL', 
                           'IN_AnimBOOL', 'VB_AnimBOOL', 'V_PlantBOOL'])

Now do this for Category:

In [48]:
spec['PlanarianBOOL'] = spec['Category'].isin(['Planarian'])
spec['TardigradeBOOL'] = spec['Category'].isin(['Tardigrade'])
spec['AnnelidBOOL'] = spec['Category'].isin(['Annelid'])
spec['LiverwortBOOL'] = spec['Category'].isin(['Liverwort'])
spec['Pl_GymnoBOOL'] = spec['Category'].isin(['Plant: Gymnosperm'])
spec['OtherBOOL'] = spec['Category'].isin(['Other Type'])
spec['NDBOOL'] = spec['Category'].isin(['No Data'])
spec['ArachnidBOOL'] = spec['Category'].isin(['Arachnid'])
spec['Pl_CommBOOL'] = spec['Category'].isin(['Plant Community'])
spec['FernBOOL'] = spec['Category'].isin(['Fern and Fern Ally'])
spec['ReptileBOOL'] = spec['Category'].isin(['Reptile'])
spec['CrustaceanBOOL'] = spec['Category'].isin(['Crustacean'])
spec['NV_PLBOOL'] = spec['Category'].isin(['Non-Vascular Plant'])
spec['AmphibianBOOL'] = spec['Category'].isin(['Amphibian'])
spec['BirdBOOL'] = spec['Category'].isin(['Bird'])
spec['InsectBOOL'] = spec['Category'].isin(['Insect'])
spec['MammalBOOL'] = spec['Category'].isin(['Mammal'])
spec['FishBOOL'] = spec['Category'].isin(['Fish'])
spec['MolluscBOOL'] = spec['Category'].isin(['Mollusc'])
spec['Fl_PlBOOL'] = spec['Category'].isin(['Flowering Plant'])

In [49]:
spec['C_Planarian'] = spec['PlanarianBOOL'].astype(int)
spec['C_Tardigrade'] = spec['TardigradeBOOL'].astype(int)
spec['C_Annelid'] = spec['AnnelidBOOL'].astype(int)
spec['C_Liverwort'] = spec['LiverwortBOOL'].astype(int)
spec['C_Pl_Gymnosperm'] = spec['Pl_GymnoBOOL'].astype(int)
spec['C_Other'] = spec['OtherBOOL'].astype(int)
spec['C_No_Data'] = spec['NDBOOL'].astype(int)
spec['C_Arachnid'] = spec['ArachnidBOOL'].astype(int)
spec['C_Plt_Community'] = spec['Pl_CommBOOL'].astype(int)
spec['C_Fern'] = spec['FernBOOL'].astype(int)
spec['C_Reptile'] = spec['ReptileBOOL'].astype(int)
spec['C_Crustacean'] = spec['CrustaceanBOOL'].astype(int)
spec['C_Non_Vasc_Plant'] = spec['NV_PLBOOL'].astype(int)
spec['C_Amphibian'] = spec['AmphibianBOOL'].astype(int)
spec['C_Bird'] = spec['BirdBOOL'].astype(int)
spec['C_Insect'] = spec['InsectBOOL'].astype(int)
spec['C_Mammal'] = spec['MammalBOOL'].astype(int)
spec['C_Fish'] = spec['FishBOOL'].astype(int)
spec['C_Mollusc'] = spec['MolluscBOOL'].astype(int)
spec['C_Flowering Plant'] = spec['Fl_PlBOOL'].astype(int)

In [50]:
spec = spec.drop(columns = ['PlanarianBOOL', 'TardigradeBOOL', 'AnnelidBOOL', 'LiverwortBOOL', 'Pl_GymnoBOOL', 
                           'OtherBOOL', 'NDBOOL', 'ArachnidBOOL', 'Pl_CommBOOL', 'FernBOOL', 'ReptileBOOL',
                            'CrustaceanBOOL', 'NV_PLBOOL', 
                           'AmphibianBOOL', 'BirdBOOL', 'InsectBOOL',  'MammalBOOL', 
                           'FishBOOL', 'MolluscBOOL', 'Fl_PlBOOL'])

In [51]:
rankstat_county = spec.groupby(['County']).agg({'G5': sum,'G4': sum, 'G3': sum, 'G2': sum,'G1': sum, 
                                               'S5': sum,'S4': sum, 'S3': sum, 'S2': sum,'S1': sum, 
                                               'LE': sum, 'LT': sum, 'summer': sum, 'T_Fungus': sum,
                                                'T_IntnlTer_EcoSystem': sum, 'T_An_Asslg': sum, 
                                               'T_IntnlVegClass': sum, 'T_NV_Plant': sum, 'T_INV_Animal': sum, 
                                               'T_VBRT_Animal': sum, 'T_VASC_Plant': sum, 'C_Planarian': sum, 
                                                'C_Tardigrade': sum, 'C_Annelid': sum, 'C_Liverwort': sum, 
                                                'C_Pl_Gymnosperm': sum, 'C_Other': sum, 'C_No_Data': sum, 
                                                'C_Arachnid': sum, 'C_Plt_Community': sum, 'C_Fern': sum,
                                                'C_Reptile': sum, 'C_Crustacean': sum, 'C_Non_Vasc_Plant': sum, 
                                                'C_Amphibian': sum, 'C_Bird': sum, 'C_Insect': sum, 'C_Mammal': sum, 
                                                'C_Fish': sum, 'C_Mollusc': sum, 'C_Flowering Plant': sum})

I want an idea of how many species don't have tags in these respective categories:

In [52]:
rankstat_county['global_diff'] = rankstat_county['summer'] - (rankstat_county['G5'] + rankstat_county['G4'] + 
                                                             rankstat_county['G3'] + rankstat_county['G2'] + 
                                                             rankstat_county['G1'])

In [53]:
rankstat_county['state_diff'] = rankstat_county['summer'] - (rankstat_county['S5'] + rankstat_county['S4'] + 
                                                             rankstat_county['S3'] + rankstat_county['S2'] + 
                                                             rankstat_county['S1'])

In [54]:
rankstat_county['fed_diff'] = rankstat_county['summer'] - (rankstat_county['LE'] + rankstat_county['LT'])

In [55]:
rankstat_county = rankstat_county.reset_index(drop = False)

In [56]:
rankstat_county.head()

Unnamed: 0,County,G5,G4,G3,G2,G1,S5,S4,S3,S2,...,C_Amphibian,C_Bird,C_Insect,C_Mammal,C_Fish,C_Mollusc,C_Flowering Plant,global_diff,state_diff,fed_diff
0,Anderson,13,9,10,2,16,0,1,15,18,...,4,4,3,5,8,15,17,17,10,63
1,Bedford,4,6,7,6,4,0,1,11,14,...,3,0,1,6,12,6,11,15,10,34
2,Benton,5,1,2,1,2,0,0,2,4,...,0,1,0,2,2,3,5,7,5,15
3,Bledsoe,18,4,4,2,3,0,1,5,14,...,1,0,0,1,5,1,25,8,8,33
4,Blount,22,11,14,9,8,0,1,14,26,...,3,3,4,9,14,4,43,40,27,97


In [57]:
rankstat_county.describe()

Unnamed: 0,G5,G4,G3,G2,G1,S5,S4,S3,S2,S1,...,C_Amphibian,C_Bird,C_Insect,C_Mammal,C_Fish,C_Mollusc,C_Flowering Plant,global_diff,state_diff,fed_diff
count,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,...,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0,95.0
mean,11.557895,6.336842,7.336842,2.810526,4.557895,0.0,0.389474,8.178947,12.715789,14.747368,...,1.663158,1.715789,1.831579,3.515789,5.294737,6.557895,19.726316,14.0,10.568421,41.842105
std,10.396108,4.990652,5.166608,2.658966,4.145533,0.0,0.490218,5.098976,9.214539,12.449018,...,1.581882,1.541367,2.537601,3.027755,3.524606,6.244514,17.633046,10.793024,7.775648,30.084744
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,3.0
25%,5.0,3.0,3.0,1.0,1.0,0.0,0.0,4.0,6.5,6.0,...,0.0,1.0,0.0,1.0,3.0,2.0,8.0,6.0,5.0,20.0
50%,9.0,5.0,6.0,2.0,4.0,0.0,0.0,7.0,10.0,11.0,...,1.0,1.0,1.0,2.0,5.0,5.0,14.0,11.0,8.0,34.0
75%,15.0,9.0,10.0,4.0,7.0,0.0,1.0,12.0,19.0,19.5,...,3.0,3.0,2.0,6.0,7.0,10.0,27.0,20.0,14.5,57.0
max,56.0,21.0,23.0,12.0,17.0,0.0,1.0,20.0,40.0,59.0,...,5.0,6.0,17.0,12.0,21.0,33.0,88.0,49.0,40.0,142.0


#### These ^ may be cool tables by ranks, types, etc.

In [58]:
rankstat_county_geo = pd.merge(left = counties, right = rankstat_county, left_on = 'NAME', right_on = 'County')

In [59]:
rankstat_county_geo['id'] = rankstat_county_geo.index.astype(str)

In [60]:
rankstat_county_geo.head()

Unnamed: 0,OBJECTID,NAME,KEY,SHAPE_AREA,SHAPE_LEN,geometry,County,G5,G4,G3,...,C_Bird,C_Insect,C_Mammal,C_Fish,C_Mollusc,C_Flowering Plant,global_diff,state_diff,fed_diff,id
0,76,Chester,47023,8049024000.0,520461.080124,"POLYGON ((1137985.762 344601.643, 1137965.070 ...",Chester,1,0,1,...,0,0,0,1,0,4,2,1,5,0
1,77,Wayne,47181,20507410000.0,666520.678598,"POLYGON ((1365052.057 391716.806, 1365746.554 ...",Wayne,10,3,11,...,1,1,6,11,5,13,12,12,35,1
2,78,Tipton,47167,13191250000.0,865093.887634,"MULTIPOLYGON (((886814.330 400456.525, 886774....",Tipton,5,0,4,...,1,0,0,7,3,4,6,2,14,2
3,79,Hamilton,47065,16047760000.0,652926.001078,"POLYGON ((2274954.438 239788.911, 2274090.610 ...",Hamilton,15,12,10,...,6,0,3,2,5,38,21,21,59,3
4,80,Stewart,47161,13750030000.0,490090.33618,"POLYGON ((1382472.783 743972.302, 1382445.171 ...",Stewart,10,6,7,...,5,0,2,2,1,23,14,10,34,4


In [61]:
rankstat_county_geo = rankstat_county_geo[['id', 'County', 'G5', 'G4', 'G3', 'G2', 'G1',
                                           'S5', 'S4', 'S3', 'S2', 'S1', 'LE', 'LT', 'summer', 
                                           'global_diff', 'state_diff', 'fed_diff', 'T_Fungus', 
                                           'T_IntnlTer_EcoSystem', 'T_An_Asslg', 'T_IntnlVegClass', 'T_NV_Plant', 
                                           'T_INV_Animal', 'T_VBRT_Animal', 'T_VASC_Plant', 'C_Planarian', 'C_Tardigrade', 
                                           'C_Annelid', 'C_Liverwort', 'C_Pl_Gymnosperm', 'C_Other', 'C_No_Data', 
                                           'C_Arachnid', 'C_Plt_Community', 'C_Fern', 'C_Reptile', 'C_Crustacean', 
                                           'C_Non_Vasc_Plant', 'C_Amphibian', 'C_Bird', 'C_Insect', 'C_Mammal', 'C_Fish', 
                                           'C_Mollusc', 'C_Flowering Plant', 'geometry']]

In [62]:
rankstat_county_geo

Unnamed: 0,id,County,G5,G4,G3,G2,G1,S5,S4,S3,...,C_Crustacean,C_Non_Vasc_Plant,C_Amphibian,C_Bird,C_Insect,C_Mammal,C_Fish,C_Mollusc,C_Flowering Plant,geometry
0,0,Chester,1,0,1,0,2,0,0,0,...,1,0,0,0,0,0,1,0,4,"POLYGON ((1137985.762 344601.643, 1137965.070 ..."
1,1,Wayne,10,3,11,2,3,0,0,9,...,1,0,1,1,1,6,11,5,13,"POLYGON ((1365052.057 391716.806, 1365746.554 ..."
2,2,Tipton,5,0,4,0,0,0,0,1,...,0,0,0,1,0,0,7,3,4,"MULTIPOLYGON (((886814.330 400456.525, 886774...."
3,3,Hamilton,15,12,10,3,5,0,1,5,...,2,4,2,6,0,3,2,5,38,"POLYGON ((2274954.438 239788.911, 2274090.610 ..."
4,4,Stewart,10,6,7,1,0,0,0,7,...,0,0,1,5,0,2,2,1,23,"POLYGON ((1382472.783 743972.302, 1382445.171 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,90,McNairy,12,3,5,2,2,0,0,4,...,2,0,0,3,1,2,4,0,19,"POLYGON ((1137985.762 344601.643, 1139350.519 ..."
91,91,Franklin,30,20,16,10,13,0,0,20,...,4,3,3,3,6,11,3,21,64,"POLYGON ((1873015.265 239618.144, 1872957.848 ..."
92,92,Bradley,9,1,5,4,6,0,0,3,...,2,0,0,1,2,0,12,8,10,"POLYGON ((2274954.438 239788.911, 2275552.803 ..."
93,93,Marion,15,12,21,5,10,0,1,17,...,2,3,5,1,6,5,6,13,41,"POLYGON ((2126056.390 236919.771, 2122873.509 ..."


In [64]:
rankstat_county_geo.to_csv('../data/countybreakout.csv')