In [38]:
import numpy as np
import numpy as np
import pandas as pd 

from googlemaps import Client as GoogleMaps
import geopy
from geopy.geocoders import Nominatim
import geopandas as gpd

import shapefile as shp

import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.mask import mask
from rasterio.plot import show

from pyproj import Proj, transform

from utils import * 

import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

%matplotlib inline 

## Obtaining and cleaning data  

1. Lyme disease per county per year (https://www.cdc.gov/lyme/datasurveillance/index.html)

2. County locations and shapefile (http://eric.clst.org/tech/usgeojson/)

3. Temperature and precipitation data per month per county (ftp://ftp.ncdc.noaa.gov/pub/data/cirs/climdiv/)

4. Deer population by county (https://data.nal.usda.gov/dataset/white-tailed-deer-density-estimates-across-eastern-united-states-2008)

     - deer_density_QDMA.zip = GIS shapefile depicting white-tailed deer density summarized between 2001 and 2005 by the Quality Deer Management Association for the eastern United States. Categories represent coarse deer density levels as identified in the QDMA report in 2009: (1) rare, absent, or urban area with unknown population, (2) less than 15 deer per square mile, (3) 15 to 30 deer per square mile, (4) 30 to 40 deer per square mile, or (5) greater than 45 deer per square mile. Deer density estimates represent levels summarized between 2001 and 2005 and should not be used to represent current or future deer density levels.

5. Forest cover data 

6. Climate projections for the same resolution in time/space (https://cida.usgs.gov/gdp/client/#!catalog/gdp/dataset/54dd5e4be4b08de9379b38ff)
    - data obtained, need to clean up DFs 
    
7. Add dog data if we can get it : https://capcvet.org/maps/#2017/all/lyme-disease/dog/canada/
8. Need the population per year of counties! 
9. https://www.health.ny.gov/statistics/diseases/communicable/ for NY Lyme numbers 

## Lyme/county data 

In [57]:
county_shapefile = gpd.read_file("./data/raw/gz_2010_us_050_00_20m/gz_2010_us_050_00_20m.shp")
lyme_per_county = pd.read_csv('./data/raw/LD-Case-Counts-by-County-00-17.csv',encoding='latin-1')

In [58]:
## making the labelling of counties compatible in both files 
## 3,007 counties, 64 parishes, 19 organized boroughs, 10 census areas, 41 independent cities, and the District of Columbia = 3142 total.

county_shapefile['long_lat'] = county_shapefile['geometry'].apply(lambda x : list(x.centroid.coords)[0])
county_shapefile['LSAD'] = county_shapefile['LSAD'].replace('CA', 'Census Area') 
county_shapefile['LSAD'] = county_shapefile['LSAD'].replace('Cty&Bor', 'City and Borough') 
county_shapefile['LSAD'] = county_shapefile['LSAD'].replace('Muny', 'Municipality') 
county_shapefile.iloc[777, county_shapefile.columns.get_loc('LSAD')] = ''
county_shapefile.iloc[1107, county_shapefile.columns.get_loc('LSAD')] = ''

county_shapefile['NEWNAME'] = county_shapefile['NAME'] + ' ' + county_shapefile['LSAD']

county_shapefile.COUNTY = county_shapefile.COUNTY.astype('int')
county_shapefile.STATE = county_shapefile.STATE.astype('int')

lyme_per_county = lyme_per_county[lyme_per_county['CTYCODE']!=999]
county_shapefile = county_shapefile[county_shapefile['STATE']!=72]

assert len(lyme_per_county)==len(county_shapefile)
merged = pd.merge(lyme_per_county, county_shapefile,  how='left', left_on=['CTYCODE', 'STCODE'], right_on = ['COUNTY', 'STATE'])
assert len(merged)==len(lyme_per_county)==len(county_shapefile)

merged[['Ctyname', 'Stname', 'STCODE', 'CTYCODE']].to_csv('./data/interim/all_county_state.csv')

In [59]:
merged[merged.isna().any(axis=1)]

Unnamed: 0,Ctyname,Stname,STCODE,CTYCODE,Cases2000,Cases2001,Cases2002,Cases2003,Cases2004,Cases2005,...,Cases2017,GEO_ID,STATE,COUNTY,NAME,LSAD,CENSUSAREA,geometry,long_lat,NEWNAME
81,Kusilvak Census Area,Alaska,2,158,0,0,0,0,0,0,...,0,,,,,,,,,
319,District of Columbia,District of Columbia,11,1,11,17,25,14,16,10,...,84,0500000US11001,11.0,1.0,District of Columbia,,61.048,"POLYGON ((-77.0329858478747 38.839500154093, -...","(-77.01578736260959, 38.90565503863293)",
1763,Carson City,Nevada,32,510,0,1,0,0,0,0,...,2,0500000US32510,32.0,510.0,Carson City,,144.662,"POLYGON ((-120.004504420333 39.1655986798664, ...","(-119.74298726381755, 39.15066857645598)",
2412,Oglala Lakota County,South Dakota,46,102,0,0,0,0,0,0,...,0,,,,,,,,,


In [60]:
# deleting oglala and all of Alaska 

merged = merged.drop(merged[merged['Stname'] == 'Hawaii'].index)
merged = merged.drop(merged[merged['Stname'] == 'Alaska'].index)
merged = merged.drop(merged[merged['Ctyname'] == 'Oglala Lakota County'].index)

# no nore nans merged[merged.isna().any(axis=1)]

## Get the temp and precipitation data from the NCDC


In [61]:
colspecs = [[0,2],[2,5],[5,7],[7,11],[11,18],[18,25], [25,32], [32,39],[39,46],[46,53],[53,60],[60,67],[67,74],[74,81],[81,88],[88,95]]
tmax_data = pd.read_fwf('./data/raw/clim/climdiv-tmaxcy-v1.0.0-20190604', index_col=None, colspecs=colspecs, header=None)
tmin_data = pd.read_fwf('./data/raw/clim/climdiv-tmincy-v1.0.0-20190604', index_col=None, colspecs=colspecs, header=None)
tmpc_data = pd.read_fwf('./data/raw/clim/climdiv-tmpccy-v1.0.0-20190604', index_col=None, colspecs=colspecs, header=None)
pcpn_data = pd.read_fwf('./data/raw/clim/climdiv-pcpncy-v1.0.0-20190604', index_col=None, colspecs=colspecs, header=None)

In [62]:
wsuffixes = ['_TMAX', '_TMIN', '_TMPC', '_PCPN']
dfs = [tmax_data, tmin_data, tmpc_data, pcpn_data]

for i in range(4):
    df = dfs[i]
    df.columns = ['ST', 'DIV', 'EL', 'YR', 'JAN'+wsuffixes[i], 'FEB'+wsuffixes[i], 'MAR'+wsuffixes[i], 'APR'+wsuffixes[i], 'MAY'+wsuffixes[i], 'JUN'+wsuffixes[i], 'JUL'+wsuffixes[i], 'AUG'+wsuffixes[i], 'SEP'+wsuffixes[i], 'OCT'+wsuffixes[i], 'NOV'+wsuffixes[i], 'DEC'+wsuffixes[i]]
    
tmin_data = tmin_data[tmin_data['YR']>=1998]
tmax_data = tmax_data[tmax_data['YR']>=1998]
tmpc_data = tmpc_data[tmpc_data['YR']>=1998]
pcpn_data = pcpn_data[pcpn_data['YR']>=1998]    

In [63]:
stupid_codes = pd.read_csv('./data/raw/state_code_stupid.csv')
logical_codes = pd.read_csv('./data/raw/states_code_logical.csv')
stupid_codes.name = stupid_codes.name.str.lower().str.strip()
logical_codes.name = logical_codes.name.str.lower().str.strip()
stupid_codes.columns=['code','num_stupid','name_stupid']
code_mapping = logical_codes.set_index('name').join(stupid_codes.set_index('name_stupid'))

In [64]:
# merging all the files together 

tmin_tmax = pd.merge(tmin_data, tmax_data ,how='left', left_on=['ST', 'DIV', 'YR'], right_on = ['ST', 'DIV', 'YR'])
tmin_tmax = tmin_tmax.loc[:,~tmin_tmax.columns.duplicated()]
tmpc_tmin_tmax = pd.merge(tmpc_data, tmin_tmax ,how='left', left_on=['ST', 'DIV', 'YR'], right_on = ['ST', 'DIV', 'YR'])
tmpc_tmin_tmax = tmpc_tmin_tmax.loc[:,~tmpc_tmin_tmax.columns.duplicated()]
pcpn_tmpc_tmin_tmax = pd.merge(pcpn_data, tmpc_tmin_tmax ,how='left', left_on=['ST', 'DIV', 'YR'], right_on = ['ST', 'DIV', 'YR'])
pcpn_tmpc_tmin_tmax = pcpn_tmpc_tmin_tmax.loc[:,~pcpn_tmpc_tmin_tmax.columns.duplicated()]

assert len(pcpn_tmpc_tmin_tmax)==len(tmin_tmax)

pcpn_tmpc_tmin_tmax = pcpn_tmpc_tmin_tmax[pcpn_tmpc_tmin_tmax['ST']!=50]

In [65]:
## replacing the 1-48 numbering by the FIPS codes for the states 
# there's no precipitation/temp data for DC, so we should remove that from merged 

pcpn_tmpc_tmin_tmax['ST_new'] = pcpn_tmpc_tmin_tmax['ST'].apply(lambda x: replace_st_code(x))
merged = merged.drop(merged[merged['STCODE'] == 11].index)


In [66]:
pcpn_tmpc_tmin_tmax.to_csv('./data/interim/weather.csv')

In [67]:
# exploring how to merge all the data 
finaldf = None

assert len(pcpn_tmpc_tmin_tmax['ST_new'].unique())==len(merged['STCODE'].unique())

for year in range(2000,2018): 
    temp_lyme = merged[['Ctyname','Stname', 'STCODE', 'CTYCODE',  'Cases'+str(year), 'CENSUSAREA', 'geometry','long_lat']]
    temp_tpc = pcpn_tmpc_tmin_tmax[pcpn_tmpc_tmin_tmax['YR']==year]
    temp_merged = pd.merge(temp_lyme, temp_tpc ,how='left', left_on=['STCODE', 'CTYCODE'], right_on = ['ST_new','DIV'])
    temp_merged.rename(columns={'Cases'+str(year): 'Cases'}, inplace=True)
    temp_merged['year'] = int(year)

    if finaldf is None: 
        finaldf = temp_merged
    else:
        finaldf = finaldf.append(temp_merged)

In [68]:
# the weather data is missing a couple of counties from Virginia, so dropping them. 

#pcpn_tmpc_tmin_tmax[pcpn_tmpc_tmin_tmax['DIV']==678]
#pcpn_tmpc_tmin_tmax[pcpn_tmpc_tmin_tmax['DIV']==515]

finaldf = finaldf.dropna(axis=0, how='any')
finaldf['State FIPS Code'] = finaldf['STCODE'].apply(lambda x: str(x).zfill(2))
finaldf['County FIPS Code'] = finaldf['CTYCODE'].apply(lambda x: str(x).zfill(3))
finaldf['FIPS'] = finaldf['State FIPS Code'] + finaldf['County FIPS Code']
finaldf.to_csv('./data/interim/lyme_weather.csv')
print(finaldf.shape)

#finaldf[finaldf.isna().any(axis=1)]

(55890, 66)


### Forest cover data

In [None]:
attributes = pd.read_csv('./data/raw/ncld_states/gaplf2011lc_v30_AL/GAP_LANDFIRE_National_Terrestrial_Ecosystems_2011_Attributes.txt',sep='\t')
attr_val = attributes[['Value','NVC_CLASS']]

In [29]:
finaldf = finaldf.drop(finaldf[finaldf['Stname'] == 'Hawaii'].index)
finaldf = finaldf.drop(finaldf[finaldf['Stname'] == 'Alaska'].index)

In [30]:
list_cols = ['FIPS'] +  list(attr_val.NVC_CLASS.unique())[1:] 
list_fips = finaldf['FIPS'].unique()
coverdf =  pd.DataFrame(columns=list_cols)

for i in range(3105):
    
    ele = list_fips[i]
    forest = pd.read_csv('./data/county_cover/'+str(ele)+'.csv')
    temp = forest.set_index('myclass').join(attr_val.set_index('Value'))
    temp = temp.reset_index()
    temp.columns= ['class', 'index', 'fraction', 'nvc_class']
    temp = temp.groupby('nvc_class').sum()
    temp = temp.reset_index()
    temp = temp.drop(labels=['index', 'class'], axis=1)
    temp.set_index('nvc_class', inplace=True)
    temp = temp.transpose()

    newdf = pd.DataFrame(columns=list_cols)
    for col in newdf.columns[1:]:
        if col in temp.columns:
            newdf[col] = temp[col]
        else:
            newdf[col] = 1e-10
    newdf.index = [0]    
    newdf.at[0,'FIPS'] = ele

    coverdf = coverdf.append(newdf)

In [31]:
coverdf.head(5)

Unnamed: 0,FIPS,Forest & Woodland,Shrub & Herb Vegetation,Desert & Semi-Desert,"Polar & High Montane Scrub, Grassland & Barrens",Aquatic Vegetation,Open Rock Vegetation,Nonvascular & Sparse Vascular Rock Vegetation,Agricultural & Developed Vegetation,Introduced & Semi Natural Vegetation,Recently Disturbed or Modified,Open Water,Developed & Other Human Use
0,1001,0.685407,0.231207,0.031586,0.00379,0.000632,0.017688,1e-10,0.001895136,0.0006317119,0.001895136,1e-10,1e-10
0,1003,0.524983,0.28679,0.064339,0.015514,0.008898,0.045175,0.006160164,0.008898015,0.005475702,0.00593201,0.00593201,0.0002281542
0,1005,0.756405,0.188576,0.0147,0.00294,0.00168,0.00798,1e-10,1e-10,0.0004199916,0.00251995,0.004619908,1e-10
0,1007,0.825015,0.149729,0.008419,0.002405,0.000601,0.000601,1e-10,1e-10,1e-10,1e-10,1e-10,1e-10
0,1009,0.373501,0.483724,0.075385,0.01028,0.00514,0.023986,0.0005711022,0.001142204,0.001713307,0.001142204,1e-10,1e-10


In [None]:
coverdf.to_csv('./data/interim/county_cover_all.csv')

In [42]:
lyme_weather_cover = finaldf.set_index(keys='FIPS').join(coverdf.set_index(keys='FIPS'))

# there's 47 values missing from coverdf, which result in 846 missing values/nans for lyme_weather_cover

len(coverdf[coverdf.isna().any(axis=1)])
len(lyme_weather_cover[lyme_weather_cover.isna().any(axis=1)])

lyme_weather_cover = lyme_weather_cover.dropna(axis=0, how='any')
lyme_weather_cover.to_csv('./data/interim/lyme_weather_cover.csv')

In [29]:
census_data_urb_rur = pd.read_csv('./data/PctUrbanRural_County.csv')

census_data = pd.read_csv('./data/raw/cc-est2017-alldata.csv', encoding = "ISO-8859-1")
census_imp = census_data[['STATE', 'AGEGRP', 'COUNTY', 'STNAME', 'CTYNAME', 'YEAR', 'TOT_POP']]

FIPS_POP = census_pop.groupby('FIPS')['TOT_POP'].sum().reset_index()

census_tot = census_data_urb_rur.set_index(keys = ['STATE', 'COUNTY']).join(census_imp.set_index(keys=['STATE', 'COUNTY'])).reset_index()

census_tot['State FIPS Code'] = census_tot['STATE'].apply(lambda x: str(x).zfill(2))
census_tot['County FIPS Code'] = census_tot['COUNTY'].apply(lambda x: str(x).zfill(3))
census_tot['FIPS'] = census_tot['State FIPS Code'] + census_tot['County FIPS Code']

census_temp = census_tot[['FIPS', 'POP_URBAN', 'POPPCT_URBAN', 'AREA_URBAN', 'AREAPCT_URBAN',
                  'POP_RURAL', 'POPPCT_RURAL', 'AREA_RURAL', 'AREAPCT_RURAL']]
census_temp.drop_duplicates(subset=None, keep='first', inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [31]:
inpdf = pd.read_csv('./data/lyme_weather_cover.csv')

inpdf['Cases_new'] = inpdf.apply(lambda x: x.Cases*1.4 if x.YR > 2008 else x.Cases, axis=1)

inpdf['long'] = inpdf['long_lat'].apply(lambda x: float(x[1:6]))
inpdf['lat'] = inpdf['long_lat'].apply(lambda x: float(x.split(',')[1][:7]))
fips = inpdf['FIPS']

# load NY data and insert it into our dataframe  

ny_data = pd.read_csv('/Users/avani/Dropbox/Work/dataScience/Insight/PredictLyme/data/nyc_all.csv')
nydata = ny_data.dropna()

inpdf['counties'] = inpdf.Ctyname.apply(lambda x: x.split('County')[0].strip().lower())
nydata['counties'] = nydata.County.apply(lambda x: x.strip().lower())

allindxs = []
cases = []

for county in nydata.counties:
    indxs = inpdf.index[(inpdf.counties==county) & (inpdf.Stname=='New York')].to_list()
    allindxs.append(indxs)
    for indx in indxs:
        tdf = nydata[nydata['counties']==county]
        cases.append(tdf[str(int(inpdf.iloc[indx]['YR']))])

allindxs_open = [item for sublist in allindxs for item in sublist]
cases[583] = 510 # figure out whats happening 
for i in range(len(allindxs_open)):
    ele = allindxs_open[i]
    inpdf.at[ele,'Cases'] = cases[i]
    
inpdf['Stfips'] = inpdf['STCODE'].apply(lambda x: str(x).zfill(2))
inpdf['Ctfips'] = inpdf['CTYCODE'].apply(lambda x: str(x).zfill(3))
inpdf['FIPS'] = inpdf['Stfips'] + inpdf['Ctfips']

inpdf = inpdf.set_index('FIPS').join(FIPS_POP.set_index('FIPS'))
inpdf.reset_index()

inpdf['FIPS'] = inpdf['Stfips'] + inpdf['Ctfips']

inpdf = inpdf.set_index('FIPS').join(census_temp.set_index('FIPS'))

inpdf['Cases_norm'] = inpdf['Cases']/inpdf['TOT_POP']

inpdf['tavg']= inpdf[['JAN_TMPC', 'FEB_TMPC', 'MAR_TMPC', 'APR_TMPC', 'MAY_TMPC', 'JUN_TMPC', 'JUL_TMPC', 'AUG_TMPC', 
                     'SEP_TMPC', 'OCT_TMPC', 'NOV_TMPC', 'DEC_TMPC']].mean(axis=1)

inpdf['tmax']= inpdf[['JAN_TMAX', 'FEB_TMAX', 'MAR_TMAX', 'APR_TMAX', 'MAY_TMAX', 'JUN_TMAX', 'JUL_TMAX', 'AUG_TMAX', 
                     'SEP_TMAX', 'OCT_TMAX', 'NOV_TMAX', 'DEC_TMAX']].mean(axis=1)

inpdf['tmin']= inpdf[['JAN_TMIN', 'FEB_TMIN', 'MAR_TMIN', 'APR_TMIN', 'MAY_TMIN', 'JUN_TMIN', 'JUL_TMIN', 'AUG_TMIN', 
                     'SEP_TMIN', 'OCT_TMIN', 'NOV_TMIN', 'DEC_TMIN']].mean(axis=1)

inpdf['pcpn']= inpdf[['JAN_PCPN', 'FEB_PCPN', 'MAR_PCPN', 'APR_PCPN', 'MAY_PCPN', 'JUN_PCPN', 'JUL_PCPN', 'AUG_PCPN', 
                     'SEP_PCPN', 'OCT_PCPN', 'NOV_PCPN', 'DEC_PCPN']].mean(axis=1)
  
inpdf.to_csv('./data/alldata.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [None]:
# selecting out features and states 

#imp_columns = ['Stname','TOT_POP', 'Cases','FIPS', 'CENSUSAREA','long','lat','YR', 'tavg', 'pcpn', 'tmax', 'tmin', 'Forest & Woodland', 'Shrub & Herb Vegetation']#
               #'Desert & Semi-Desert', 'Introduced & Semi Natural Vegetation','Recently Disturbed or Modified','Open Water','Developed & Other Human Use']

list_val_states = ['New York', 'Massachusetts', 'Pennsylvania', 'Connecticut', 'Michigan', 'Maine', 'New Hampshire', 'Vermont']

val_df = inpdf[inpdf['Stname'].isin(list_val_states)]
val_df.to_csv('./data/val_data.csv')


## Load the climate projections

In [814]:
fips_temp = finaldf[finaldf.ST_new==1]['FIPS'].unique()

In [1204]:
list_files = os.listdir('./data/CMIPS_data/')

list_cols = ['YEAR','JAN_PCPN', 'FEB_PCPN', 'MAR_PCPN', 'APR_PCPN', 'MAY_PCPN', 'JUN_PCPN',
       'JUL_PCPN', 'AUG_PCPN', 'SEP_PCPN', 'OCT_PCPN', 'NOV_PCPN', 'DEC_PCPN',
       'JAN_TMPC', 'FEB_TMPC', 'MAR_TMPC', 'APR_TMPC', 'MAY_TMPC',
       'JUN_TMPC', 'JUL_TMPC', 'AUG_TMPC', 'SEP_TMPC', 'OCT_TMPC', 'NOV_TMPC',
       'DEC_TMPC', 'JAN_TMIN', 'FEB_TMIN', 'MAR_TMIN', 'APR_TMIN', 'MAY_TMIN',
       'JUN_TMIN', 'JUL_TMIN', 'AUG_TMIN', 'SEP_TMIN', 'OCT_TMIN', 'NOV_TMIN',
       'DEC_TMIN', 'JAN_TMAX', 'FEB_TMAX', 'MAR_TMAX', 'APR_TMAX', 'MAY_TMAX',
       'JUN_TMAX', 'JUL_TMAX', 'AUG_TMAX', 'SEP_TMAX', 'OCT_TMAX', 'NOV_TMAX',
       'DEC_TMAX','FIPS']

projectiondf =  pd.DataFrame(columns=list_cols)

for file in list_files:
    
    clim = pd.read_csv('./data/CMIPS_data/'+file, skiprows=1, nrows=1440)

    nfips = int((len(clim.columns)-1)/4)
    clim = clim[clim.columns[0:nfips+1]]
    fips = clim.columns[1:]

    prpn_df, tavg_df, tmax_df, tmin_df = clim.iloc[1:361,:],clim.iloc[364:724,:],clim.iloc[727:1087,:],clim.iloc[1090:,:]

    prpn_df['year'] = prpn_df['Unnamed: 0'].apply(lambda x: int(x[0:4]))    
    tavg_df['year'] = tavg_df['Unnamed: 0'].apply(lambda x: int(x[0:4])) 
    tmin_df['year'] = tmin_df['Unnamed: 0'].apply(lambda x: int(x[0:4])) 
    tmax_df['year'] = tmax_df['Unnamed: 0'].apply(lambda x: int(x[0:4])) 

    prpn_df = prpn_df[(prpn_df['year']>=2017) & (prpn_df['year']<=2025)]
    tavg_df = tavg_df[(tavg_df['year']>=2017) & (tavg_df['year']<=2025)]
    tmin_df = tmin_df[(tmin_df['year']>=2017) & (tmin_df['year']<=2025)]
    tmax_df = tmax_df[(tmax_df['year']>=2017) & (tmax_df['year']<=2025)]

    prpn_df['month'] = prpn_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7]))    
    tavg_df['month'] = tavg_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7])) 
    tmin_df['month'] = tmin_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7])) 
    tmax_df['month'] = tmax_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7])) 

    prpn_df['month'] = prpn_df['month'] + '_PCPN'
    tmax_df['month'] = tmax_df['month'] + '_TMAX'
    tmin_df['month'] = tmin_df['month'] + '_TMIN'
    tavg_df['month'] = tavg_df['month'] + '_TMPC'

    for year in prpn_df.year.unique():

        temp = prpn_df[prpn_df['year']==year]
        temp0 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp0['FIPS']  = temp0.index
        temp0 = temp0.reset_index(drop=True)

        temp = tmax_df[tmax_df['year']==year]
        temp1 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp1['FIPS']  = temp1.index
        temp1 = temp1.reset_index(drop=True)

        temp = tavg_df[tavg_df['year']==year]
        temp2 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp2['FIPS']  = temp2.index
        temp2 = temp2.reset_index(drop=True)

        temp = tmin_df[tmin_df['year']==year]
        temp3 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp3['FIPS']  = temp3.index
        temp3 = temp3.reset_index(drop=True)

        merged = temp1.set_index('FIPS').join(temp0.set_index('FIPS')).join(temp2.set_index('FIPS')).join(temp3.set_index('FIPS'))
        merged['YEAR'] = year
        merged['FIPS'] = merged.index
        
        projectiondf = projectiondf.append(merged)

projectiondf['FIPS'] = projectiondf.index
projectiondf.to_csv('./climate_projections.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
of pandas will c

In [56]:
list_files = ['CMIPS_ct27.csv',
 'CMIPS_ct33.csv',
 'CMIPS_ct26.csv',
 'CMIPS_ct54_55_56.csv',
 'CMIPS_ct36.csv',
 'CMIPS_ct37.csv',
 'CMIPS_ct45_46.csv',
 'CMIPS_ct16_18.csv',
 'CMIPS_ct145.csv',
 'CMIPS_ct28_32.csv',
 'CMIPS_ct54_55_56 (1).csv',
 'CMIPS_ct45_46 (1).csv',
 'CMIPS_ct34_35.csv',
 'CMIPS_ct51.csv',
 'CMIPS_ct19_25.csv',
 'CMIPS_ct46_50.csv',
 'CMIPS_ct19_24.csv',
 'CMIPS_ct38_39.csv',
 'CMIPS_ct10_13.csv',
 'CMIPS_ct89.csv',
 'CMIPS_ct19_22.csv',
 'CMIPS_ct40_42.csv',
 'CMIPS_ct6.csv']

list_cols = ['YEAR','JAN_PCPN', 'FEB_PCPN', 'MAR_PCPN', 'APR_PCPN', 'MAY_PCPN', 'JUN_PCPN',
       'JUL_PCPN', 'AUG_PCPN', 'SEP_PCPN', 'OCT_PCPN', 'NOV_PCPN', 'DEC_PCPN',
       'JAN_TMPC', 'FEB_TMPC', 'MAR_TMPC', 'APR_TMPC', 'MAY_TMPC',
       'JUN_TMPC', 'JUL_TMPC', 'AUG_TMPC', 'SEP_TMPC', 'OCT_TMPC', 'NOV_TMPC',
       'DEC_TMPC', 'JAN_TMIN', 'FEB_TMIN', 'MAR_TMIN', 'APR_TMIN', 'MAY_TMIN',
       'JUN_TMIN', 'JUL_TMIN', 'AUG_TMIN', 'SEP_TMIN', 'OCT_TMIN', 'NOV_TMIN',
       'DEC_TMIN', 'JAN_TMAX', 'FEB_TMAX', 'MAR_TMAX', 'APR_TMAX', 'MAY_TMAX',
       'JUN_TMAX', 'JUL_TMAX', 'AUG_TMAX', 'SEP_TMAX', 'OCT_TMAX', 'NOV_TMAX',
       'DEC_TMAX','FIPS']

projectiondf =  pd.DataFrame(columns=list_cols)

for file in list_files:
    
    clim = pd.read_csv('./data/raw/CMIPS_data/'+file, skiprows=1, nrows=1440)

    nfips = int((len(clim.columns)-1)/4)
    clim = clim[clim.columns[0:nfips+1]]
    fips = clim.columns[1:]

    prpn_df, tavg_df, tmax_df, tmin_df = clim.iloc[1:361,:],clim.iloc[364:724,:],clim.iloc[727:1087,:],clim.iloc[1090:,:]

    prpn_df['year'] = prpn_df['Unnamed: 0'].apply(lambda x: int(x[0:4]))    
    tavg_df['year'] = tavg_df['Unnamed: 0'].apply(lambda x: int(x[0:4])) 
    tmin_df['year'] = tmin_df['Unnamed: 0'].apply(lambda x: int(x[0:4])) 
    tmax_df['year'] = tmax_df['Unnamed: 0'].apply(lambda x: int(x[0:4])) 

    prpn_df = prpn_df[(prpn_df['year']>=2000) & (prpn_df['year']<=2025)]
    tavg_df = tavg_df[(tavg_df['year']>=2000) & (tavg_df['year']<=2025)]
    tmin_df = tmin_df[(tmin_df['year']>=2000) & (tmin_df['year']<=2025)]
    tmax_df = tmax_df[(tmax_df['year']>=2000) & (tmax_df['year']<=2025)]

    prpn_df['month'] = prpn_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7]))    
    tavg_df['month'] = tavg_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7])) 
    tmin_df['month'] = tmin_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7])) 
    tmax_df['month'] = tmax_df['Unnamed: 0'].apply(lambda x: month_map(x[5:7])) 

    prpn_df['month'] = prpn_df['month'] + '_PCPN'
    tmax_df['month'] = tmax_df['month'] + '_TMAX'
    tmin_df['month'] = tmin_df['month'] + '_TMIN'
    tavg_df['month'] = tavg_df['month'] + '_TMPC'

    for year in prpn_df.year.unique():

        temp = prpn_df[prpn_df['year']==year]
        temp0 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp0['FIPS']  = temp0.index
        temp0 = temp0.reset_index(drop=True)

        temp = tmax_df[tmax_df['year']==year]
        temp1 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp1['FIPS']  = temp1.index
        temp1 = temp1.reset_index(drop=True)

        temp = tavg_df[tavg_df['year']==year]
        temp2 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp2['FIPS']  = temp2.index
        temp2 = temp2.reset_index(drop=True)

        temp = tmin_df[tmin_df['year']==year]
        temp3 = temp.drop(labels=['Unnamed: 0', 'year'], axis=1).set_index(keys='month').transpose()
        temp3['FIPS']  = temp3.index
        temp3 = temp3.reset_index(drop=True)

        merged = temp1.set_index('FIPS').join(temp0.set_index('FIPS')).join(temp2.set_index('FIPS')).join(temp3.set_index('FIPS'))
        merged['YEAR'] = year
        merged['FIPS'] = merged.index
        
        projectiondf = projectiondf.append(merged)

projectiondf['FIPS'] = projectiondf.index
projectiondf.to_csv('./climate_projections_all.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
of pandas will c