# Compile weather-station data at the county level

In [2]:
import numpy as np
import pandas as pd
import requests
import json
import time

### Load weather data from each weather station, together with its geolocation

In [60]:
# Survey year; the input file name is derived from it so the two cannot drift
# apart when the notebook is re-run for another year.
# NOTE(review): assumes every yearly extract follows the '<year>_GSOY.csv'
# naming seen here -- confirm for other years.
year = 2005
filename = '%d_GSOY.csv' % year

# Station-level weather records, including each station's latitude/longitude.
stations = pd.read_csv(filename)

In [61]:
# Preview the first five station rows.
stations.head(n=5)

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,CDSD,DSND,DSNW,EMNT,EMSD,EMSN,EMXP,EMXT,HDSD,PRCP,SNOW,TAVG,TMAX,TMIN
0,USR0000CDIA,DIABLO GRANDE CALIFORNIA,37.3292,-121.2939,563.9,2005,1423.0,,,30.0,,,,104.0,3044.0,,,61.6,71.1,52.2
1,USR0000CTHO,THOMES CREEK CALIFORNIA,39.8644,-122.6097,317.0,2005,2200.0,,,32.0,,,,114.0,2504.0,,,64.6,75.5,53.6
2,USC00049099,TWENTYNINE PALMS,34.128,-116.0369,602.0,2005,,0.0,0.0,31.0,0.0,0.0,2.34,,1968.0,7.67,0.0,,,56.8
3,USR0000CRDR,READER RANCH CALIFORNIA,39.3036,-121.1172,599.8,2005,1315.0,,,29.0,,,,105.0,3126.0,,,60.6,73.0,48.2
4,USR0000CTHS,THOUSAND OAKS CALIFORNIA,34.21,-118.87,242.3,2005,528.0,,,37.0,,,,95.0,1964.0,,,61.4,70.6,52.2


In [62]:
# Number of station rows.
Nrows = len(stations)

# Add the two county columns, pre-filled with a '-' placeholder in every row.
for new_col in ('FIPS', 'County'):
    stations[new_col] = ['-'] * Nrows

In [63]:
# Look up the county (FIPS code and name) for each station by sending its
# coordinates to the FCC block-lookup API, one request per station.
# NOTE(review): this issues Nrows network calls; consider caching the result on
# disk so a full re-run of the notebook does not have to hit the API again.
FCC_BLOCK_URL = 'http://data.fcc.gov/api/block/find'
REQUEST_TIMEOUT = 30   # seconds; fail instead of hanging on a stalled connection
REQUEST_PAUSE = 0.1    # seconds to wait between consecutive requests

# Resolve column positions by name instead of hard-coding 2/3/20/21, so the
# loop keeps working if the input CSV gains or reorders columns.
lat_pos = stations.columns.get_loc('LATITUDE')
lon_pos = stations.columns.get_loc('LONGITUDE')
fips_pos = stations.columns.get_loc('FIPS')
county_pos = stations.columns.get_loc('County')

for i in range(Nrows):
    query = {
        'format': 'json',
        'latitude': str(stations.iat[i, lat_pos]),
        'longitude': str(stations.iat[i, lon_pos]),
    }
    response = requests.get(FCC_BLOCK_URL, params=query, timeout=REQUEST_TIMEOUT)
    # Surface HTTP errors here rather than as an opaque KeyError further down.
    response.raise_for_status()
    # The JSON payload is read for its 'County' object ('FIPS' and 'name').
    county = response.json()['County']
    time.sleep(REQUEST_PAUSE)

    stations.iat[i, fips_pos] = county['FIPS']
    stations.iat[i, county_pos] = county['name']

In [64]:
# Spot-check the county code and name stored for the last ten stations.
# Columns are selected by name, and print is written in call form so the cell
# runs on both Python 2 and Python 3.
print(stations[['FIPS', 'County']].tail(10))

      FIPS          County
613  06071  San Bernardino
614  06089          Shasta
615  06051            Mono
616  06097          Sonoma
617  06061          Placer
618  06105         Trinity
619  06115            Yuba
620  06043        Mariposa
621  06077     San Joaquin
622  06037     Los Angeles


### Get info about counties

In [65]:
# Read every column as text so codes such as FIPS are not parsed as numbers.
# The builtin `str` replaces `np.str`, which was only an alias for it and has
# been removed from NumPy.
county_info = pd.read_csv('../../county_info.csv', dtype=str)

### Select columns

In [66]:
# Names of the weather columns in the county-level table.
cols_counties = [
    'Average Temperature',
    'Avg Max Temperature',
    'Avg Min Temperature',
    'Extreme Max Temp',
    'Extreme Min Temp',
    'Total Precipitation',
    'Highest Daily Precipitation',
]

In [67]:
# Station-file columns to aggregate, listed in the order they are consumed.
cols_stations = [
    'TAVG',
    'TMAX',
    'TMIN',
    'EMXT',
    'EMNT',
    'PRCP',
    'EMXP',
]

In [68]:
# Number of weather columns carried over to the county table.
Ncols = len(cols_counties)

# Show the station-column / county-column pairs side by side.
np.array([cols_stations, cols_counties]).T

array([['TAVG', 'Average Temperature'],
       ['TMAX', 'Avg Max Temperature'],
       ['TMIN', 'Avg Min Temperature'],
       ['EMXT', 'Extreme Max Temp'],
       ['EMNT', 'Extreme Min Temp'],
       ['PRCP', 'Total Precipitation'],
       ['EMXP', 'Highest Daily Precipitation']], 
      dtype='|S27')

In [69]:
# Start the county-level table from a copy of the county list so that
# county_info itself is left untouched.
weather_county = county_info.copy()

# Add one empty (all-NaN) column per weather measurement. `np.nan` is used
# because the `np.NaN` spelling no longer exists in NumPy 2.
for c in cols_counties:
    weather_county[c] = np.nan

# print in call form runs on both Python 2 and Python 3.
print(weather_county.shape)

(58, 9)


In [70]:
# Aggregate station readings to county level: every county receives the median
# of each selected measurement over the stations whose FIPS code matches.
Ncounties = len(weather_county)

# Median per county, restricted to the measurement columns so that the text
# columns (station id, name, county) never reach the aggregation.
station_medians = stations.groupby('FIPS')[cols_stations].median()

# Line the medians up with the county table's row order; a county without any
# station gets an all-NaN row. FIPS codes are matched as exact labels, so both
# tables must format them the same way (e.g. identical zero-padding).
county_medians = station_medians.reindex(weather_county['FIPS'].values)

# cols_stations[j] feeds cols_counties[j]; columns are written by name rather
# than by position.
for station_col, county_col in zip(cols_stations, cols_counties):
    weather_county[county_col] = county_medians[station_col].values

In [71]:
# Preview the first five counties with their aggregated weather values.
weather_county.head(n=5)

Unnamed: 0,County,FIPS,Average Temperature,Avg Max Temperature,Avg Min Temperature,Extreme Max Temp,Extreme Min Temp,Total Precipitation,Highest Daily Precipitation
0,Alameda,6001,59.55,69.35,50.4,95.5,32.0,18.85,1.91
1,Alpine,6003,41.9,51.5,33.0,79.0,6.0,51.86,3.1
2,Amador,6005,63.6,75.1,52.1,105.0,33.0,,
3,Butte,6007,62.05,71.85,49.55,106.5,28.0,77.905,6.12
4,Calaveras,6009,62.7,66.3,51.6,98.5,33.0,29.39,1.62


### Which counties are missing weather data?

In [72]:
# Rows in which at least one value is missing. `axis` is passed by keyword
# because current pandas no longer accepts it positionally.
weather_county[weather_county.isnull().any(axis=1)]

Unnamed: 0,County,FIPS,Average Temperature,Avg Max Temperature,Avg Min Temperature,Extreme Max Temp,Extreme Min Temp,Total Precipitation,Highest Daily Precipitation
2,Amador,6005,63.6,75.1,52.1,105.0,33.0,,
50,Sutter,6101,,,,,,,


In [76]:
# Inspect the stations whose looked-up county is Sutter.
in_sutter = stations['County'] == 'Sutter'
stations[in_sutter]

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,CDSD,DSND,DSNW,EMNT,...,EMXP,EMXT,HDSD,PRCP,SNOW,TAVG,TMAX,TMIN,FIPS,County
460,USC00046194,NICOLAUS NUMBER 2,38.9261,-121.5447,13.1,2007,,0.0,0.0,,...,1.5,,,10.74,0.0,,,,6101,Sutter


In [77]:
# Inspect the stations whose looked-up county is Colusa.
in_colusa = stations['County'] == 'Colusa'
stations[in_colusa]

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,CDSD,DSND,DSNW,EMNT,...,EMXP,EMXT,HDSD,PRCP,SNOW,TAVG,TMAX,TMIN,FIPS,County
393,USC00041948,COLUSA 2 SSW,39.1875,-122.0269,15.2,2007,1149.0,0.0,0.0,22.0,...,0.8,104.0,2435.0,10.3,0.0,61.3,76.0,46.5,6011,Colusa
427,USR0000CSTO,STONYFORD CALIFORNIA,39.3669,-122.575,365.8,2007,,,,,...,,,2933.0,,,,,,6011,Colusa


### Save weather data per county

In [73]:
# Tag every county row with the survey year. A scalar is broadcast to all rows,
# so this cell does not rely on a row count computed in an earlier cell.
weather_county['Year'] = year

In [74]:
# Copy of the county table without the FIPS column.
temp_df = weather_county.drop(labels=['FIPS'], axis='columns')

In [75]:
# List the remaining column names before they are reordered.
temp_df.columns

Index([u'County', u'Average Temperature', u'Avg Max Temperature',
       u'Avg Min Temperature', u'Extreme Max Temp', u'Extreme Min Temp',
       u'Total Precipitation', u'Highest Daily Precipitation', u'Year'],
      dtype='object')

In [76]:
# Final column order: county name, year, then the weather measurements.
output_columns = [
    u'County',
    u'Year',
    u'Average Temperature',
    u'Avg Max Temperature',
    u'Avg Min Temperature',
    u'Extreme Max Temp',
    u'Extreme Min Temp',
    u'Total Precipitation',
    u'Highest Daily Precipitation',
]
weather_county = temp_df.reindex(columns=output_columns)

In [77]:
# Preview the first five rows of the reordered table.
weather_county.head(n=5)

Unnamed: 0,County,Year,Average Temperature,Avg Max Temperature,Avg Min Temperature,Extreme Max Temp,Extreme Min Temp,Total Precipitation,Highest Daily Precipitation
0,Alameda,2005,59.55,69.35,50.4,95.5,32.0,18.85,1.91
1,Alpine,2005,41.9,51.5,33.0,79.0,6.0,51.86,3.1
2,Amador,2005,63.6,75.1,52.1,105.0,33.0,,
3,Butte,2005,62.05,71.85,49.55,106.5,28.0,77.905,6.12
4,Calaveras,2005,62.7,66.3,51.6,98.5,33.0,29.39,1.62


In [78]:
# Write the county-level table to CSV with a header row and no index column.
output_path = 'temp_weather.csv'
weather_county.to_csv(output_path, header=True, index=False)