# Part 0: Libraries:

In [1]:
import requests 
import json
import csv 
import urllib2 
import pandas as pd 

# Part 1: Functions:

In [2]:
#Function 1: 
def generate_counties_df(state_name):
    """
    INPUT: state_name (str, name of the state)
    OUTPUT: counties_df (df that has columns of county name, state name, and respective FIPS ID)
    OVERVIEW: generating counties and their corresbonding location ids (FIPS) in pd form
    """
    column_names = ['Name', 'ID']
    counties_df = pd.read_csv("Data/Weather/Locations/{}_counties.txt".format(state_name),names=column_names)
    
    counties_df["State"] = [ID.split("(")[0] for ID in counties_df["ID"]]
    #remove ")" from end of the string:
    counties_df["ID"] = counties_df["ID"].apply(lambda x: x.replace(")", "")) 
    #FIPS ID is the ID of the NOAA weather station associated with respective counties: 
    counties_df["FIPS ID"] = [ID.split("FIPS:")[1] for ID in counties_df["ID"]]
    #capitalized county name: 
    counties_df["County"] = [name.split(" County")[0].upper() for name in counties_df["Name"]]
    #dropping unnecessary columns:
    counties_df.drop(column_names, axis=1, inplace=True)
    return counties_df

In [3]:
#function 2:
def GetDataFromUrl(url, filename, data_category, token):
    """
    INPUT: url (str), filename (str), data_category (str), token (str)
    OUTPUT: N/A (no return but generates json file)
    OVERVIEW: grab file with url and write to json file (for weather data)
    """
    headers = {"token": token}
    r = requests.get(url, headers=headers)
    print r
    with open('Data/{}/{}.json'.format(data_category, filename), 'w') as f:
        json.dump(r.json(), f)

In [4]:
#function 3: 
def json_to_df(filename, data_category):
    """
    INPUT: filename (str), data_category (str)
    OUTPUT: df (dataframe)
    OVERVIEW: read json file into pd data frame (weather)
    """
    with open('Data/{}/{}'.format(data_category, filename)) as f:
        data = json.load(f)
    df = pd.DataFrame.from_dict(data["results"])
    return df 

In [5]:
#function 4: generate weather data in csv form year by year: 
def generate_yield_data(county_id, county_name, start_year, end_year):
    locationid = master_locationid.format(county_id)
    filename = master_filename.format(start_year, end_year, county_name)
    startdate, enddate = master_startdate.format(start_year), master_enddate.format(end_year)
    yearly_url = master_url.format(datasetid=datasetid, locationid=locationid, startdate=startdate, enddate=enddate)
    try:
        print "STARTS"
        print yearly_url
        GetDataFromUrl(yearly_url, filename, data_category, weather_token)
    except: 
        print "THIS DOESN'T WORK!!!!!"
        return 
    weather_df = json_to_df("{}.json".format(filename), data_category)
    weather_df.to_csv("Data/{}/{}.csv".format(data_category, filename))

In [6]:
ca_df = generate_counties_df("CALIFORNIA")

# Part 2: Declaring Variables:

## 1. Declaring States:

In [7]:
#Targeting 10 top agriculture states (based on economic output)
#Source: http://www.ers.usda.gov/faqs.aspx (top 10 agriculture states)
targest_states = ["California", "Iowa", "Texas", "Nebraska", "Illinois",\
                  "Minnesota", "Kansas", "Indiana", "North Carolina", "Wisconsin"]
targest_states = [state.upper() for state in targest_states]

## 2. Datatype Key:

In [8]:
datatype_key = ["TPCP","TSNW","EMXT","EMNT","MNTM"]

## 3: Declaring master vriables for grabbing data from API:

In [17]:
#Master variables: 
master_filename, data_category = "weather_{}-{}_{}", "Weather"
start_year, end_year = 1960, 1960
county_id = "06"
weather_token = "lPtLBRYazwyqSgrWteXkaHStbdlWzqvV"

#i: Variable: DataSet:
#description: "Annual Summaries"
datasetid = "ANNUAL"

#ii: Variable: Locations:
master_locationid = "FIPS:{}"

#iii: Time frame: 
master_startdate, master_enddate = "{}-01-01", "{}-12-31"

#iv: Masater url:
master_url = \
"http://www.ncdc.noaa.gov/cdo-web/api/v2/"+ \
"data?datasetid={datasetid}&"+ \
"datatypeid=TPCP&TSNW&EMXT&EMNT&MNTM&"+\
"locationid={locationid}&"+\
"startdate={startdate}&enddate={enddate}&"+\
"units=metric&limit=1000"

# 2. Weather Data:

In [18]:
generate_yield_data("06", "CALIFORNIA", start_year, end_year)

STARTS
http://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=ANNUAL&datatypeid=TPCP&TSNW&EMXT&EMNT&MNTM&locationid=FIPS:06&startdate=1960-01-01&enddate=1960-12-31&units=metric&limit=1000
<Response [200]>


# 3. Archive Code: 

In [11]:
#I: Locations:
#description: "Country", "County", "State"
#locationid1, locationid2, locationid3 = "CNTRY:UNITED STATES", "CNTY", "ST"

In [19]:
df = pd.read_csv("/Users/Hsieh/Desktop/persephone/Data/Weather/weather_1960-1960_CALIFORNIA.csv")

In [25]:
ca_df["FIPS ID"]

0     06109
1     06009
2     06053
3     06071
4     06043
5     06069
6     06081
7     06013
8     06025
9     06033
10    06007
11    06005
12    06105
13    06019
14    06015
15    06061
16    06065
17    06063
18    06075
19    06107
20    06027
21    06085
22    06011
23    06057
24    06029
25    06051
26    06087
27    06113
28    06023
29    06073
30    06083
31    06001
32    06079
33    06103
34    06021
35    06041
36    06089
37    06097
38    06099
39    06017
40    06067
41    06049
42    06095
43    06091
44    06101
45    06111
46    06045
47    06047
48    06115
49    06059
Name: FIPS ID, dtype: object