In [1]:
import numpy as np
import pandas as pd

In [2]:
def create_grid_coords():
    """Build centroid coordinates and string identifiers for every grid cell.

    Reads the module-level grid definition (``nrows``, ``ncols``, ``cellsize``,
    ``xllcenter``, ``yllcenter``) at call time.

    returns: (x, y, coords) where
        x is an RxC float ndarray holding the x coordinate of each cell centroid
        y is an RxC float ndarray holding the y coordinate of each cell centroid;
            row 0 holds the largest y value and the last row holds yllcenter
        coords is an RxC ndarray of strings of the form 'XxYy'
            (e.g. '-409000x459000y'), with X the integer x coordinate and Y the
            integer y coordinate of the cell
    """
    # Centroid positions along each axis (EPSG 3310). These are built directly
    # from arange: np.ndarray(shape) returns *uninitialized* memory, so it must
    # not be used as a zero-filled starting point.
    col_x = xllcenter + cellsize * np.arange(ncols)
    # Descending order so that the first row carries the largest y coordinate.
    row_y = yllcenter + cellsize * np.arange(nrows - 1, -1, -1)

    # Broadcast the two axes to full (nrows, ncols) matrices.
    x, y = np.meshgrid(col_x, row_y)

    # Unique identifier per cell: integer x and y coordinates joined as 'XxYy'.
    x_labels = np.char.add(x.astype(int).astype(str), "x")
    y_labels = np.char.add(y.astype(int).astype(str), "y")
    coords = np.char.add(x_labels, y_labels)

    return (x, y, coords)

In [3]:
def get_formatted_url(year, month, day, data_field):
    """Build the download url of one Spatial CIMIS data file.

    The month and day are zero-padded to two digits; the year and the
    data field name are inserted as given.

    returns: the formatted url string
    """
    template = "http://cimis.casil.ucdavis.edu/cimis/{}/{:02d}/{:02d}/{}.asc.gz"
    return template.format(year, month, day, data_field)

In [4]:
def parse_spatial_cimis_field(url):
    """Read one Spatial CIMIS grid in asc format and flatten it to a 'long' vector.

    url: a url or local path of the asc file. Compression is inferred from the
        file extension, so both gzipped ('.asc.gz') and plain ('.asc') files
        can be read.

    return: a 1-D numpy ndarray holding the grid values in row-major order
    """
    # The first 6 lines are skipped as header; the remainder is a
    # space-delimited matrix of cell values.
    df = pd.read_csv(url, compression="infer", sep=" ", header=None, skiprows=6)

    # Discard columns that contain no values at all (presumably produced by
    # leading/trailing delimiters on each row -- confirm against a data file).
    df = df.dropna(axis=1, how="all")

    # Row-major flatten. The length is taken from the file itself rather than
    # from a module-level cell count.
    return df.values.reshape(-1)

In [5]:
def get_tidy_cimis_daily(year, month, day, coord_vec=None, fields=None):
    """Query the Spatial CIMIS data fields for a single day and assemble them
    into a dataframe format suitable for storing in a relational database.
    This includes the addition of contextual fields like coordinates, date
    fields, and a unique identifier.

    year, month, day: integers identifying the date to query
    coord_vec: optional (x, y, coords) tuple as returned by create_grid_coords();
        it is computed when omitted
    fields: optional iterable of data field names to fetch; defaults to the
        module-level ``fieldlist``

    returns: a pandas dataframe where each row holds all of the measurements
        for a given grid cell on a particular date. Each measurement column is
        named 'sc_<field>' in lower case.
    """
    date = pd.Timestamp(year=year, month=month, day=day)

    # A single-argument print() call behaves the same on Python 2 and Python 3.
    print("Parsing data for {}".format(date))

    if coord_vec is None:
        coord_vec = create_grid_coords()
    if fields is None:
        fields = fieldlist

    x, y, coords = coord_vec

    # Grid matrices are flattened row-major; scalar entries are broadcast to
    # every row when the dataframe is built.
    out_dict = {
        "sc_cell_id": coords.reshape(-1),
        "sc_x": x.reshape(-1),
        "sc_y": y.reshape(-1),
        "sc_year": year,
        "sc_month": month,
        "sc_day": day,
        "sc_date": date
    }

    for field in fields:
        url = get_formatted_url(year, month, day, field)
        fieldname = "sc_{}".format(field).lower()
        out_dict[fieldname] = parse_spatial_cimis_field(url)

    # assemble output dataframe from the flattened columns
    output = pd.DataFrame(out_dict)

    return output

In [6]:
def get_tidy_cimis_daterange(start_date=None, end_date=None, coord_vec=None):
    """Validate a date range for a multi-day Spatial CIMIS query.

    NOTE: the per-day iteration is not implemented yet (see TODO below); the
    function currently validates its arguments, prepares the coordinate grid
    and returns None.

    start_date, end_date: the inclusive bounds of the range; anything accepted
        by pd.Timestamp (datetime objects, 'YYYY-MM-DD' strings, ...)
    coord_vec: optional (x, y, coords) tuple as returned by create_grid_coords();
        it is computed when omitted

    raises: ValueError if a bound is missing, if start_date is before
        2003-02-20, or if start_date is after end_date
    """
    if start_date is None or end_date is None:
        raise ValueError("start_date and end_date are both required")

    # Normalize both bounds so strings and datetimes compare consistently.
    start_date = pd.Timestamp(start_date)
    end_date = pd.Timestamp(end_date)

    if start_date < pd.Timestamp("2003-02-20"):
        raise ValueError("no data available before 2003-02-20")

    if start_date > end_date:
        raise ValueError("start_date must be <= end_date")

    # A single-argument print() call behaves the same on Python 2 and Python 3.
    print("Fetching data from {} to {}".format(start_date, end_date))

    if coord_vec is None:
        coord_vec = create_grid_coords()

    #TODO iterate over the dates in the range and call get_tidy_cimis_daily for each

In [7]:
# Grid definition used by the functions above; coordinates are in EPSG 3310.
# Presumably these mirror the header of the .asc data files -- confirm against a file.
nrows, ncols = 560, 510
cellsize = 2000.0
xllcorner, yllcorner = -410000, -660000

# Derived values: centroid of the lower-left cell and the number of cells.
xllcenter = int(cellsize / 2) + xllcorner
yllcenter = int(cellsize / 2) + yllcorner
total_cells = ncols * nrows

# Names of the data fields fetched for each day.
fieldlist = ["ETo", "K", "Rnl", "Rs", "Rso", "Tdew", "Tn", "Tx", "U2"]

# Sample query date.
year, month, day = 2003, 2, 20

# Computed once here so the grid can be passed to the query functions.
coord_vec = create_grid_coords()

In [8]:
# Use the query date from the configuration cell and show only the first rows;
# the full frame holds one row per grid cell (nrows * ncols).
get_tidy_cimis_daily(year, month, day, coord_vec).head(10)

Parsing data for 2003-02-20 00:00:00


Unnamed: 0,sc_cell_id,sc_date,sc_day,sc_eto,sc_k,sc_month,sc_rnl,sc_rs,sc_rso,sc_tdew,sc_tn,sc_tx,sc_u2,sc_x,sc_y,sc_year
0,-409000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-409000.0,459000.0,2003
1,-407000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-407000.0,459000.0,2003
2,-405000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-405000.0,459000.0,2003
3,-403000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-403000.0,459000.0,2003
4,-401000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-401000.0,459000.0,2003
5,-399000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-399000.0,459000.0,2003
6,-397000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-397000.0,459000.0,2003
7,-395000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-395000.0,459000.0,2003
8,-393000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-393000.0,459000.0,2003
9,-391000x459000y,2003-02-20,20,-9999.0,-9999.0,2,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-391000.0,459000.0,2003
