In [1]:
import pandas as pd
import numpy as np
import gdal
from gdalconst import * 
import os, sys, time
import xlsxwriter

The minimum supported version is 2.1



In [18]:
def getValues(rasterDS, locData, rasterName):
    size = len(locData)
    startTime = time.time() #if timing the function speed is really necessary
    xValues = locData.loc[:, 'Longitude']
    yValues = locData.loc[:, 'Latitude']
    rows = rasterDS.RasterYSize #number of rows
    cols = rasterDS.RasterXSize # number of columns
    bands = rasterDS.RasterCount # number of data value types (it's going to be 1 for our case)
    
    # get georeference info
    transform = rasterDS.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]
    
    band = rasterDS.GetRasterBand(1)
    # getting all data at once
    allData = band.ReadAsArray(0,0, cols, rows)
    
    #Result dataset
    reqDataset = locData
    reqDataset[rasterName] = 'nan'
    
    for i in range(size):
        # get x, y
        x = xValues[i]
        y = yValues[i]
        # compute pixel offset
        xOffset = int((x - xOrigin) / pixelWidth)
        yOffset = int((y - yOrigin) / pixelHeight)
        value   = allData[yOffset, xOffset]
        reqDataset.loc[i, rasterName] = value
    endTime = time.time()
    print 'The script took ' + str(endTime - startTime) + 'seconds'
    
    return reqDataset

In [10]:
# baseFile = 'C:\\Users\\Allan\\Dropbox\\2015\\Research Data\\Weather Variables\\'
# coordDataset = dataset
# link = '\\RasterFiles\\'
def getDataset(RasterID, cat, coordDataset, baseFile, link):
    rasterName = str(RasterID) # RasterID might be either a string(month) or an int (year)
    fileName = baseFile + cat + link + rasterName + '.asc'
    ds = gdal.Open(fileName)
    if ds is None:
        print 'could not open ' + fileName
        sys.exit(1)
    completeDataset = getValues(ds, coordDataset, rasterName)
#     return completeDataset

In [12]:
def RasterFilesData2Excel(variable, coordDataFrame):
    baseFile = '../RasterFiles/'
    resFolder = '../Weather Variables/'
    cal      = {1:'jan', 2: 'feb', 3:'mar', 4:'apr', 5:'may', 6:'jun', 7:'jul', 8:'aug', 9: 'sep',
               10:'oct', 11:'nov', 12:'dec'}
    if variable == 'Precipitation':
        link = '/'
        for year in np.arange(33)+1981:
            getDataset(year, variable, coordDataFrame, baseFile, link)
        fileName = resFolder+ 'Ppt/' + 'PrecipitationRecord1981_2013.xlsx'
        writer = pd.ExcelWriter(fileName)
        coordDataFrame.to_excel(writer)
    elif variable == 'MinTemp':
        fileName = resFolder + variable + '/'
        for year in np.arange(33)+1981:
            link = '/' + str(year)+'/'
            record = fileName + str(year) + '.xlsx'
            for month in np.arange(12)+1:
                getDataset(cal[month], variable, coordDataFrame, baseFile, link)
            writer = pd.ExcelWriter(record)
            coordDataFrame.to_excel(writer, sheet_name=str(year))
            coordDataFrame = dataset[['uid', 'Longitude', 'Latitude']]
    elif variable == 'MaxTemp':
        fileName = resFolder + variable + '/'
        for year in np.arange(33)+1981:
            link = '/' + str(year)+'/' 
            record = fileName + str(year) + '.xlsx'
            for month in np.arange(12)+1:
                getDataset(cal[month], variable, coordDataFrame, baseFile, link)
            writer = pd.ExcelWriter(record)
            coordDataFrame.to_excel(writer, sheet_name=str(year))
            coordDataFrame = dataset[['uid', 'Longitude', 'Latitude']]
    else:
        print 'Incorrent variable name. The accepted variable names include "Precipitation", "MinTemp", and "MaxTemp"' 
        
    

In [19]:
dataFile = '..DataFiles/segCoord.csv'
dataset = pd.read_csv(dataFile)

In [20]:
dataset.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,uid,Longitude,Latitude
0,0,0,43213,-124.051402,46.432835
1,1,1,43249,-124.050016,46.490332
2,2,2,43153,-124.054758,46.346036
3,3,3,43158,-124.054019,46.359911
4,4,4,43163,-124.054019,46.359911


In [21]:
PrecipitationDS = dataset[['uid', 'Longitude', 'Latitude']]
MinTemperatureDS = dataset[['uid', 'Longitude', 'Latitude']]
MaxTemperatureDS = dataset[['uid', 'Longitude', 'Latitude']]

In [22]:
RasterFilesData2Excel('MinTemp', MinTemperatureDS)

The script took 54.6759998798seconds
The script took 1.46700000763seconds
The script took 1.42000007629seconds
The script took 1.41899991035seconds
The script took 1.40399980545seconds
The script took 1.45100021362seconds
The script took 1.45100021362seconds
The script took 1.43499994278seconds
The script took 1.46600008011seconds
The script took 1.46600008011seconds
The script took 1.4509999752seconds
The script took 1.49799990654seconds
The script took 82.3010001183seconds
The script took 1.45199990273seconds
The script took 1.41299986839seconds
The script took 1.4319999218seconds
The script took 1.43099999428seconds
The script took 1.42900013924seconds
The script took 1.43000006676seconds
The script took 1.4319999218seconds
The script took 1.42700004578seconds
The script took 1.44799995422seconds
The script took 1.42199993134seconds
The script took 1.42100000381seconds
The script took 82.6410000324seconds
The script took 1.4509999752seconds
The script took 1.42000007629seconds
The s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [23]:
RasterFilesData2Excel('MaxTemp', MaxTemperatureDS)

The script took 52.0270001888seconds
The script took 1.40400004387seconds
The script took 1.37300014496seconds
The script took 1.38800001144seconds
The script took 1.38800001144seconds
The script took 1.37299990654seconds
The script took 1.37299990654seconds
The script took 1.38899993896seconds
The script took 1.38800001144seconds
The script took 1.37299990654seconds
The script took 1.37300014496seconds
The script took 1.37300014496seconds
The script took 78.3540000916seconds
The script took 1.4430000782seconds
The script took 1.37299990654seconds
The script took 1.37299990654seconds
The script took 1.37199997902seconds
The script took 1.37299990654seconds
The script took 1.38800001144seconds
The script took 1.37300014496seconds
The script took 1.37199997902seconds
The script took 1.38800001144seconds
The script took 1.38799977303seconds
The script took 1.37199997902seconds
The script took 78.2809998989seconds
The script took 1.40400004387seconds
The script took 1.37299990654seconds
Th

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [24]:
RasterFilesData2Excel('Precipitation', PrecipitationDS)

The script took 52.0729999542seconds
The script took 1.42000007629seconds
The script took 1.40400004387seconds
The script took 1.38899993896seconds
The script took 1.38899993896seconds
The script took 1.40400004387seconds
The script took 1.38800001144seconds
The script took 1.38800001144seconds
The script took 1.40400004387seconds
The script took 1.40399980545seconds
The script took 1.38800001144seconds
The script took 1.38800001144seconds
The script took 1.40400004387seconds
The script took 1.38800001144seconds
The script took 1.40399980545seconds
The script took 1.38800001144seconds
The script took 1.38899993896seconds
The script took 1.38900017738seconds
The script took 1.40400004387seconds
The script took 1.38899993896seconds
The script took 1.40399980545seconds
The script took 1.40399980545seconds
The script took 1.38900017738seconds
The script took 1.40400004387seconds
The script took 1.38899993896seconds
The script took 1.40400004387seconds
The script took 1.40400004387seconds
T

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
