In [1]:
import pandas as pd
import numpy as np
import gdal
from gdalconst import * 
import os, sys, time
import xlsxwriter

In [2]:
def getValues(rasterDS, locData, rasterName):
    size = len(locData)
    startTime = time.time() #if timing the function speed is really necessary
    xValues = np.array(locData.loc[:, 'Longitude'])
    yValues = np.array(locData.loc[:, 'Latitude'])
    rows = rasterDS.RasterYSize #number of rows
    cols = rasterDS.RasterXSize # number of columns
    bands = rasterDS.RasterCount # number of data value types (it's going to be 1 for our case)
    
    # get georeference info
    transform = rasterDS.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]
    
    band = rasterDS.GetRasterBand(1)
    # getting all data at once
    allData = band.ReadAsArray(0,0, cols, rows)
    
    #Result dataset
    reqDataset = locData
    reqDataset[rasterName] = 'nan'
    
    for i in range(size):
        # get x, y
        x = xValues[i]
        y = yValues[i]
        # compute pixel offset
        xOffset = int((x - xOrigin) / pixelWidth)
        yOffset = int((y - yOrigin) / pixelHeight)
        value   = allData[yOffset, xOffset]
        reqDataset.loc[i, rasterName] = value
    endTime = time.time()
    print 'The script took ' + str(endTime - startTime) + 'seconds'
    
    return reqDataset

In [3]:
# baseFile = 'C:\\Users\\Allan\\Dropbox\\2015\\Research Data\\Weather Variables\\'
# coordDataset = dataset
# link = '\\RasterFiles\\'
def getDataset(RasterID, cat, coordDataset, baseFile, link):
    rasterName = str(RasterID) # RasterID might be either a string(month) or an int (year)
    fileName = baseFile + cat + link + rasterName + '.asc'
    ds = gdal.Open(fileName)
    if ds is None:
        print 'could not open ' + fileName
        sys.exit(1)
    completeDataset = getValues(ds, coordDataset, rasterName)
#     return completeDataset

In [4]:
def RasterFilesData2Excel(variable, coordDataFrame):
    baseFile = '../RasterFiles/'
    resFolder = '../Weather Variables/'
    cal      = {1:'jan', 2: 'feb', 3:'mar', 4:'apr', 5:'may', 6:'jun', 7:'jul', 8:'aug', 9: 'sep',
               10:'oct', 11:'nov', 12:'dec'}
    if variable == 'Precipitation':
        link = '/'
        for year in np.arange(33)+1981:
            getDataset(year, variable, coordDataFrame, baseFile, link)
        fileName = resFolder+ 'Ppt/' + 'PrecipitationRecord1981_2013.xlsx'
        writer = pd.ExcelWriter(fileName)
        coordDataFrame.to_excel(writer)
    elif variable == 'MinTemp':
        fileName = resFolder + variable + '/'
        for year in np.arange(33)+1981:
            link = '/' + str(year)+'/'
            record = fileName + str(year) + '.xlsx'
            for month in np.arange(12)+1:
                getDataset(cal[month], variable, coordDataFrame, baseFile, link)
            writer = pd.ExcelWriter(record)
            coordDataFrame.to_excel(writer, sheet_name=str(year))
            coordDataFrame = dataset[['UID', 'Longitude', 'Latitude']]
    elif variable == 'MaxTemp':
        fileName = resFolder + variable + '/'
        for year in np.arange(33)+1981:
            link = '/' + str(year)+'/' 
            record = fileName + str(year) + '.xlsx'
            for month in np.arange(12)+1:
                getDataset(cal[month], variable, coordDataFrame, baseFile, link)
            writer = pd.ExcelWriter(record)
            coordDataFrame.to_excel(writer, sheet_name=str(year))
            coordDataFrame = dataset[['UID', 'Longitude', 'Latitude']]
    else:
        print 'Incorrent variable name. The accepted variable names include "Precipitation", "MinTemp", and "MaxTemp"' 
        
    

In [5]:
dataFile = '../FinalDatasets/RCsegCoord.csv'
dataset = pd.read_csv(dataFile)

In [6]:
dataset = dataset[~np.isnan(dataset.Latitude)]
print len(dataset)

8938


In [7]:
PrecipitationDS = dataset[['UID', 'Longitude', 'Latitude']]
MinTemperatureDS = dataset[['UID', 'Longitude', 'Latitude']]
MaxTemperatureDS = dataset[['UID', 'Longitude', 'Latitude']]

In [8]:
RasterFilesData2Excel('MinTemp', MinTemperatureDS)

The script took 5.07044196129seconds
The script took 4.90995287895seconds
The script took 4.93622684479seconds
The script took 5.13632702827seconds
The script took 4.89945292473seconds
The script took 4.78821897507seconds
The script took 4.91942191124seconds
The script took 4.77702999115seconds
The script took 4.78042411804seconds
The script took 4.99941897392seconds
The script took 4.830534935seconds
The script took 4.91498303413seconds
The script took 4.90936684608seconds
The script took 5.00675487518seconds
The script took 4.87059998512seconds
The script took 4.87580800056seconds
The script took 5.06999897957seconds
The script took 4.8216958046seconds
The script took 5.13943791389seconds
The script took 5.08776712418seconds
The script took 4.85321784019seconds
The script took 4.88320803642seconds
The script took 4.83705806732seconds
The script took 4.84818100929seconds
The script took 5.02358484268seconds
The script took 4.96905493736seconds
The script took 4.85164904594seconds
The 

In [9]:
RasterFilesData2Excel('MaxTemp', MaxTemperatureDS)

The script took 5.21338701248seconds
The script took 5.10265016556seconds
The script took 5.02900314331seconds
The script took 5.09801387787seconds
The script took 5.19872403145seconds
The script took 4.99088191986seconds
The script took 5.10726499557seconds
The script took 4.84316110611seconds
The script took 5.01953220367seconds
The script took 4.82884192467seconds
The script took 4.979377985seconds
The script took 4.96337008476seconds
The script took 5.06094098091seconds
The script took 4.97843503952seconds
The script took 4.90611696243seconds
The script took 4.9989759922seconds
The script took 4.8612241745seconds
The script took 5.01239705086seconds
The script took 5.08991289139seconds
The script took 5.04675292969seconds
The script took 5.0979950428seconds
The script took 4.89047288895seconds
The script took 5.38468599319seconds
The script took 4.91452693939seconds
The script took 5.18380498886seconds
The script took 5.27259397507seconds
The script took 5.5795879364seconds
The scr

In [10]:
RasterFilesData2Excel('Precipitation', PrecipitationDS)

The script took 5.22712802887seconds
The script took 5.06426787376seconds
The script took 5.35473299026seconds
The script took 5.53332495689seconds
The script took 5.14174509048seconds
The script took 4.99912190437seconds
The script took 5.39941692352seconds
The script took 4.90033984184seconds
The script took 5.09567809105seconds
The script took 4.96463799477seconds
The script took 4.84960508347seconds
The script took 5.00336194038seconds
The script took 4.9426779747seconds
The script took 4.93101191521seconds
The script took 5.57266402245seconds
The script took 5.42503190041seconds
The script took 5.26443195343seconds
The script took 5.09195995331seconds
The script took 4.9267179966seconds
The script took 5.18599891663seconds
The script took 5.41093397141seconds
The script took 5.04749798775seconds
The script took 5.21337604523seconds
The script took 5.04606103897seconds
The script took 5.39946293831seconds
The script took 6.46383714676seconds
The script took 5.88940191269seconds
The