# In [30]:
#!/usr/bin/env python
"""
Your task is as follows:
- read the provided Excel file
- find and return the min, max and average values for the COAST region
- find and return the time value for the min and max entries
- the time values should be returned as Python tuples

Please see the test function for the expected return format

"""

from zipfile import ZipFile
import os
import functools

import xlrd
import pandas as pd


# Dataset path WITHOUT a file extension: open_zip() appends '.zip' to locate
# the archive, while parse_file() and test() append '.xls' for the workbook.
datafile = "dataset/2013_ERCOT_Hourly_Load_Data"


def open_zip(datafile):
    """Extract ``<datafile>.zip`` into the directory that holds the archive.

    Args:
        datafile: Path of the archive without the ``.zip`` extension.

    The members are unpacked next to the archive (or into the current
    directory when ``datafile`` has no directory part) so that a member named
    ``<basename>.xls`` ends up at ``<datafile>.xls``, which is where the rest
    of this script looks for it.
    """
    # Derive the target from the archive location instead of hard-coding
    # 'dataset'; for "dataset/<name>" this is still the 'dataset' directory.
    target_dir = os.path.dirname(datafile) or os.curdir
    with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
        myzip.extractall(target_dir)


def parse_file(datafile):
    """Summarise the load column read from ``<datafile>.xls``.

    The first sheet of the workbook is read, skipping the header row.
    Column 0 is treated as the Excel timestamp and column 1 as the load
    value (assumed to be the COAST region -- confirm against the sheet
    header).

    Args:
        datafile: Path of the workbook without the ``.xls`` extension.

    Returns:
        dict with the keys
            'maxtime'  -- (year, month, day, hour, minute, second) tuple of
                          the row holding the largest value,
            'maxvalue' -- the largest value,
            'mintime'  -- time tuple of the row holding the smallest value,
            'minvalue' -- the smallest value,
            'avgcoast' -- arithmetic mean of the column.

    Raises:
        ValueError: if the sheet has no data rows below the header.
    """
    workbook = xlrd.open_workbook('{0}.xls'.format(datafile))
    sheet = workbook.sheet_by_index(0)

    # xlrd helpers relied on below:
    #   sheet.col_values(col, start_rowx=...) -> list of cell values of a column
    #   xlrd.xldate_as_tuple(value, datemode) -> (Y, M, D, h, m, s) tuple

    # Single pre-formatted argument keeps the output identical on
    # Python 2 and Python 3.
    print("Number of rows: {0}".format(sheet.nrows))

    # Use the workbook's own date system (1900- or 1904-based) rather than
    # assuming datemode 0, otherwise dates from 1904-based files are wrong.
    convert_date = functools.partial(
        xlrd.xldate_as_tuple, datemode=workbook.datemode)

    # Skip row 0 (header) and read both columns to the end of the sheet.
    timestamp = sheet.col_values(0, start_rowx=1)
    coast = sheet.col_values(1, start_rowx=1)
    if not coast:
        raise ValueError(
            "no data rows found in {0}.xls".format(datafile))

    df = pd.DataFrame({"timestamp": timestamp, "coast": coast})

    # Only the load column decides the extremes; idxmax/idxmin return index
    # labels, so the rows are fetched with the label-based .loc accessor.
    max_index = df["coast"].idxmax()
    min_index = df["coast"].idxmin()

    data = {
        'maxtime': convert_date(df.loc[max_index, "timestamp"]),
        'maxvalue': df.loc[max_index, "coast"],
        'mintime': convert_date(df.loc[min_index, "timestamp"]),
        'minvalue': df.loc[min_index, "coast"],
        'avgcoast': df["coast"].mean(),
    }
    return data


def test():
    """Extract the dataset, parse it and check the reported maximum.

    The extracted ``.xls`` file is deleted afterwards even when parsing
    raises, so a failed run does not leave the workbook behind.
    """
    open_zip(datafile)
    xls_path = '{0}.xls'.format(datafile)
    try:
        data = parse_file(datafile)
    finally:
        # Only remove what actually exists so a missing file cannot mask
        # the original parsing error.
        if os.path.exists(xls_path):
            os.remove(xls_path)

    assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
    assert round(data['maxvalue'], 10) == round(18779.02551, 10)


# Run the self-check as soon as this script / notebook cell is executed.
test()

# Output: Number of rows: 7296
