# In [51]:  (Jupyter notebook cell marker, kept as a comment so the file parses)
# -*- coding: utf-8 -*-
'''
Find the time and value of max load for each of the regions
COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST
and write the result out in a csv file, using pipe character | as the delimiter.

An example output can be seen in the "example.csv" file.
'''

import xlrd
import os
import csv
from zipfile import ZipFile
import functools
import datetime

import pandas as pd

# Path of the ERCOT data set WITHOUT a file extension; the code below appends
# ".zip" to locate the archive and ".xls" to locate the extracted workbook.
datafile = "dataset/2013_ERCOT_Hourly_Load_Data"
# Name of the pipe-delimited CSV file the results are written to.
outfile = "2013_Max_Loads.csv"


def open_zip(datafile):
    """Extract the archive ``<datafile>.zip`` into the directory it lives in.

    Args:
        datafile: Path of the archive without the ``.zip`` extension, e.g.
            ``"dataset/2013_ERCOT_Hourly_Load_Data"``.

    The archive members are extracted next to the archive itself, so a
    workbook stored at the archive root ends up at ``<datafile>.xls``.
    """
    # Derive the target from the archive path instead of hard-coding
    # "dataset": the extracted files must land where callers look for
    # "<datafile>.xls".  A bare file name has no directory part, in which
    # case the current directory is used.
    target_dir = os.path.dirname(datafile) or "."
    with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
        myzip.extractall(target_dir)


def parse_file(datafile):
    """Find the peak load and the time it occurred for each region.

    The first sheet of the workbook is read.  Column 0 is treated as Excel
    serial dates and columns 1-8 as the per-region load values; row 0 is the
    header row that supplies the region names.

    Args:
        datafile: Path of the ``.xls`` workbook to read.

    Returns:
        A list with one dict per region, each holding the keys ``"Station"``,
        ``"Max Load"``, ``"Year"``, ``"Month"``, ``"Day"`` and ``"Hour"``.
    """
    region_count = 8  # the load columns directly follow the timestamp column

    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # Use the workbook's own date mode (1900- vs 1904-based) rather than
    # assuming 0, so serial dates are decoded correctly for either kind.
    convert_date = functools.partial(xlrd.xldate_as_tuple,
                                     datemode=workbook.datemode)
    timestamps = [
        datetime.datetime(*convert_date(serial))
        for serial in sheet.col_values(0, start_rowx=1, end_rowx=sheet.nrows)
    ]

    # ``range`` (not ``xrange``) keeps this loop working on Python 2 and 3.
    loads = {}
    for col in range(1, region_count + 1):
        region = sheet.cell_value(0, col)
        loads[region] = sheet.col_values(col, start_rowx=1,
                                         end_rowx=sheet.nrows)

    df = pd.DataFrame(loads, index=timestamps)
    peak_times = df.idxmax()

    results = []
    for column in df.columns:
        peak_time = peak_times[column]
        results.append({
            "Station": column,
            # Take the maximum straight from the column: a chained
            # ``df.loc[ts][column]`` lookup yields a Series instead of a
            # scalar whenever the same timestamp occurs more than once.
            "Max Load": float(df[column].max()),
            "Year": peak_time.year,
            "Month": peak_time.month,
            "Day": peak_time.day,
            "Hour": peak_time.hour,
        })
    return results

def save_file(data, filename):
    """Write ``data`` to ``filename`` as pipe-delimited CSV with a header row.

    Args:
        data: Sequence of dicts sharing the same keys; the keys of the first
            dict become the column names, in that dict's iteration order.
        filename: Path of the file to create or overwrite.

    When ``data`` is empty an empty file is produced instead of raising
    ``IndexError`` while looking up the first row.
    """
    # Plain text-mode open is kept so the function runs unchanged on both
    # Python 2 and Python 3.
    with open(filename, 'w') as fobj:
        if not data:
            # No row to take the column names from: leave the file empty.
            return
        fieldnames = list(data[0])
        csvwriter = csv.DictWriter(fobj, fieldnames, delimiter="|")
        csvwriter.writeheader()
        csvwriter.writerows(data)
    
def test():
    """End-to-end check: unzip, parse, write the CSV, then verify its rows.

    The extracted workbook is deleted once it has been parsed.  The written
    CSV must contain exactly eight rows, one per expected station, and the
    FAR_WEST row must match the known peak (date fields exactly, the load
    after rounding to one decimal place).
    """
    workbook_path = "{0}.xls".format(datafile)

    open_zip(datafile)
    data = parse_file(workbook_path)
    os.remove(workbook_path)
    save_file(data, outfile)

    expected_far_west = {'Max Load': '2281.2722140000024',
                         'Year': '2013',
                         'Month': '6',
                         'Day': '26',
                         'Hour': '17'}
    expected_stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH',
                         'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']
    exact_fields = ['Year', 'Month', 'Day', 'Hour']

    seen_stations = []
    with open(outfile) as handle:
        for record in csv.DictReader(handle, delimiter="|"):
            name = record['Station']
            if name == 'FAR_WEST':
                # Date parts are compared as the strings read from the CSV.
                for key in exact_fields:
                    assert expected_far_west[key] == record[key]

                # The load only has to agree after rounding to one decimal.
                wanted = round(float(expected_far_west['Max Load']), 1)
                found = round(float(record['Max Load']), 1)
                assert wanted == found

            seen_stations.append(name)

        # Output should be 8 lines not including header
        assert len(seen_stations) == 8

        # Every expected station, and nothing else, must be present.
        assert set(seen_stations) == set(expected_stations)

        
# Run the end-to-end check only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
        test()