In [6]:
# NumPy, SciPy and Pandas
import pandas as pd
import numpy as np


In [7]:
def hourly_dataset(name):
    """Load one of the raw building meter datasets into a DataFrame.

    Parameters
    ----------
    name : str
        Dataset key: 'BDG' for the Building Data Genome dataset or 'DGS'
        for the Washington D.C. dataset.

    Returns
    -------
    pandas.DataFrame
        For 'BDG': the raw CSV as read, indexed by its first column with
        date parsing enabled (no resampling is applied here).
        For 'DGS': one float column per building, indexed by timestamp,
        with the 15-minute readings summed into 1-hour bins.

    Raises
    ------
    ValueError
        If ``name` is not one of the supported dataset keys.
    """
    # NOTE(review): `infer_datetime_format` and the nested-list form of
    # `parse_dates` are deprecated in recent pandas releases -- confirm
    # against the pandas version this notebook is pinned to.

    # Building Data Genome dataset
    if name == 'BDG':
        df = pd.read_csv('../data/raw/temp_open_utc_complete.csv', parse_dates=True, 
                         infer_datetime_format=True, index_col=0)
    
    # Washington D.C. dataset
    elif name == 'DGS':
        # the nested list asks pandas to combine the two leading columns
        # into a single (first) column
        df = pd.read_csv('../data/raw/DGS_322_Buildings-15m-By_Building-DST-gap-filled-3-2-18-508pm.csv',
                            parse_dates=[['Building ID', 'Unnamed: 1']], infer_datetime_format=True)
        # get rid of temperature column
        del df['Unnamed: 2']

        # update column names to match the row of building names
        new_column_names = df.iloc[0,:]
        df.columns = new_column_names

        # get rid of rows with metadata and update index
        df = df.drop([0,1,2], axis=0)
        # presumably 'Building nan' is what the combined date column holds
        # in the building-name row -- verify against the raw file
        df = df.rename(columns = {'Building nan':'timestamp'})
        df.index = df['timestamp'].astype('datetime64[ns]')
        del df['timestamp']
        df = df.astype(float)
        
        # since the dataset is made from 15min interval readings, resample to 1 hr
        df = df.resample('1H').sum()

    else:
        # Raise instead of print + exit(): exit() would terminate the
        # interpreter / notebook kernel rather than report a catchable error.
        raise ValueError(
            "Please choose a valid dataset ('BDG' or 'DGS'), got {!r}".format(name)
        )

    return df


In [8]:
# Load the Building Data Genome (BDG) dataset and write it to the processed-data folder
df_bdg = hourly_dataset('BDG')
df_bdg.to_csv('../data/processed/BDG_dataset.csv')

# Load the Washington D.C. building dataset (key 'DGS') and write it to the processed-data folder
df_dc = hourly_dataset('DGS')
df_dc.to_csv('../data/processed/DGS_dataset.csv')


  if (yield from self.run_code(code, result)):


In [9]:
# Preview the first 10 rows of the BDG frame
df_bdg.head(10)


Unnamed: 0_level_0,Office_Cristina,PrimClass_Jolie,PrimClass_Jaylin,Office_Jesus,PrimClass_Uma,UnivClass_Tamra,PrimClass_Jayla,PrimClass_Janiya,PrimClass_Umar,PrimClass_Janice,...,Office_Emer,Office_Elena,Office_Emerald,Office_Ellis,Office_Elliot,Office_Eddie,Office_Georgia,UnivDorm_Lysander,PrimClass_Jazmin,PrimClass_Jenna
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-01 08:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 09:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 10:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 11:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 12:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 13:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 14:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 15:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 16:00:00,,,,,,,,,,,...,,,,,,,,,,
2010-01-01 17:00:00,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# Preview the first 10 rows of the D.C. frame
df_dc.head(10)

Unnamed: 0_level_0,1st District Headquarters,Impound Lot #1 & Fleet Fueling Site,Jefferson Playing Fields,DC Village,200 I Street Municipal Building,C.W. Harris Elementary School,Nalle Elementary School,Fort Davis Recreation Center,Kimball Elementary School,Income Maintenance Administration Office,...,Shepard Park Library,Spring Road Community Support Services,Van Ness Elementary School,Warehouse/Office,Washington Seniors Wellness Center,Waterfront Municipal Center East,Waterfront Municipal Center West,Winston Educational Center,H.D. Woodson High School,Youth Services Administration #3
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-03 00:00:00,127.37,0.2,0.0,418.14,1083.51,0.0,173.52,32.5,42.61,43.41,...,33.7,26.64,15.77,43.08,18.36,261.26,474.55,281.77,268.98,27.89
2016-01-03 01:00:00,126.49,0.2,0.0,432.4,1036.84,0.0,144.73,33.26,42.5,43.34,...,33.4,26.77,16.18,47.42,18.58,260.68,482.08,282.56,266.03,27.84
2016-01-03 02:00:00,125.85,0.2,0.0,420.77,1128.8,0.0,145.97,32.89,41.94,43.4,...,33.48,26.55,16.27,49.51,19.11,259.83,482.73,281.24,267.86,27.37
2016-01-03 03:00:00,124.05,0.2,0.0,447.9,1038.19,0.0,145.29,35.63,41.49,43.09,...,33.46,27.47,15.62,50.14,19.15,259.55,486.62,282.02,274.78,26.95
2016-01-03 04:00:00,121.78,0.2,0.0,437.19,1112.88,0.0,143.61,34.41,41.36,42.95,...,33.68,26.35,16.11,51.91,19.71,258.26,487.92,282.3,316.12,27.51
2016-01-03 05:00:00,129.84,0.2,0.0,455.15,1443.68,0.0,142.8,35.97,41.19,43.57,...,33.66,26.22,15.84,53.88,19.26,290.67,517.7,284.23,408.84,31.57
2016-01-03 06:00:00,132.63,0.2,0.0,463.73,1504.06,0.0,142.98,35.86,51.25,43.07,...,33.75,25.61,17.29,57.4,19.42,725.59,962.35,284.83,433.85,31.29
2016-01-03 07:00:00,132.48,0.05,0.0,456.54,1497.49,0.0,158.15,34.2,70.79,52.04,...,34.62,25.5,18.56,59.97,19.49,1100.65,1203.93,285.25,478.53,33.16
2016-01-03 08:00:00,139.56,0.0,0.0,456.25,1500.7,0.0,152.77,23.48,90.67,58.28,...,33.64,29.89,16.14,38.92,23.85,1083.87,1102.98,283.05,524.55,51.65
2016-01-03 09:00:00,142.42,0.0,0.0,457.18,1505.35,0.0,151.61,23.73,113.93,61.05,...,34.0,33.48,15.26,38.38,25.98,1025.86,1009.64,284.32,593.15,71.53
