### Download 15-minute interval data from http://www.buildsmartdc.com/ using the HTTP REST API

##### API Format is
https://api.newcityenergy.com/v1/buildings/145/interval_data?start_time=1527566400&end_time=1530162000
##### This is an undocumented API, which is used for plotting on their website

In [32]:
import os
import json
import requests
import string
from datetime import datetime
import pytz
import pandas as pd

In [33]:
# Output locations, relative to the notebook's working directory.
raw_dir1 = "../data/raw/"
raw_dir = "../data/raw/dgs_15min_api/"

# Interval-data endpoint; the three placeholders are filled positionally via str.format.
# Sample request:
#   https://api.newcityenergy.com/v1/buildings/145/interval_data?start_time=1528516800&end_time=1531112400
API_FORMAT = 'https://api.newcityenergy.com/v1/buildings/{}/interval_data?start_time={}&end_time={}'
# Alternative endpoint without a time window (noted by the original author as
# downloading all 15-minute interval data):
#   API_FORMAT = 'https://api.newcityenergy.com/v1/buildings/{}/interval_data'

# Make sure the per-building CSV directory exists before anything is written to it.
if not os.path.exists(raw_dir):
    os.makedirs(raw_dir)

In [68]:
def download_DGS_intervaldata_(API, bid, epoch_start, epoch_end, timeout=60):
    """Fetch the interval data of one building with a single HTTP GET.

    Parameters
    ----------
    API : str
        URL template with three positional ``{}`` placeholders, filled with
        ``bid``, ``epoch_start`` and ``epoch_end`` in that order.
    bid : int
        Building id (also used in the progress message).
    epoch_start, epoch_end : int
        Values substituted for the ``start_time`` / ``end_time`` query parameters.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without a timeout a stalled connection
        would block forever and no exception would ever reach the retry wrapper.

    Returns
    -------
    The response body encoded as ASCII (non-ASCII characters dropped) when the
    status code is OK, otherwise ``None``.

    Raises
    ------
    Whatever ``requests.get`` raises (connection errors, timeouts, ...).
    """
    url = API.format(bid, epoch_start, epoch_end)
    # Single pre-formatted argument: prints identically on Python 2 and Python 3.
    print("Downloading %3d - %s" % (bid, url))
    r = requests.get(url, timeout=timeout)
    if r.status_code != requests.codes.ok:
        # Callers treat None as "no data for this building".
        return None
    return r.text.encode('ascii', errors='ignore')

In [69]:
def download_DGS_intervaldata(API, bid, epoch_start, epoch_end, max_attempts=5):
    """Download one building's interval data, retrying when the request raises.

    Parameters
    ----------
    API, bid, epoch_start, epoch_end
        Forwarded unchanged to ``download_DGS_intervaldata_``.
    max_attempts : int, optional
        Total number of tries before giving up (default 5).

    Returns
    -------
    Whatever ``download_DGS_intervaldata_`` returns on the first attempt that
    does not raise, or ``None`` when every attempt raised.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return download_DGS_intervaldata_(API, bid, epoch_start, epoch_end)
        except Exception as e:
            if attempt < max_attempts:
                # Report the number of the attempt that is about to start.
                print('\tException: %s, retrying - attempt %d' % (e, attempt + 1))
            else:
                # Last try failed: do not announce a retry that will not happen.
                print('\tException: %s, giving up after %d attempts' % (e, attempt))
    return None

In [70]:
def convert_to_df(res):
    """Turn a raw interval-data JSON payload into a time-indexed DataFrame.

    Parameters
    ----------
    res : str or bytes
        JSON object text whose keys are epoch seconds (integer or float
        strings) and whose values are the readings stored in the ``kWh`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``epoch`` (int seconds) and ``kWh``, sorted by ``epoch`` and
        indexed by ``timestamp`` (the epoch converted from UTC to US/Eastern).

    Raises
    ------
    ValueError
        If ``res`` is not valid JSON (raised by ``json.loads``).
    """
    js = json.loads(res)

    # Walk the mapping once so each reading stays paired with its own key, and
    # build real lists so the code does not depend on map()/keys() returning
    # lists (they are lazy views/iterators on Python 3).
    records = [(int(float(key)), value) for key, value in js.items()]
    epoch = [seconds for seconds, _ in records]
    kWh = [value for _, value in records]

    # Epoch seconds are UTC by definition; localize first, then convert.
    timestamp = pd.to_datetime(epoch, unit='s').tz_localize('UTC').tz_convert('US/Eastern')

    df = pd.DataFrame({'epoch': epoch, 'timestamp': timestamp, 'kWh': kWh},
                      columns=['epoch', 'timestamp', 'kWh'])
    return df.sort_values('epoch').set_index('timestamp')

In [71]:
#epoch_start = 1388534400 # January 1, 2014 12:00:00 AM
#epoch_end   = 1514764799 # December 31, 2017 11:59:59 PM

# Download window; both bounds fall on a US/Eastern midnight.
epoch_start = 1325394000   # GMT: Sunday, January 1, 2012 5:00:00 AM (midnight EST)
epoch_end   = 1530417600-1 # one second before GMT: Sunday, July 1, 2018 4:00:00 AM (midnight EDT)

# 15-minute data means 4 readings per hour; keep buildings with at least six months of rows.
MIN_ROWS = 4*24*365//2

# NOTE(review): the id range is hard-coded and starts at 412 - presumably a resumed run; confirm.
for bid in range(412,800):
    res = download_DGS_intervaldata(API_FORMAT, bid, epoch_start, epoch_end)
    if res is None:
        print('%3d No data found! ' % (bid))
        continue

    try:
        df = convert_to_df(res)
    except ValueError as e:
        # json.loads rejected the body; skip this building instead of aborting the batch.
        print('%3d Unparseable response (%s) - Ignored ' % (bid, e))
        continue
    nrows = len(df)

    if nrows < MIN_ROWS:
        print('%3d  %6d - Ignored ' % (bid, nrows))
    else:
        filename = os.path.join(raw_dir, str(bid) + '.csv')
        print('%3d  %6d - %s' % (bid, nrows, filename))
        df.to_csv(filename)

Downloading 412 - https://api.newcityenergy.com/v1/buildings/412/interval_data?start_time=1325394000&end_time=1530417599
412       0 - Ignored 
Downloading 413 - https://api.newcityenergy.com/v1/buildings/413/interval_data?start_time=1325394000&end_time=1530417599
413  138597 - ../data/raw/dgs_15min_api/413.csv
Downloading 414 - https://api.newcityenergy.com/v1/buildings/414/interval_data?start_time=1325394000&end_time=1530417599
414  107457 - ../data/raw/dgs_15min_api/414.csv
Downloading 415 - https://api.newcityenergy.com/v1/buildings/415/interval_data?start_time=1325394000&end_time=1530417599
415  177937 - ../data/raw/dgs_15min_api/415.csv
Downloading 416 - https://api.newcityenergy.com/v1/buildings/416/interval_data?start_time=1325394000&end_time=1530417599
416       0 - Ignored 
Downloading 417 - https://api.newcityenergy.com/v1/buildings/417/interval_data?start_time=1325394000&end_time=1530417599
417       0 - Ignored 
Downloading 418 - https://api.newcityenergy.com/v1/buildings/

464  189328 - ../data/raw/dgs_15min_api/464.csv
Downloading 465 - https://api.newcityenergy.com/v1/buildings/465/interval_data?start_time=1325394000&end_time=1530417599
465       2 - Ignored 
Downloading 466 - https://api.newcityenergy.com/v1/buildings/466/interval_data?start_time=1325394000&end_time=1530417599
466   95405 - ../data/raw/dgs_15min_api/466.csv
Downloading 467 - https://api.newcityenergy.com/v1/buildings/467/interval_data?start_time=1325394000&end_time=1530417599
467   96557 - ../data/raw/dgs_15min_api/467.csv
Downloading 468 - https://api.newcityenergy.com/v1/buildings/468/interval_data?start_time=1325394000&end_time=1530417599
468  138497 - ../data/raw/dgs_15min_api/468.csv
Downloading 469 - https://api.newcityenergy.com/v1/buildings/469/interval_data?start_time=1325394000&end_time=1530417599
469  181265 - ../data/raw/dgs_15min_api/469.csv
Downloading 470 - https://api.newcityenergy.com/v1/buildings/470/interval_data?start_time=1325394000&end_time=1530417599
470  188940

Downloading 516 - https://api.newcityenergy.com/v1/buildings/516/interval_data?start_time=1325394000&end_time=1530417599
516       2 - Ignored 
Downloading 517 - https://api.newcityenergy.com/v1/buildings/517/interval_data?start_time=1325394000&end_time=1530417599
517  189372 - ../data/raw/dgs_15min_api/517.csv
Downloading 518 - https://api.newcityenergy.com/v1/buildings/518/interval_data?start_time=1325394000&end_time=1530417599
518  167321 - ../data/raw/dgs_15min_api/518.csv
Downloading 519 - https://api.newcityenergy.com/v1/buildings/519/interval_data?start_time=1325394000&end_time=1530417599
519       0 - Ignored 
Downloading 520 - https://api.newcityenergy.com/v1/buildings/520/interval_data?start_time=1325394000&end_time=1530417599
520  183317 - ../data/raw/dgs_15min_api/520.csv
Downloading 521 - https://api.newcityenergy.com/v1/buildings/521/interval_data?start_time=1325394000&end_time=1530417599
521       0 - Ignored 
Downloading 522 - https://api.newcityenergy.com/v1/buildings/

568  165505 - ../data/raw/dgs_15min_api/568.csv
Downloading 569 - https://api.newcityenergy.com/v1/buildings/569/interval_data?start_time=1325394000&end_time=1530417599
569       0 - Ignored 
Downloading 570 - https://api.newcityenergy.com/v1/buildings/570/interval_data?start_time=1325394000&end_time=1530417599
570  183693 - ../data/raw/dgs_15min_api/570.csv
Downloading 571 - https://api.newcityenergy.com/v1/buildings/571/interval_data?start_time=1325394000&end_time=1530417599
571  183133 - ../data/raw/dgs_15min_api/571.csv
Downloading 572 - https://api.newcityenergy.com/v1/buildings/572/interval_data?start_time=1325394000&end_time=1530417599
572  189166 - ../data/raw/dgs_15min_api/572.csv
Downloading 573 - https://api.newcityenergy.com/v1/buildings/573/interval_data?start_time=1325394000&end_time=1530417599
573       0 - Ignored 
Downloading 574 - https://api.newcityenergy.com/v1/buildings/574/interval_data?start_time=1325394000&end_time=1530417599
574  189132 - ../data/raw/dgs_15min_

619       0 - Ignored 
Downloading 620 - https://api.newcityenergy.com/v1/buildings/620/interval_data?start_time=1325394000&end_time=1530417599
620   98858 - ../data/raw/dgs_15min_api/620.csv
Downloading 621 - https://api.newcityenergy.com/v1/buildings/621/interval_data?start_time=1325394000&end_time=1530417599
621       2 - Ignored 
Downloading 622 - https://api.newcityenergy.com/v1/buildings/622/interval_data?start_time=1325394000&end_time=1530417599
622       0 - Ignored 
Downloading 623 - https://api.newcityenergy.com/v1/buildings/623/interval_data?start_time=1325394000&end_time=1530417599
623  188996 - ../data/raw/dgs_15min_api/623.csv
Downloading 624 - https://api.newcityenergy.com/v1/buildings/624/interval_data?start_time=1325394000&end_time=1530417599
624       0 - Ignored 
Downloading 625 - https://api.newcityenergy.com/v1/buildings/625/interval_data?start_time=1325394000&end_time=1530417599
625       2 - Ignored 
Downloading 626 - https://api.newcityenergy.com/v1/buildings/62

675       0 - Ignored 
Downloading 676 - https://api.newcityenergy.com/v1/buildings/676/interval_data?start_time=1325394000&end_time=1530417599
676   10655 - Ignored 
Downloading 677 - https://api.newcityenergy.com/v1/buildings/677/interval_data?start_time=1325394000&end_time=1530417599
677 No data found! 
Downloading 678 - https://api.newcityenergy.com/v1/buildings/678/interval_data?start_time=1325394000&end_time=1530417599
678 No data found! 
Downloading 679 - https://api.newcityenergy.com/v1/buildings/679/interval_data?start_time=1325394000&end_time=1530417599
679 No data found! 
Downloading 680 - https://api.newcityenergy.com/v1/buildings/680/interval_data?start_time=1325394000&end_time=1530417599
680 No data found! 
Downloading 681 - https://api.newcityenergy.com/v1/buildings/681/interval_data?start_time=1325394000&end_time=1530417599
681 No data found! 
Downloading 682 - https://api.newcityenergy.com/v1/buildings/682/interval_data?start_time=1325394000&end_time=1530417599
682 No 

734 No data found! 
Downloading 735 - https://api.newcityenergy.com/v1/buildings/735/interval_data?start_time=1325394000&end_time=1530417599
735 No data found! 
Downloading 736 - https://api.newcityenergy.com/v1/buildings/736/interval_data?start_time=1325394000&end_time=1530417599
736 No data found! 
Downloading 737 - https://api.newcityenergy.com/v1/buildings/737/interval_data?start_time=1325394000&end_time=1530417599
737 No data found! 
Downloading 738 - https://api.newcityenergy.com/v1/buildings/738/interval_data?start_time=1325394000&end_time=1530417599
738 No data found! 
Downloading 739 - https://api.newcityenergy.com/v1/buildings/739/interval_data?start_time=1325394000&end_time=1530417599
739 No data found! 
Downloading 740 - https://api.newcityenergy.com/v1/buildings/740/interval_data?start_time=1325394000&end_time=1530417599
740 No data found! 
Downloading 741 - https://api.newcityenergy.com/v1/buildings/741/interval_data?start_time=1325394000&end_time=1530417599
741 No data f

793 No data found! 
Downloading 794 - https://api.newcityenergy.com/v1/buildings/794/interval_data?start_time=1325394000&end_time=1530417599
794 No data found! 
Downloading 795 - https://api.newcityenergy.com/v1/buildings/795/interval_data?start_time=1325394000&end_time=1530417599
795 No data found! 
Downloading 796 - https://api.newcityenergy.com/v1/buildings/796/interval_data?start_time=1325394000&end_time=1530417599
796 No data found! 
Downloading 797 - https://api.newcityenergy.com/v1/buildings/797/interval_data?start_time=1325394000&end_time=1530417599
797 No data found! 
Downloading 798 - https://api.newcityenergy.com/v1/buildings/798/interval_data?start_time=1325394000&end_time=1530417599
798 No data found! 
Downloading 799 - https://api.newcityenergy.com/v1/buildings/799/interval_data?start_time=1325394000&end_time=1530417599
799 No data found! 


In [52]:
# Leftover debugging check on the type of the last download result.
# NOTE(review): the recorded output shows `res` holding a ConnectionError, which the
# download helpers defined above never return - presumably stale state from an earlier
# version of the notebook; confirm and consider deleting this cell.
print(type(res))
isinstance(res, Exception)

<class 'requests.exceptions.ConnectionError'>


True

In [None]:
#test
# Download a single building over a shorter window.
epoch_start = 1388552400   # GMT: Wednesday, January 1, 2014 5:00:00 AM (midnight EST)
epoch_end   = 1530417600-1 # one second before GMT: Sunday, July 1, 2018 4:00:00 AM (midnight EDT)
bid = 1
res = download_DGS_intervaldata(API_FORMAT, bid, epoch_start, epoch_end)
if res is None:
    # The helper returns None when nothing could be downloaded; len(None) would raise.
    print('%3d No data found! ' % (bid))
else:
    print('%s %s' % (type(res), len(res)))
#filename = os.path.join(raw_dir, str(bid) + '.json')
#with open(filename, 'wb') as fd:
#    fd.write(res)

In [None]:
# Single-building walkthrough: parse the payload downloaded in the previous cell,
# save it as CSV and show a few diagnostics.
resJs = json.loads(res)

# Reuse the shared converter instead of repeating its body inline.
df = convert_to_df(res)
timestamp = df.index
kWh = list(resJs.values())

filename = os.path.join(raw_dir1, str(bid) + '.csv')
df.to_csv(filename)

# Diagnostics: key type of the raw JSON, container types, and frame size.
first_key = next(iter(resJs), None)  # None when the payload is an empty object
print('%s %s' % (type(first_key), first_key))
print(type(timestamp))
print(type(kWh))
print(df.shape)
df.head()

In [None]:
# Print every timezone name known to pytz.
# NOTE(review): presumably used to look up the zone name passed to tz_convert - confirm.
for tz in pytz.all_timezones:
    print(tz)