In [9]:
                                                            
import pandas as pd
import requests
import json
import os
from ggplot import *

def warn_for_large_dl(opts_dict, threshold=100000, sample_rate_hz=60):
    total_time = opts_dict["EndTime"] - opts_dict["StartTime"]
    to_ns = {
        'years': 31556952000000430,
        'weeks': 604799999999983,
        'days' : 86400000000000,
        'hours': 3600000000000,
        'minutes': 60000000000,
        's': 1000000000,
        'seconds': 1000000000,
        'ms': 1000000,
        'milliseconds': 1000000,
        '': 1000000,
        'us': 1000,
        'microseconds': 1000,
        'ns': 1,
        'nanoseconds': 1
    }

    total_time_ns = total_time * to_ns[opts_dict["UnitofTime"]]

    query_type = opts_dict["QueryType"]
    if query_type == 'windows':
        ns_per_point = int(opts_dict['WindowText']) * int(to_ns[opts_dict['WindowUnit']])
        total_rows = total_time_ns // ns_per_point

    total_obs_at_sample_rate = total_time_ns // (1/sample_rate_hz * to_ns['s'])

    if query_type == 'aligned':
        total_rows = total_time_ns // (2 ** opts_dict.get("PointWidth", 0))
        total_rows = min(total_rows, total_obs_at_sample_rate)
    elif query_type == 'raw':
        total_rows = total_obs_at_sample_rate

    print('Assuming {}hz sampling, you are downloading approximately {} rows.'.format(sample_rate_hz, int(total_rows)))
    if (total_rows > threshold):
        print('This exceeds your threshold of {}'.format(threshold))
        print('''
    Either increase your threshold or choose closer start and end times.
    If you are using QueryType: 'raw' then you must choose a closer start and end time.
    If you are using QueryType: 'aligned' you may also increase PointWidth to sample less frequently.
    If you are using QueryType: 'windows' you may also increase WindowText to sample less frequently.
        ''')
        return True
    return False
    
def download_pmu_data(opts_dict, domain=None, filename=None):
    if domain is None:
        raise ValueError("You must supply a domain to download_pmu_data (e.g. https://viz.predictivegrid.com)")
    if filename is None:
        filename = 'pmu_data.csv'
    url = domain + '/csv'
    payload = "json=" + json.dumps(opts_dict)
    print("Starting download...")
    r = requests.post(url, data=payload, stream=True)
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024): 
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)
    print("Download finished")
    return filename

def make_df(csv_filename=None, max_size=100):
    if csv_filename is None:
        csv_filename = 'pmu_data.csv'
    file_size_mb = os.path.getsize(csv_filename) / 1024 / 1024
    if file_size_mb > max_size:
        print('The file is {:.2f}mb, which is larger than your threshold of {}mb. You may want to process it manually instead.'.format(file_size_mb, max_size))
        print('It is located at: {}/{}'.format(os.getcwd(), csv_filename))
        df = pd.read_csv(csv_filename, chunksize=10000)
        print('Returning a {} instead'.format(type(df)))
    else:
        df = pd.read_csv(csv_filename)
        print("Created dataframe of shape:", df.shape)
    return df

# The following dict's values determine the data downloaded. Some fields behave differently depending on QueryType.
#   "StartTime" : Int    - Represents the start time expressed in Epoch. Units determined by "UnitofTime"
#   "EndTime"   : Int    - Represents the end time expressed in Epoch. Units determined by "UnitofTime"
#   "UnitofTime": String - enum represents the unit of time applied to the "StartTime" and "EndTime" fields. One of "s", "ms", "us", "ns", "". "" is an alias for "ms".
#   "UUIDS"     : List   - list of UUID strings for the streams being downloaded
#   "Labels"    : List   - list of label strings to use for each stream in the outputted data
#   "QueryType" : String - enum with one of three values (windows, aligned, raw)
#   "WindowText": String - Valid only for QueryType "windows." Together with WindowUnit represents the amount of time per data point
#   "WindowUnit": String - Valid only for QueryType "windows." enum representing the unit of time to use for WindowText. One of "years","weeks","days","hours","minutes","seconds","milliseconds","microseconds","nanoseconds"
#   "PointWidth": Int    - The point width exponent for AlignedWindows queries, and the maximum depth for Windows queries. It is ignored for QueryType: "raw"
#   "_token"    : String - The token string issued by the server granting a user access for the current session.
                                                            
                                                            
opts_dict = {
    "StartTime": 1507892842460,
    "EndTime": 1512176702793,
    "UUIDS": [
        "4d6e251a-48e1-3bc0-907d-7d5440c34bb9",
        "9dc5b5cd-8cb1-3dd3-b582-5ed6bf3f0083",
        "7befb598-a0be-3e5f-a062-84f9fda33a13",
        "39d955bf-4b68-36c1-801d-5a54c8b0d953",
        "3c9a6de2-4b5e-3707-b599-449c395b3d2b"
    ],
    "Labels": [
        "ciee/ devices/ meter/ 0xd8d5b9000000a110/ demand",
        "ciee/ devices/ meter/ 0xd8d5b9000000a110/ price",
        "ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_received",
        "ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_delivered",
        "ciee/ devices/ meter/ 0xd8d5b9000000a110/ tier"
    ],
    "QueryType": "aligned",
    "WindowText": "4398046511104",
    "WindowUnit": "nanoseconds",
    "UnitofTime": "ms",
    "PointWidth": 41,
    "_token": "EQ/VyUIn1j3SdK8QLsDM8Zcq6HuNKNupauO5gCLtSaKrDlpvFEOWwwANwYyNmqF/ezI2doI3y23afne7kBN4ooMcn5NSxwxTxc/ATL9HlTN1D1/V1a6IrmSdn7rlU7E0YN8D5nmCkQ5FJwu4N0JFhz/hx1qXr/e2LuxzubfFDXjAwi9jmZJZq3pa3rVzXxH4/QcER26B9NBWeUhqC/gdoA=="
}

domain = "https://plot.xbos.io"
                                                            
too_big = warn_for_large_dl(opts_dict, threshold=20000, sample_rate_hz=60)
if (not too_big):
    csv_filename = download_pmu_data(opts_dict, domain=domain)
    df = make_df(csv_filename, max_size=1)
    print("Done")
else:
    print("Aborted download.")

                                                            
                                                        

Assuming 60hz sampling, you are downloading approximately 1948 rows.
Starting download...
Download finished
Created dataframe of shape: (1856, 22)
Done


In [16]:
print(type(df).__name__)
list(df) # is this the correct sampling to be doing (I see it's aggregated)

DataFrame


['Timestamp (ns)',
 'Human-Readable Time (UTC)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ demand (Min)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ demand (Mean)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ demand (Max)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ demand (Count)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ price (Min)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ price (Mean)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ price (Max)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ price (Count)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_received (Min)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_received (Mean)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_received (Max)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_received (Count)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_delivered (Min)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ summation_delivered (Mean)',
 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ s

In [21]:
ggplot()+\
geom_point(data= df, aes(x='Human-Readable Time (UTC)',\ 
                     y= 'ciee/ devices/ meter/ 0xd8d5b9000000a110/ demand (Mean)'))

SyntaxError: unexpected character after line continuation character (<ipython-input-21-044f86e35fd4>, line 1)

In [17]:
df['ciee/ devices/ meter/ 0xd8d5b9000000a110/ demand (Mean)']

0       1964.282238
1       1870.851582
2       2337.365854
3       2649.359606
4       2353.316953
5       1921.081081
6       2245.951220
7       1911.637280
8       3134.747475
9       2971.486146
10      3872.982456
11      6415.062344
12      6924.800000
13      5044.107579
14      5919.901961
15      8082.058824
16      7927.029703
17      8128.633094
18      7643.557692
19      7501.890547
20      7175.879397
21      5540.641975
22      6883.283582
23      5003.709273
24      5816.923077
25      4384.000000
26      3594.070352
27      3797.171717
28      3411.243781
29      2965.432099
           ...     
1826    2224.776119
1827    2044.588529
1828    1919.900249
1829    1938.992806
1830    1939.070905
1831    2268.722892
1832    1878.916256
1833    1845.658537
1834    1836.878049
1835    1901.359223
1836    1844.563107
1837    2275.012225
1838    2038.829268
1839    1889.685230
1840    2543.631961
1841    3254.424552
1842    4121.518987
1843    3835.555556
1844    4287.235142
