## Data Collection Code

In [3]:
import csv
import datetime
import json
import os

import pandas as pd
import requests
import urllib3
import xmltodict
requests.packages.urllib3.disable_warnings()

#### getWeather()
Takes a latitude, a longitude and a date/time as input and returns the weather readings (the fields listed in `desired_features`) for that location at that point in time.

In [4]:
# Visual Crossing credentials and endpoint used by getWeather().
# The key is taken from the VISUAL_CROSSING_API_KEY environment variable when
# it is set; the literal is only a backward-compatible fallback.
# NOTE(review): the fallback key is committed with the notebook -- rotate it
# and remove the literal once the environment variable is configured.
API_KEY = os.environ.get('VISUAL_CROSSING_API_KEY', '6FF9G8N2T2NZCSR2C2WPZ42QF')
WEATHER_endpoint = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history'
# Fields copied from each weather record into a dataset row.
desired_features = ['temp','visibility','wspd','cloudcover','precip','sealevelpressure','dew','wgust','humidity']

def getWeather(latitudes,longitudes,datetime):
    """Fetch the weather readings for one location at one point in time.

    Calls the history endpoint in ``WEATHER_endpoint`` with the same value for
    the start and the end of the window, then keeps only the fields listed in
    ``desired_features``.

    Parameters
    ----------
    latitudes, longitudes : float
        Coordinates of the location, sent as ``locations=<lat>,<lon>``.
    datetime : datetime.datetime
        Moment to query, sent via ``isoformat()``. The parameter shadows the
        ``datetime`` module inside this function; the name is kept so existing
        callers keep working.

    Returns
    -------
    dict or None
        ``{feature: value}`` built from the last record of the last location
        in the response, or ``None`` when the request fails, the API reports
        an error, or the response holds no records.

    Raises
    ------
    KeyError
        If a record lacks one of the ``desired_features``.
    """
    query_params = 'contentType=json&unitGroup=uk&aggregateMinutes=15&key={}&startDateTime={}&endDateTime={}&locations={},{}'
    timestamp = datetime.isoformat()
    query_params = query_params.format(API_KEY, timestamp, timestamp, latitudes, longitudes)
    try:
        # NOTE(review): verify=False turns off TLS certificate checking; kept
        # because the notebook relies on it, but it should be removed if the
        # environment allows.
        r = requests.get(WEATHER_endpoint + "?" + query_params, verify=False, timeout=30)
        weatherInfo = r.json()
    except Exception as exc:
        # Best-effort: report and carry on. The query string is deliberately
        # not printed because it contains the API key.
        print('Problem loading weather data for {},{} at {}\n{!r}'.format(
            latitudes, longitudes, timestamp, exc))
        return None

    if not weatherInfo:
        print('error')
        return None
    if 'errorCode' in weatherInfo:
        print("Error {}".format(weatherInfo['errorCode']))
        return None

    # Stays None when the response carries no locations or no records, so the
    # caller gets None instead of an UnboundLocalError.
    result = None
    for location in (weatherInfo.get('locations') or {}).values():
        for value in ((location or {}).get('values') or []):
            result = {key: value[key] for key in desired_features}
    return result

# Spot check: a single location and timestamp; the cell displays the returned dict.
getWeather(
    latitudes=55.61055739940275,
    longitudes=-4.015384591727852,
    datetime=datetime.datetime(2021, 1, 12, 10, 0),
)


{'temp': 0.1,
 'visibility': 32.0,
 'wspd': 1.5,
 'cloudcover': 0.0,
 'precip': 0.0,
 'sealevelpressure': 1015.7,
 'dew': -0.5,
 'wgust': None,
 'humidity': 95.5}

#### getOutput()
Takes a generator (BM unit) ID, a date and a settlement period as input and returns the amount generated for that period (the `pnLevelTo` field of the first record returned).

In [5]:
# BMRS endpoint and credentials used by getOutput().
BM_endpoint = 'https://api.bmreports.com/BMRS/PHYBMDATA/V1'
# The key is taken from the BMRS_API_KEY environment variable when it is set;
# the literal is only a backward-compatible fallback.
# NOTE(review): the fallback key is committed with the notebook -- rotate it
# and remove the literal once the environment variable is configured.
API_KEY_GEN = os.environ.get('BMRS_API_KEY', '90nkfffj5r9qtbh')
# TODO: change back to B1610 and deal with the smaller dataset :(
def getOutput(id,date,period):
    """Return the ``pnLevelTo`` value of the first record for one BM unit.

    Requests ``BM_endpoint`` as XML for the given unit, settlement date and
    settlement period, parses the reply with ``xmltodict`` and reads
    ``response -> responseBody -> responseList -> item``.

    Parameters
    ----------
    id : str
        BM unit name, sent as ``NGCBMUnitName``. Shadows the ``id`` builtin;
        the name is kept so existing callers keep working.
    date : datetime.date or datetime.datetime
        Settlement date; only the calendar date (``%Y-%m-%d``) is sent.
    period : int
        Settlement period, sent as ``SettlementPeriod``.

    Returns
    -------
    str
        ``pnLevelTo`` of the first item, exactly as it appears in the XML.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    KeyError
        If the reply contains no items, or the first item has no ``pnLevelTo``.
    """
    params = {
        'APIKey': API_KEY_GEN,
        'SettlementDate': date.strftime("%Y-%m-%d"),
        'SettlementPeriod': period,
        'NGCBMUnitName': id,
        'ServiceType': 'xml',
    }
    # Passing params= lets requests escape the values instead of splicing
    # them into the URL by hand.
    # NOTE(review): verify=False turns off TLS certificate checking; kept
    # because the notebook relies on it, but it should be removed if the
    # environment allows.
    r = requests.get(BM_endpoint, params=params, verify=False, timeout=30)
    r.raise_for_status()
    generationInfo = xmltodict.parse(r.text)
    try:
        items = generationInfo['response']['responseBody']['responseList']['item']
    except (KeyError, TypeError) as exc:
        raise KeyError('No items returned for {} on {} period {}'.format(
            id, params['SettlementDate'], period)) from exc
    # xmltodict yields a dict (not a list) when the XML holds a single <item>,
    # so normalise before taking the first record.
    if not isinstance(items, list):
        items = [items]
    if not items:
        raise KeyError('No items returned for {} on {} period {}'.format(
            id, params['SettlementDate'], period))
    return items[0]['pnLevelTo']

#### Building dataset
Loops over the locations dataset and, for each location, collects weather and generation data for every date and time in `dateTimes` (planned: 5 different dates and times spread across a year, with a mix of times of day; the list below currently holds one).
Aggregates everything into one dataset, with the weather readings as attributes and the target <i>y</i> being (actual generation)/(generation capacity).

In [8]:
# Timestamps sampled for every location.
dateTimes = [datetime.datetime(2021,10,12,12,0,0)]
locations = pd.read_csv('locations.csv')
dataRows = []
for location in locations.iterrows():
    # iterrows() yields (index, row); only the row is needed.
    site = location[1]
    print(site['BMU_ID'])
    for time in dateTimes:
        try:
            # Two periods per hour, numbered from 1: the first half-hour of
            # hour h is 2h+1, the second is 2h+2.
            # NOTE(review): assumes periods start on the half-hour, so a
            # timestamp at exactly hh:30 belongs to the second one -- confirm.
            period = time.hour * 2 + 1
            if time.minute >= 30:
                period += 1
            dataPoint = getWeather(site['latitude'], site['longitude'], time)
            if dataPoint is None:
                print('Skipping {} at {}: no weather data'.format(site['BMU_ID'], time))
                continue
            dataPoint['loadFactor'] = float(getOutput(site['BMU_ID'], time, period)) / site['capacity']
            dataPoint['BMUID'] = site['BMU_ID']
            dataRows.append(dataPoint)
        except Exception as exc:
            # Best-effort collection: one failing unit must not stop the run.
            # Exception (not a bare except) so Ctrl-C can still interrupt.
            print('Skipping {} at {}: {!r}'.format(site['BMU_ID'], time, exc))
# dataset attributes: locationID, temp,visibility,wspd,cloudcover,precip,sealevelpressure,dew,humidity,wgust     | loadfactor 0-1
dataset = pd.DataFrame(dataRows,columns=['BMUID','temp','wspd','wgust','sealevelpressure','visibility','cloudcover','precip','dew','humidity','loadFactor'])

dataset.to_csv('dataset.csv')


ABRTW-1
ACHLW-1
ASdBASIUDB
ACHRW-1
AFTOW-1
AIRSW-1
AKGLW-2
ANSUW-1
ARCHW-1
ASHWW-1
ASLVW-1
BABAW-1
BDCHW-1
BEINW-1
BETHW-1
BHLAW-1
BLKWW-1
BLLA-1
BLLA-2
BNWKW-1
BRDUW-1
BRYBW-1
BTUIW-2
BTUIW-3
CAIRW-2
CGTHW-1
CLDCW-1
CLDNW-1
CLDRW-1
CLDSW-1
CLFLW-1
CMSTW-1
CNCLW-1
COUWW-1
CRDEW-1
CRDEW-2
CRGHW-1
CRMLW-1
CRYRW-2
CRYRW-3
DALSW-1
DNLWW-1
DOREW-1
DOREW-2
DRSLW-1
DUNGW-1
EDINW-1
EWHLW-1
FAARW-1
FAARW-2
FALGW-1
FSDLW-1
GDSTW-1
GLCHW-1
GLOFW-1
GLWSW-1
GNAPW-1
GORDW-1
GORDW-2
GRIFW-1
GRIFW-2
HADHW-1
HALSW-1
HBHDW-1
HLGLW-1
HLTWW-1
HRHLW-1
HRSTW-1
KENNW-1
ASdBASIUDB
KILBW-1
KLGLW-1
KPMRW-1
KTHLW-1
LCLTW-1
MDHLW-1
MIDMW-1
MILWW-1
MINSW-1
MKHLW-1
MOYEW-1
MYGPW-1
PAUHW-1
PGBIW-1
PNYCW-1
SANQW-1
STLGW-1
STLGW-2
STLGW-3
STRNW-1
TDBNW-1
TLYMW-1
TRLGW-1
TULWW-1
TULWW-2
TWSHW-1
ASdBASIUDB
WDRGW-1
ASdBASIUDB
WHIHW-1
WHILW-1
WHILW-2
WISTW-2


In [13]:
# Spot check of getOutput() for a single unit.
# NOTE(review): the dataset loop's formula (hour * 2 + 1) gives period 25 for
# 12:00, while 26 is passed here -- confirm which one is intended.
print(
    getOutput(
        id='ASLVW-1',
        date=datetime.datetime(2021, 10, 12, 12, 0, 0),
        period=26,
    )
)

4.000
