## Data Collection Code

In [3]:
import csv
import datetime
import json
import os

import pandas as pd
import requests
import urllib3
import xmltodict
requests.packages.urllib3.disable_warnings()

#### getWeather()
Takes a location (latitude and longitude) and a date-time as input and returns the selected weather features for that location at that point in time.

In [4]:
# SECURITY: credentials should not live in the notebook. The key is taken from the
# VISUAL_CROSSING_API_KEY environment variable when it is set; the literal fallback is kept
# only so existing runs keep working. TODO: rotate this key and remove the fallback.
API_KEY = os.environ.get('VISUAL_CROSSING_API_KEY', '6FF9G8N2T2NZCSR2C2WPZ42QF')
# Base URL of the Visual Crossing history endpoint queried by getWeather().
WEATHER_endpoint = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history'
# Fields copied out of a weather observation by getWeather().
desired_features = ['temp','visibility','wspd','cloudcover','precip','sealevelpressure','dew','wgust','humidity']

def getWeather(latitudes, longitudes, datetime, timeout=30):
    """Fetch the weather observation for one location at one moment.

    The same timestamp is sent as both the start and the end of the requested
    range, so the response is expected to hold a single observation. If it
    holds several, the last one encountered is used.

    Args:
        latitudes: Latitude of the location (a single value, formatted into the
            `locations` query parameter).
        longitudes: Longitude of the location (a single value).
        datetime: Object with an `isoformat()` method giving the moment to look
            up. Note that this parameter shadows the `datetime` module inside
            the function body.
        timeout: Seconds to wait for the weather service before giving up.

    Returns:
        A dict mapping every name in `desired_features` to its value in the
        observation, or None if the request failed, the service reported an
        error, or the response contained no observations.

    Raises:
        KeyError: If the observation lacks one of the `desired_features`.
    """
    moment = datetime.isoformat()
    query_params = (
        'contentType=json&unitGroup=uk&aggregateMinutes=15'
        '&key={}&startDateTime={}&endDateTime={}&locations={},{}'
    ).format(API_KEY, moment, moment, latitudes, longitudes)
    try:
        # SECURITY NOTE(review): verify=False disables TLS certificate checks; kept to
        # preserve existing behaviour, but it should be removed if the environment allows.
        r = requests.get(WEATHER_endpoint + "?" + query_params, verify=False, timeout=timeout)
        weatherInfo = r.json()
    except (requests.RequestException, ValueError) as exc:
        # The query string is deliberately not printed: it contains the API key.
        print('Problem loading weather data for {},{} at {}: {!r}'.format(
            latitudes, longitudes, moment, exc))
        return None

    if not weatherInfo or not isinstance(weatherInfo, dict):
        print('Empty or unexpected weather response for {},{} at {}'.format(
            latitudes, longitudes, moment))
        return None
    if 'errorCode' in weatherInfo:
        # Presumably the error payload also carries a human-readable 'message'; .get() keeps
        # this safe if it does not.
        print('Weather API error {}: {}'.format(
            weatherInfo['errorCode'], weatherInfo.get('message')))
        return None

    # Keep the last observation across all returned locations.
    lastValue = None
    for locationInfo in (weatherInfo.get('locations') or {}).values():
        for value in (locationInfo.get('values') or []):
            lastValue = value
    if lastValue is None:
        print('No weather observations returned for {},{} at {}'.format(
            latitudes, longitudes, moment))
        return None
    return {key: lastValue[key] for key in desired_features}

# Spot check: look up the weather for a single coordinate at 10:00 on 12 January 2021.
getWeather(
    latitudes=55.61055739940275,
    longitudes=-4.015384591727852,
    datetime=datetime.datetime(2021, 1, 12, 10, 0),
)


{'temp': 0.1,
 'visibility': 32.0,
 'wspd': 1.5,
 'cloudcover': 0.0,
 'precip': 0.0,
 'sealevelpressure': 1015.7,
 'dew': -0.5,
 'wgust': None,
 'humidity': 95.5}

#### getOutput()
Takes a generator (BM unit) ID, a date and a settlement period as input and returns the actual amount generated, or -1 if no generation figure could be retrieved.

In [5]:
# Base URL of the BMRS report queried by getOutput().
# NOTE(review): the original note here said to change back to B1610 and accept the smaller
# dataset; the URL already points at B1610, so that TODO looks resolved - confirm.
BM_endpoint = 'https://api.bmreports.com/BMRS/B1610/V2'
# SECURITY: credentials should not live in the notebook. The key is taken from the
# BMRS_API_KEY environment variable when it is set; the literal fallback is kept only so
# existing runs keep working. TODO: rotate this key and remove the fallback.
API_KEY_GEN = os.environ.get('BMRS_API_KEY', '90nkfffj5r9qtbh')
def getOutput(id, date, period, timeout=30):
    """Fetch the generation quantity reported for one unit in one settlement period.

    Args:
        id: Unit identifier sent as the `NGCBMUnitID` query parameter
            (for example 'ACHRW-1'). The name shadows the `id` builtin but is
            kept for compatibility with existing callers.
        date: Object with a `strftime()` method; only its calendar date is
            used, sent as `SettlementDate` in YYYY-MM-DD form.
        period: Settlement period number sent as the `Period` query parameter.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The reported quantity as a float, or -1 when the request fails or the
        response does not contain exactly the expected single
        item/Period/Point/quantity path.
    """
    dateStart = date.strftime("%Y-%m-%d")
    query_params = 'APIKey={}&SettlementDate={}&Period={}&NGCBMUnitID={}&ServiceType=xml'
    try:
        query_params = query_params.format(API_KEY_GEN, dateStart, period, id)
        # SECURITY NOTE(review): verify=False disables TLS certificate checks; kept to
        # preserve existing behaviour, but it should be removed if the environment allows.
        r = requests.get(BM_endpoint + "?" + query_params, verify=False, timeout=timeout)
        generationInfo = xmltodict.parse(r.text)
        point = generationInfo['response']['responseBody']['responseList']['item']['Period']['Point']
        return float(point['quantity'])
    except Exception:
        # Best effort by design: network errors, malformed XML and missing fields all map to
        # the -1 sentinel. `Exception` (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop a long collection run.
        return -1

# Spot check: generation reported for unit ACHRW-1 on 12 October 2021, settlement period 24.
# NOTE(review): the dataset-building loop maps an on-the-hour time to hour * 2 + 1, which
# would give period 25 for 12:00; period 24 is requested here - confirm which was intended.
getOutput(
    id='ACHRW-1',
    date=datetime.datetime(2021, 10, 12, 12, 0, 0),
    period=24,
)

1.534

#### Building dataset
Loops over the locations dataset and, for each location, collects weather and generation data for 15 different dates and times spread across a year, covering a mix of times of day.
Aggregates everything into one dataset whose attributes are the weather features and whose target <i>y</i> is the load factor, (actual generation)/(generation capacity).

In [6]:
# Sample moments spread over 2019, mixing months and times of day.
dateTimes = [
    datetime.datetime(2019, 1, 24, 8, 0, 0),
    datetime.datetime(2019, 2, 27, 19, 0, 0),
    datetime.datetime(2019, 3, 29, 15, 0, 0),
    datetime.datetime(2019, 4, 16, 17, 0, 0),
    datetime.datetime(2019, 5, 19, 8, 0, 0),
    datetime.datetime(2019, 6, 13, 16, 0, 0),
    datetime.datetime(2019, 7, 7, 12, 0, 0),
    datetime.datetime(2019, 8, 10, 10, 0, 0),
    datetime.datetime(2019, 9, 29, 17, 0, 0),
    datetime.datetime(2019, 10, 29, 11, 0, 0),
    datetime.datetime(2019, 11, 6, 14, 0, 0),
    datetime.datetime(2019, 12, 18, 9, 0, 0),
    datetime.datetime(2019, 7, 3, 18, 0, 0),
    datetime.datetime(2019, 4, 9, 16, 0, 0),
    datetime.datetime(2019, 9, 11, 6, 0, 0),
]


def _settlementPeriod(moment):
    """Return the half-hourly settlement period number containing `moment`.

    Period 1 starts at 00:00 and period 2 at 00:30, so minute 30 itself belongs
    to the second half of the hour. Assumes a regular 48-period day.
    """
    return moment.hour * 2 + (2 if moment.minute >= 30 else 1)


locations = pd.read_csv('locations.csv')
dataRows = []
for location in locations.iterrows():
    # iterrows() yields (index, row) pairs; only the row is needed.
    row = location[1]
    bmuId = row['BMU_ID']
    print(bmuId)
    for time in dateTimes:
        try:
            period = _settlementPeriod(time)
            dataPoint = getWeather(row['latitude'], row['longitude'], time)
            if not dataPoint:
                print('No weather data found for {} at {}'.format(bmuId, time))
                continue
            output = getOutput(bmuId, time, period)
            # getOutput() signals failure with -1; check before dividing so the sentinel
            # never reaches the load-factor calculation.
            if output < 0:
                print('No gen data found for ' + bmuId)
                continue
            dataPoint['loadFactor'] = output / row['capacity']
            dataPoint['BMUID'] = bmuId
            dataRows.append(dataPoint)
        except Exception as exc:
            # Skip this sample but say why; `Exception` (not a bare except) so the run can
            # still be interrupted from the keyboard.
            print('Skipping {} at {}: {!r}'.format(bmuId, time, exc))
# dataset attributes: locationID, temp, visibility, wspd, cloudcover, precip, sealevelpressure, dew, humidity, wgust | loadFactor 0-1
dataset = pd.DataFrame(dataRows,columns=['BMUID','temp','wspd','wgust','sealevelpressure','visibility','cloudcover','precip','dew','humidity','loadFactor'])

dataset.to_csv('./Archived/dataset5feb-1.csv')


ABRTW-1
No gen data found for ABRTW-1
ACHRW-1
No gen data found for ACHRW-1
No gen data found for ACHRW-1
AKGLW-2
No gen data found for AKGLW-2
ANSUW-1
ARCHW-1
No gen data found for ARCHW-1
No gen data found for ARCHW-1
No gen data found for ARCHW-1
No gen data found for ARCHW-1
BEINW-1
No gen data found for BEINW-1
No gen data found for BEINW-1
No gen data found for BEINW-1
No gen data found for BEINW-1
No gen data found for BEINW-1
BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
No gen data found for BETHW-1
BHLAW-1
No gen data found for BHLAW-1
No gen data found for BHLAW-1
No gen data found for BHLAW-1
No gen data found for BHLAW-1
No gen data found for BHLAW-1
No gen data found for BHLAW-1
No gen data found for BHLAW-1
BLKWW-1
No gen data found for BLKWW-1
No gen data found for BLKWW-