In [1]:
import pandas as pd
import requests
import scipy.signal
import numpy as np

from datetime import datetime
from dateutil import parser

from urllib import parse
from data_pull import data_pull_date
from data_pull import data_pull_all

In [2]:
# National Grid ESO system frequency data (the UUIDs below look like dataset resource IDs -- confirm)
# Dec 2019: f0933bdd-1b0e-4dd3-aa7f-5498df1ba5b9
# https://data.nationalgrideso.com/system/system-frequency-data/r/december_2019_-_historic_frequency_data

# Nov 2021: 9bc4746e-3152-4c6f-886e-58377ab88e0e

###  CSV Filenames

In [3]:
# Filename prefixes; only file_base[1] and file_base[2] are used in this cell.
file_base = ["f ", "fNew ", "fnew-"]

# Months 11-12 of 2021 and 1-3 of 2022 follow the "fNew YYYY M" pattern,
# months 4-10 of 2022 follow the "fnew-YYYY-M" pattern.
filenames = (
    [file_base[1] + "2021 " + str(month) for month in range(11, 13)]
    + [file_base[1] + "2022 " + str(month) for month in range(1, 4)]
    + [file_base[2] + "2022-" + str(month) for month in range(4, 11)]
)

print(filenames)

['fNew 2021 11', 'fNew 2021 12', 'fNew 2022 1', 'fNew 2022 2', 'fNew 2022 3', 'fnew-2022-4', 'fnew-2022-5', 'fnew-2022-6', 'fnew-2022-7', 'fnew-2022-8', 'fnew-2022-9', 'fnew-2022-10']


### Load Data from CSV and Analyse

In [4]:
#path = "C:\\Users\\kmp57\\Desktop\\Documents\\GitHub\\grid-uk-ml\\data\\"
path = "\\\\ad\\ap\\homes$\\kmp57\\My Documents\\GitHub\\grid-uk-ml\\data\\"

# Load each monthly CSV, print its time span, and pivot the frequency column
# into one column per calendar date (row = sample number within that day).
#
# NOTE(review): every iteration rebinds `freq`, `dates` and `freq_date`, so
# code that runs after this loop only sees the LAST file that was loaded --
# confirm whether the PSD / save steps were meant to run once per file.
for files in filenames:
    freq = pd.read_csv(path + 'RawFrequencyData\\' + files + '.csv')
    freq['dtm'] = pd.to_datetime(freq['dtm'], format="%Y/%m/%d %H:%M:%S")

    # Positional access, so the summary does not depend on the index labels.
    start_time = freq['dtm'].iloc[0]
    start_str = "Start Time: {}".format(start_time)
    print(start_str)

    end_time = freq['dtm'].iloc[-1]
    end_str = "End Time: {}".format(end_time)
    print(end_str)

    # Spacing between the first two samples; previously built but never shown.
    period = freq['dtm'].iloc[1] - start_time
    period_str = "Period: {}".format(period)
    print(period_str)

    ## Rearrange Data by Date
    # Compute the per-row calendar date once instead of once per day.
    sample_date = freq['dtm'].dt.date

    # Take the distinct dates actually present rather than every 86400th row:
    # striding silently skips or repeats dates as soon as a sample is missing
    # or duplicated anywhere in the file.
    dates = sample_date.dropna().drop_duplicates()

    # Collect one Series per day and build the frame in a single step, so the
    # row index is the union of all days' lengths (assigning columns one at a
    # time to an empty frame fixes the index to the first day's length and
    # truncates any longer day).
    per_day = {}
    for ii in dates:
        date_iter = ii.strftime('%Y-%m-%d')
        per_day[date_iter] = freq.loc[sample_date == ii, 'f'].reset_index(drop=True)

        print(date_iter)

    freq_date = pd.DataFrame(per_day)
    

Start Time: 2021-11-01 00:00:00
End Time: 2021-11-30 23:59:59
Start Time: 2021-12-01 00:00:00
End Time: 2021-12-31 23:59:59
Start Time: 2022-01-01 00:00:00
End Time: 2022-01-31 23:59:59
Start Time: 2022-02-01 00:00:00
End Time: 2022-02-28 23:59:59


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

### Calculate the PSD of every 30-minute window for each date and estimate the noise power in each frequency band


In [None]:
    # Build `freq_psd`: one row per (date, 30-minute window) holding the window
    # start time and the mean periodogram PSD inside each frequency band.
    # Reads `freq_date` (one column per date) and `dates` from the load cell.
    fs = 1    # Sampling Freq (Hz)
    T = 30*60 # Measurement time (s); also the samples per window because fs = 1 Hz

    idx_list = freq_date.index[0::T] # Find index for every 30 min

    band_edges = np.linspace(0,0.5,6) # Define Frequency Bands
    band_edges = np.round(band_edges, 1)
    n_bands = len(band_edges) - 1

    # Each band is labelled by its upper edge.
    band_names = [str(edge) for edge in band_edges[1:]]

    col_labels = ["dtm"] +  band_names

    data = []
    for date in dates:
        date_iter = date.strftime('%Y-%m-%d')
        day_start = pd.Timestamp(date)  # midnight of this date

        for start_idx in idx_list:
            psd_band = []

            # Calculate PSD for each 30 min
            # .loc slices include BOTH endpoints, so the last label must be
            # start + T - 1 to get exactly T samples and no overlap with the
            # next window.
            end_idx = start_idx + T - 1

            signal = freq_date.loc[start_idx:end_idx,date_iter] - 50  # subtract 50 so signal is centred at 0Hz
            (ff, psd) = scipy.signal.periodogram(signal, fs, scaling='density')
            ## Frequency labelled ff as it is frequency of frequency measurements

            # Label each 30 min window with its start datetime.
            # The row label is treated as seconds since midnight (fs = 1 Hz).
            # Plain timedelta arithmetic is used because datetime.fromtimestamp()
            # converts through the machine's local time zone and would shift the
            # time of day on any kernel that is not running in UTC.
            dtm = (day_start + pd.Timedelta(seconds=int(start_idx))).to_pydatetime()

            # Calculate average PSD within each band
            for jj in range(n_bands):
                band_start = band_edges[jj]
                band_end = band_edges[jj+1]
                if jj == n_bands - 1:
                    # Close the last band on the right so the bin that sits
                    # exactly on the top edge (0.5 Hz when T is even) is kept.
                    in_band = (ff >= band_start) & (ff <= band_end)
                else:
                    in_band = (ff >= band_start) & (ff < band_end)
                psd_band.append( psd[in_band].mean() )

            data.append( [dtm] + psd_band )

    # Build Datetimes and Averages into Dataframe
    freq_psd = pd.DataFrame (data, columns = col_labels)

### Save Data 

In [None]:
    # Name the output after the year-month of the first PSD row, then write
    # the whole table there (relative path, resolved against the working dir).
    month_tag = freq_psd.at[0, "dtm"].strftime("%Y-%m")
    savename = "".join(["Data\\Frequency\\psd_", month_tag, ".csv"])
    print(savename)
    freq_psd.to_csv(savename)
