## Importing the necessary packages

In [1]:
# import sys
# import tkinter as tk
# from tkinter import filedialog
import pandas as pd
import matplotlib.pyplot as plt


## Loading the raw data.
### The data is from DEFRA, please see this link https://uk-air.defra.gov.uk/data/uv-data?action=search
#### This link also defines the column headers

In [2]:
# List the contents of the working directory to see which data files are present.
ls

 Volume in drive C has no label.
 Volume Serial Number is A61A-B099

 Directory of C:\Users\aniqb\Files\Untitled Folder

11/12/2022  20:43    <DIR>          .
11/12/2022  20:43    <DIR>          ..
11/12/2022  20:42    <DIR>          .ipynb_checkpoints
11/12/2022  20:42            27,953 ce46599147cf5c0f9bfc8a3165fd67ad.csv
11/12/2022  20:42             1,133 UK_UV_data.ipynb
11/12/2022  19:54           300,915 uv_radiation.ipynb
               3 File(s)        330,001 bytes
               3 Dir(s)  138,374,000,640 bytes free


## Load the file and print the first few rows using pandas

In [4]:
# Read the UV measurements CSV from the working directory into a DataFrame.
# NOTE(review): the directory listing above shows no 'uv_data.csv' — confirm the
# downloaded file was renamed before this cell is run.
df = pd.read_csv('uv_data.csv')

In [5]:
# Preview the first rows to check that the columns were read as expected.
df.head()

Unnamed: 0,Measurement date,Measurement time,Site,Instrument,UV Total Index,Level,Data Quality
0,12/02/2022,16:30:00,Reading,DM150,0.1,R,G
1,12/02/2022,16:00:00,Reading,DM150,0.1,R,G
2,12/02/2022,15:30:00,Reading,DM150,0.2,R,G
3,12/02/2022,15:00:00,Reading,DM150,0.4,R,G
4,12/02/2022,14:30:00,Reading,DM150,0.5,R,G


In [6]:
## Print out the different data features
# df.columns holds the column labels of the loaded table; store them in a
# variable and print them.
column_names = df.columns
print(column_names)

Index(['Measurement date', 'Measurement time', 'Site', 'Instrument',
       'UV Total Index', 'Level', 'Data Quality'],
      dtype='object')


## Let's see how granular the data is in the time domain.

In [7]:
# (number of rows, number of columns) of the loaded table.
df.shape

(660, 7)

In [None]:

# Count the rows recorded for a single day to see the sampling granularity.
# The UV table has no "SETTLEMENT_DATE" column; its date column is
# "Measurement date", and the value is matched as a string in the same form
# that df.head() displays.
df.loc[df["Measurement date"] == "12/02/2022"].shape

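## Seems there are 48 rows for each day, with the
## settlement period ranging from 0 to 47.
## NOTE(review): this remark refers to a settlement period, which is not a column of the UV data loaded above — re-verify it once the cell above has been run.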

In [None]:
## Let's see how many unique days there are in the data
# Uses the "Measurement date" column of the UV table (there is no
# "SETTLEMENT_DATE" column in this DataFrame).
df["Measurement date"].nunique()

In [None]:
## Let's print the range of dates that are present in the dataset.
# The dates are stored as text, so they are parsed first: min()/max() on the raw
# strings would compare them character by character rather than chronologically.
# NOTE(review): the format assumes day-first dates (dd/mm/yyyy) — confirm against
# the DEFRA column definitions. A wrong assumption fails loudly as soon as a
# value does not fit the format.
measurement_days = pd.to_datetime(df["Measurement date"], format="%d/%m/%Y")
print(measurement_days.min())
print(measurement_days.max())

## Seems like, as of 11th Dec 2022, the data has been updated to include data
## up to 19th Nov 2022

In [None]:
## Creating a new column where date and time are combined

In [None]:
## Let's check the dtype of the date column before converting it to a datetime
# Uses the "Measurement date" column of the UV table (there is no
# "SETTLEMENT_DATE" column in this DataFrame).
df["Measurement date"].dtype

In [None]:
# Combine the separate date and time columns into a single datetime column.
# The UV table already carries the time of day in "Measurement time", so the two
# text columns are joined and parsed in one step; no settlement-period offset is
# involved.
# NOTE(review): the format assumes day-first dates (dd/mm/yyyy) — confirm against
# the DEFRA column definitions. An explicit format makes parsing fail loudly
# instead of silently swapping day and month.
df['DATETIME'] = pd.to_datetime(
    df['Measurement date'] + ' ' + df['Measurement time'],
    format='%d/%m/%Y %H:%M:%S',
)

df.head()

In [None]:
## Set the new datetime as the data frame index
# Re-assign instead of using inplace=True, and skip the step when the index is
# already in place, so that re-running this cell does not raise a KeyError for
# the (by then consumed) "DATETIME" column.
if df.index.name != "DATETIME":
    df = df.set_index("DATETIME")

In [None]:
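##Exploring ND, TSD and ENGLAND_WALES_DEMAND
### ND = National Demand
### TSD = Transmission System Demand, i.e. ND plus 3 additional demands:
#### i) station load, ii) pumped-storage pumping, iii) interconnector exports
##### TSD includes an estimate of station load of 500MW in BST and 600MW in GMT
# NOTE(review): ND, TSD and ENGLAND_WALES_DEMAND are not among the columns printed
# for the UV DataFrame earlier in this notebook — confirm this section belongs here.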

## Plotting the UV Total Index as a function of time over varying time windows

In [None]:
##Plotting all datapoints (i.e. half-hourly data)
# The UV table has no 'ND' or 'TSD' columns; its measured quantity is
# 'UV Total Index'. The x-axis is the DataFrame index.
df.plot(y=['UV Total Index'], title='UV Total Index, all datapoints')

In [None]:
##Plotting the first two weeks of January, one figure per week (i.e. half-hourly data)
# The UV table has no 'ND' or 'TSD' columns; its measured quantity is
# 'UV Total Index'. The window bounds are passed as Timestamps so the axis
# limits are unambiguous datetimes rather than plain strings.
# NOTE(review): confirm these windows fall inside the date range of the UV data.
for week_start, week_end in [
    ("2022-01-01 00:30:00", "2022-01-08 00:30:00"),
    ("2022-01-08 00:30:00", "2022-01-15 00:30:00"),
]:
    df.plot(
        y=['UV Total Index'],
        xlim=(pd.Timestamp(week_start), pd.Timestamp(week_end)),
    )

In [None]:
##Plotting two consecutive weeks in summer, one figure per week (i.e. half-hourly data)
# The UV table has no 'ND' or 'TSD' columns; its measured quantity is
# 'UV Total Index'. The window bounds are passed as Timestamps so the axis
# limits are unambiguous datetimes rather than plain strings.
# NOTE(review): confirm these windows fall inside the date range of the UV data.
for week_start, week_end in [
    ("2022-07-01 00:30:00", "2022-07-08 00:30:00"),
    ("2022-07-08 00:30:00", "2022-07-15 00:30:00"),
]:
    df.plot(
        y=['UV Total Index'],
        xlim=(pd.Timestamp(week_start), pd.Timestamp(week_end)),
    )

In [None]:
##Plotting two consecutive days in winter, one figure per day (i.e. half-hourly data)
# The UV table has no 'ND' or 'TSD' columns; its measured quantity is
# 'UV Total Index'. The window bounds are passed as Timestamps so the axis
# limits are unambiguous datetimes rather than plain strings.
# NOTE(review): confirm these windows fall inside the date range of the UV data.
for day_start, day_end in [
    ("2022-01-03 00:30:00", "2022-01-04 00:30:00"),
    ("2022-01-04 00:30:00", "2022-01-05 00:30:00"),
]:
    df.plot(
        y=['UV Total Index'],
        xlim=(pd.Timestamp(day_start), pd.Timestamp(day_end)),
    )

In [None]:
##Plotting two consecutive days in summer, one figure per day (i.e. half-hourly data)
# The UV table has no 'ND' or 'TSD' columns; its measured quantity is
# 'UV Total Index'. The window bounds are passed as Timestamps so the axis
# limits are unambiguous datetimes rather than plain strings.
# NOTE(review): confirm these windows fall inside the date range of the UV data.
for day_start, day_end in [
    ("2022-07-01 00:30:00", "2022-07-02 00:30:00"),
    ("2022-07-02 00:30:00", "2022-07-03 00:30:00"),
]:
    df.plot(
        y=['UV Total Index'],
        xlim=(pd.Timestamp(day_start), pd.Timestamp(day_end)),
    )