# Solar data

Script to process historic one-minute solar data obtained from https://www.nationalmap.gov.au/renewables, which in turn sources it (access has to be requested) from http://www.bom.gov.au/climate/data/oneminsolar/about-IDCJAC0022.shtml
The solar data for use in the model has to be averaged to 30 minute figures. However, the data downloaded had already been processed into hourly averages with this tool https://github.com/NICTA/aremi-tmy/.
Data format details: http://www.bom.gov.au/climate/cdo/about/IDCJAC0022-format.txt

### input

File extracted "086282_averaged.csv"
Station 086282 corresponds to Melbourne Airport Station. This data is available until May, 2015
One minute data for June 2015 until Dec 2019 can be found per month here: http://reg.bom.gov.au/climate/reg/oneminsolar/
It has to be processed to be in one file and averaged to 30 minute data as the "086282_averaged.csv" file.

In [208]:
import os # new
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
from matplotlib.dates import DateFormatter
import seaborn as sns
import numpy as np
import glob

#import earthpy as et

# Date time conversion registration: hook pandas' datetime converters into
# matplotlib so datetime-indexed data can be drawn on date axes.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# NOTE(review): this repeats the import three lines up and has no further effect.
from pandas.plotting import register_matplotlib_converters

# Prettier plotting with seaborn: "whitegrid" style with fonts scaled by 1.5
sns.set(font_scale=1.5, style="whitegrid")

In [209]:
# Solar data: read the averaged station file, using the parsed
# 'local time' column as the index
solar_min = pd.read_csv(
    'C:/Users/angel/Documents/GitHub/gr4sp/experiments/simulationData/086282_averaged.csv',
    index_col=['local time'],
    parse_dates=['local time'],
)

# Keep only the second column (mean global irradiance, in W/sq m) as a
# one-column DataFrame
solar_mean_ghi = solar_min.iloc[:, 1:2]
solar_mean_ghi.head(10)

Unnamed: 0_level_0,ghi mean
local time,Unnamed: 1_level_1
1999-05-01 00:00:00,
1999-05-01 01:00:00,
1999-05-01 02:00:00,
1999-05-01 03:00:00,
1999-05-01 04:00:00,
1999-05-01 05:00:00,
1999-05-01 06:00:00,
1999-05-01 07:00:00,
1999-05-01 08:00:00,
1999-05-01 09:00:00,


In [210]:
# Discard every timestamp whose irradiance value is missing
# (dropna defaults: row-wise, drop when any value is NaN)
solar_mean_ghi = solar_mean_ghi.dropna()
solar_mean_ghi.head(10)

Unnamed: 0_level_0,ghi mean
local time,Unnamed: 1_level_1
1999-05-12 00:00:00,0.0
1999-05-12 01:00:00,0.0
1999-05-12 02:00:00,0.0
1999-05-12 03:00:00,0.0
1999-05-12 04:00:00,0.0
1999-05-12 05:00:00,0.0
1999-05-12 06:00:00,0.0
1999-05-12 17:00:00,0.0
1999-05-12 18:00:00,0.0
1999-05-12 19:00:00,0.0


In [211]:
# Upsample the hourly means onto a 30-minute grid.
# Each hourly value is copied to the following half-hour slot only (limit=1).
# An unlimited forward fill would also spread the last valid reading across
# every multi-hour gap left by the earlier dropna (e.g. 1999-05-12 06:00 ->
# 17:00, where the night-time 0.0 would overwrite the whole day), so slots
# inside such gaps are deliberately left as NaN.
# solar_mean_ghi is not overwritten, which keeps this cell safe to re-run.
solar_mean30 = solar_mean_ghi.resample('30min').ffill(limit=1)
solar_mean30.tail()

Unnamed: 0_level_0,ghi mean
local time,Unnamed: 1_level_1
2015-05-31 22:00:00,0.0
2015-05-31 22:30:00,0.0
2015-05-31 23:00:00,0.0
2015-05-31 23:30:00,0.0
2015-06-01 00:00:00,0.0


## Fill data from 2015 onwards

Data from June 2015 is downloaded in one minute format from http://reg.bom.gov.au/climate/reg/oneminsolar/ (access has to be requested). The data is processed into 30 minute data. 

In [43]:
# Folder with the downloaded one-minute files, and the combined CSV to write
solar_minute_data = "C:/Users/angel/Documents/GitHub/gr4sp/experiments/simulationData/086282_2015_05_2020_07/"
output = 'C:/Users/angel/Documents/GitHub/gr4sp/experiments/simulationData/solar_minute_data.csv'

# List the folder once, sorted by file name so the files are concatenated in
# a stable order (presumably chronological, given dated file names - verify).
files = [os.path.join(solar_minute_data, fname) for fname in sorted(os.listdir(solar_minute_data))]

# header=None: every file's own header line is kept as an ordinary data row,
# so the combined CSV contains header text interleaved with the records.
pd.concat(
    pd.read_csv(path, sep=',', index_col=0, header=None) for path in files
).to_csv(output)

  
  
  
  
  
  
  interactivity=interactivity, compiler=compiler, result=result)


In [226]:
# Read the combined one-minute CSV back in with pandas' default parsing
solar_minute_data_df = pd.read_csv(
    filepath_or_buffer='C:/Users/angel/Documents/GitHub/gr4sp/experiments/simulationData/solar_minute_data.csv'
)

  interactivity=interactivity, compiler=compiler, result=result)


In [227]:
# Keep six columns by position: skip the first two and the trailing 28.
# NOTE(review): the positional slice presumably matches the IDCJAC0022 column
# layout (timestamp parts followed by mean global irradiance) - confirm if the
# source format ever changes.
solar_minute_data_df_mean = solar_minute_data_df.iloc[:, 2:-28].copy()
solar_minute_data_df_mean.columns = ['year','month','day','hour','minute','Mean global irradiance (over 1 minute) in W/sq m']

# Drop all non-numeric rows, i.e. rows whose 'year' cannot be parsed as a
# number. This removes header text wherever it appears (including the first
# row), so no separate unconditional drop of row 0 is needed - that drop would
# discard a real record whenever the first row is data.
has_numeric_year = pd.to_numeric(solar_minute_data_df_mean['year'], errors='coerce').notnull()
solar_minute_data_df_mean = solar_minute_data_df_mean[has_numeric_year]

solar_minute_data_df_mean.head()

Unnamed: 0,year,month,day,hour,minute,Mean global irradiance (over 1 minute) in W/sq m
1,2015,5,1,0,0,0.0
2,2015,5,1,0,1,0.0
3,2015,5,1,0,2,0.0
4,2015,5,1,0,3,0.0
5,2015,5,1,0,4,0.0


In [228]:
# Assemble timestamps from the component columns. No `format` is passed: when
# to_datetime receives a DataFrame of year/month/day/hour/minute columns it
# builds the dates from those columns and a format string is not applied.
solar_minute_data_dates = pd.to_datetime(solar_minute_data_df_mean[['year','month','day','hour','minute']])

# Force the irradiance to a numeric dtype. The column was read from a CSV that
# mixes header text with records, so it may hold strings; anything that cannot
# be parsed becomes NaN.
ghi_1min = pd.to_numeric(
    solar_minute_data_df_mean['Mean global irradiance (over 1 minute) in W/sq m'],
    errors='coerce',
)

# NOTE: this rebinds solar_minute_data_df_mean to a two-column frame
# ('date', 'ghi mean'); the cell cannot be re-run without re-running the
# column-selection cell first.
solar_minute_data_df_mean = pd.DataFrame({'date': solar_minute_data_dates, 'ghi mean': ghi_1min})

In [229]:
# Use the 'date' column as the index of the one-minute irradiance frame
solar_ghi_2015_2020 = solar_minute_data_df_mean.set_index(keys='date')


In [230]:
# Rebuild the index as a pandas DatetimeIndex, then preview the first rows
solar_ghi_2015_2020.index = pd.DatetimeIndex(data=solar_ghi_2015_2020.index)
solar_ghi_2015_2020.head(n=5)

Unnamed: 0_level_0,ghi mean
date,Unnamed: 1_level_1
2015-05-01 00:00:00,0.0
2015-05-01 00:01:00,0.0
2015-05-01 00:02:00,0.0
2015-05-01 00:03:00,0.0
2015-05-01 00:04:00,0.0


In [243]:
# Average the one-minute readings into 30-minute means, as the model requires.
# A forward fill here would only sample the single reading at each half-hour
# mark and ignore the other 29 minutes of every bin.
# Values are coerced to numeric first because the column may still hold
# strings from the mixed-type CSV; unparseable entries become NaN and are
# skipped by mean(). Bins with no valid reading come out as NaN.
ghi_1min = solar_ghi_2015_2020.apply(pd.to_numeric, errors='coerce')
solar_ghi_2015_2020 = ghi_1min.resample('30min').mean()
solar_ghi_2015_2020.tail()

Unnamed: 0_level_0,ghi mean
date,Unnamed: 1_level_1
2020-07-31 21:30:00,0.0
2020-07-31 22:00:00,0.0
2020-07-31 22:30:00,0.0
2020-07-31 23:00:00,0.0
2020-07-31 23:30:00,0.0


## Join two data frames

In [244]:
# Stack the hourly-derived series on top of the minute-derived one
combined = pd.concat([solar_mean30, solar_ghi_2015_2020], axis=0)

# The two sources overlap: the first runs to 2015-06-01 00:00 while the second
# starts at 2015-05-01 00:00. Keep a single row per timestamp - keep='last'
# prefers the minute-derived values - and sort so the index is monotonic.
solar_30min_1999_2020 = combined[~combined.index.duplicated(keep='last')].sort_index()

solar_30min_1999_2020.tail(100)

Unnamed: 0,ghi mean
2020-07-29 22:00:00,0.00
2020-07-29 22:30:00,0.00
2020-07-29 23:00:00,0.00
2020-07-29 23:30:00,0.00
2020-07-30 00:00:00,0.00
2020-07-30 00:30:00,0.00
2020-07-30 01:00:00,0.00
2020-07-30 01:30:00,0.00
2020-07-30 02:00:00,0.00
2020-07-30 02:30:00,0.00


# Write to CSV

In [246]:
# Serialise the joined series (index included) to CSV text and write it to
# 'solar30_99_20.csv' in the current working directory. The context manager
# closes the file even if the write raises.
solar30 = solar_30min_1999_2020.to_csv(index=True)
with open('solar30_99_20.csv', 'w', newline = '\n') as f:
    f.write(solar30)