In [None]:
# Report the interpreter this kernel is actually running. A shell lookup of
# `python3` only shows the first match on PATH, which can differ from the
# interpreter the kernel was started with.
import sys

python_executable = sys.executable
python_executable

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Import Data

Data sets are stored as NetCDF files, where each file holds a one-dimensional time series of potential solar PV generation for one major city.

In [None]:
# Directory containing the per-city NetCDF files.
file_path = Path('/g/data/gb02/cd3022/hot-and-cloudy/solar-pv/GCCSA/')

# glob() yields paths in arbitrary, filesystem-dependent order; sort them so the
# order of the combined variables (and of plot legends) is the same on every run.
files = sorted(file_path.glob("*.nc"))
if not files:
    # Fail early with the directory name instead of a generic "no files to open".
    raise FileNotFoundError(f"No .nc files found in {file_path}")

# With combine='nested' and no concat_dim, the per-file datasets are merged
# rather than concatenated (see the xarray open_mfdataset documentation).
ds = xr.open_mfdataset(files, combine='nested')

In [None]:
# Display the loaded dataset as a quick sanity check before any filtering.
ds

### Remove the missing timestep from each day

Himawari skips one sample per day for maintenance. The data looks better if this time step is removed.

In [None]:
# Keep every timestep except the daily 12:40 one. Comparing the hour/minute
# components avoids formatting each timestamp as a string, and boolean
# positional indexing drops the unwanted steps directly instead of masking
# every variable first.
# NOTE(review): 12:40 is interpreted in whatever time zone the `time`
# coordinate is stored in — confirm against the source data.
is_maintenance_gap = (ds['time'].dt.hour == 12) & (ds['time'].dt.minute == 40)
ds = ds.isel(time=~is_maintenance_gap.values)

### Remove 3 bad days of Himawari data

These days contain errors in the underlying irradiance dataset. They do not fall within the hot months of interest, but the code is kept here in case anyone wants to look at the other months.

In [None]:
# Calendar days to discard entirely.
bad_days = [
    np.datetime64('2019-08-12'),
    np.datetime64('2019-10-01'),
    np.datetime64('2020-09-06'),
]
# Truncate every timestamp to midnight so it can be matched against whole days.
ds_dates = ds['time'].dt.floor('D')
good_time_mask = ~ds_dates.isin(bad_days)
# Index positionally with the boolean mask rather than re-selecting the
# surviving timestamps by label, which is slower and relies on the `time`
# labels being unique.
ds = ds.isel(time=good_time_mask.values)

### Convert to Capacity Factors

In [None]:
# Rated capacity taken from sandia_modules['Canadian_Solar_CS5P_220M___2009_'], the solar panel used in the pvlib system,
# using the calculation rated_capacity = module.loc['Impo'] * module.loc['Vmpo']
rated_capacity = 219.656729124
# Plain Dataset arithmetic divides every data variable by the scalar.
# `Dataset.apply` is a legacy alias of `Dataset.map` that xarray discourages,
# and neither is needed for a simple division.
# NOTE: this rebinds `ds`, so re-running the cell divides by the capacity again.
ds = ds / rated_capacity

### Replace missing values with 0

Missing values are set to 0 so that, when a daily mean is taken, days of different lengths can be compared.

In [None]:
# Replace missing values with 0 using the built-in fill method instead of a
# hand-written null mask.
ds = ds.fillna(0)

# Plotting

In [None]:
# Example day
date = '2020-01-06'
fig, ax = plt.subplots()
for reg in ds.data_vars:
    day_series = ds[reg].sel(time=date)
    # Plot against the actual timestamps rather than the sample index.
    ax.plot(day_series['time'].values, day_series.values, label=reg)
# Legend and axis labels are loop-invariant, so set them once after drawing.
ax.legend()
ax.set_xlabel('time')
ax.set_ylabel('capacity factor')
fig.autofmt_xdate()

In [None]:
# Multiple days
start = '2020-01-06'
end = '2020-01-10'
fig, ax = plt.subplots()
for reg in ds.data_vars:
    window = ds[reg].sel(time=slice(start, end))
    # Plot against the actual timestamps rather than the sample index.
    ax.plot(window['time'].values, window.values, label=reg)
# Legend and axis labels are loop-invariant, so set them once after drawing.
ax.legend()
ax.set_xlabel('time')
ax.set_ylabel('capacity factor')
fig.autofmt_xdate()

# Find Cloudy Days

A very simplified method for identifying cloudy days. The script converts the data into daily mean capacity factors and then flags the days whose daily mean falls below a threshold.

In [None]:
# Resample to daily means and load the result into memory.
daily = ds.resample(time='1D').mean().compute()

# restrict data to the warm months (summer +/- 1)
warm_months = [1, 2, 3, 11, 12]
# Select the warm-month days positionally; this is a pure selection along
# `time`, so no value masking is needed.
is_warm_month = daily['time'].dt.month.isin(warm_months)
daily_warm_months = daily.isel(time=is_warm_month.values)

# A day is reported when its daily mean is strictly below this value.
threshold = 0.1

# Iterate over the same dataset that is indexed inside the loop.
for reg in daily_warm_months.data_vars:
    warm_series = daily_warm_months[reg]
    # Keep only the days under the threshold; days with a NaN mean never
    # satisfy the comparison and are dropped as well.
    cloudy = warm_series.where(warm_series < threshold, drop=True)
    print(f'Cloudy days for {reg}:')
    print(cloudy.time)