In [2]:
import pandas as pd     
import warnings
import numpy as np
warnings.simplefilter(action='ignore', category=FutureWarning)
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default='notebook'



def normalize(df, how="min-max"):
    """Rescale a pandas object with one of two normalisation schemes.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Data to rescale. For a DataFrame the statistics are computed per
        column (pandas' default reduction axis).
    how : {"min-max", "mean"}
        "min-max" maps each column onto [0, 1] via (x - min) / (max - min);
        "mean" standardises each column via (x - mean) / std.

    Returns
    -------
    Same type as ``df``, rescaled.

    Raises
    ------
    ValueError
        If ``how`` is not one of the supported schemes. (Previously an
        unknown value silently returned ``None``.)
    """
    if how == "min-max":
        lowest = df.min()
        return (df - lowest) / (df.max() - lowest)
    if how == "mean":
        return (df - df.mean()) / df.std()
    raise ValueError(
        "unknown normalization {!r}; expected 'min-max' or 'mean'".format(how)
    )


def get_cp_indices_df(df_cp, df_indices, selected_coutry = None, data_resolution = "M", normalization="mean", normalize_cp=True, smoothing = False, smoothing_duration=6, indices=None, start_year=1982) :
    """Build one period-indexed frame of capacity factor plus climate indices.

    Parameters
    ----------
    df_cp : pandas.DataFrame
        Frame with a datetime-like index, a ``country`` column and a
        ``capacity_factor`` column.
    df_indices : pandas.DataFrame
        Climate indices with a datetime-like index.
    selected_coutry : str or sequence of str, optional
        Countries to keep; ``None`` or an empty sequence keeps every country
        present in ``df_cp``. (The misspelled name is kept because callers
        pass it as a keyword.)
    data_resolution : str
        Period alias handed to ``index.to_period`` (e.g. "M", "D").
    normalization : str
        Scheme forwarded to ``normalize``.
    normalize_cp : bool
        When False, the ``capacity_factor`` column is restored to its
        un-normalised period means after normalisation.
    smoothing : bool
        When True, apply a centred rolling mean.
    smoothing_duration : int
        Rolling window length, in periods.
    indices : sequence of str, optional
        Subset of ``df_indices`` columns to keep.
    start_year : int
        Periods whose year is below this value are discarded.

    Returns
    -------
    pandas.DataFrame
        Indexed by period, ``capacity_factor`` first, then the index columns.
    """
    if selected_coutry is None or len(selected_coutry) == 0:
        selected_coutry = df_cp["country"].unique()
    elif isinstance(selected_coutry, str):
        # A bare string is not accepted by Series.isin; treat it as one country.
        selected_coutry = [selected_coutry]

    cp_selected = df_cp[df_cp["country"].isin(selected_coutry)]
    # numeric_only=True keeps the string `country` column out of the mean;
    # without it recent pandas raises instead of silently dropping the column.
    # Grouping on the period index already yields a period-indexed frame, so
    # no reset_index/set_index round trip is needed.
    df_cp_filter = cp_selected.groupby(
        cp_selected.index.to_period(data_resolution)
    ).mean(numeric_only=True)

    if indices is not None and len(indices) > 0:
        df_indices = df_indices[list(indices)]
    df_indices_filter = df_indices.groupby(
        df_indices.index.to_period(data_resolution)
    ).mean(numeric_only=True)

    df_cp_indices = df_cp_filter.join(df_indices_filter)
    df_cp_indices = df_cp_indices[df_cp_indices.index.year >= start_year]
    df_cp_indices = normalize(df_cp_indices, how=normalization)

    if not normalize_cp:
        # Put the raw period means back; item assignment aligns on the index
        # and guarantees the *column* is replaced (attribute assignment with a
        # whole DataFrame did not reliably do that).
        df_cp_indices["capacity_factor"] = df_cp_filter.loc[
            df_cp_filter.index.year >= start_year, "capacity_factor"
        ]

    if smoothing:
        df_cp_indices = df_cp_indices.rolling(smoothing_duration, center=True).mean()

    return df_cp_indices

    

def compare_cp_index(df_cp_indices, smoothing = False, smoothing_duration=6, indices = None, start=1990, end=2019, show_plot = True):
    """Plot capacity factor against climate indices and return their correlations.

    Parameters
    ----------
    df_cp_indices : pandas.DataFrame
        Frame with a ``capacity_factor`` column, one column per climate index
        and an index exposing ``.year``.
    smoothing, smoothing_duration
        Only used to annotate the plot title; no smoothing is applied here.
    indices : sequence of str, optional
        Restrict the comparison to these index columns.
    start, end : int
        Exclusive year bounds: only rows with ``start < year < end`` are kept.
    show_plot : bool
        When True, render a plotly line chart with only ``capacity_factor``
        visible by default (the other traces start as legend-only).

    Returns
    -------
    pandas.Series
        Correlation of every remaining column with ``capacity_factor``; the
        self-correlation entry is removed by label, so the result is correct
        wherever ``capacity_factor`` sits in the column order.
    """
    if indices is not None and len(indices) > 0:
        df_cp_indices = df_cp_indices[list(indices) + ["capacity_factor"]]

    years = df_cp_indices.index.year
    df_cp_indices = df_cp_indices[(years > start) & (years < end)]

    if show_plot:
        title = "normalized capacity factor and climate indices"
        if smoothing:
            title += ", smoothing={}".format(smoothing_duration)
        # The index is cast to strings before plotting.
        plot_frame = df_cp_indices.set_index(df_cp_indices.index.astype("str"))
        fig = px.line(plot_frame, title=title)
        fig.update_traces(visible='legendonly')
        fig.update_traces(visible=True, selector=dict(name="capacity_factor"))
        fig.show()

    # Drop by label rather than position: slicing off the last row kept the
    # self-correlation and lost a real index whenever capacity_factor was not
    # the last column.
    return df_cp_indices.corr()["capacity_factor"].drop("capacity_factor")

In [6]:



def load_capacity_factor(path, value_name):
    """Read one capacity-factor CSV into a timestamp-indexed frame.

    Rows sharing the same (timestamp, country) pair are averaged, the frame is
    indexed by ``timestamp`` and the ``capacity_factor`` column is renamed to
    ``value_name``. The resulting columns are ``country`` and ``value_name``.

    The index is not unique when several countries share a timestamp, so
    joining two such frames on the index alone pairs every country with every
    other country per timestamp; combine them on both ``timestamp`` and
    ``country`` instead.
    """
    frame = pd.read_csv(path, usecols=["timestamp", "country", "capacity_factor"])
    frame["timestamp"] = pd.to_datetime(frame["timestamp"], format="%Y-%m-%d %H:%M:%S")
    return (
        frame.groupby(["timestamp", "country"]).mean()
        # Move only `country` back to a column; `timestamp` stays as the index.
        .reset_index("country")
        .rename(columns={"capacity_factor": value_name}, errors="raise")
    )


df_cp_solar = load_capacity_factor("solar", "capacity_factor_s")
df_cp = load_capacity_factor("dataset_with_timestamp", "capacity_factor_w")

indices = ["nao", "ao", "mjo80e", "mjo40w", "mjo20e", "mjo160e", "mjo10w", "nino34"]

df_indices = pd.read_csv("daily_indices_82_to_19.csv")
df_indices["timestamp"] = pd.to_datetime(df_indices["timestamp"], format="%Y-%m-%d")
# Keep only the indices of interest, in the order listed above.
df_indices = df_indices[["timestamp"] + indices].set_index("timestamp")

MemoryError: Unable to allocate 2.10 GiB for an array with shape (281769600, 1) and data type float64

In [3]:
# Re-read both capacity-factor files as-is (no timestamp parsing, default
# index) and give each value column its own name.
df_cp_solar = pd.read_csv(
    "solar", usecols=["timestamp", "country", "capacity_factor"]
).rename(columns={"capacity_factor": "capacity_factor_s"}, errors="raise")

df_cp = pd.read_csv(
    "dataset_with_timestamp", usecols=["timestamp", "country", "capacity_factor"]
).rename(columns={"capacity_factor": "capacity_factor_w"}, errors="raise")


In [5]:
# Preview the frame; the rendering below is truncated to its first and last rows.
df_cp_solar

Unnamed: 0,timestamp,country,capacity_factor_s
0,1979-01-01 00:00:00,AT,0.0
1,1979-01-01 01:00:00,AT,0.0
2,1979-01-01 02:00:00,AT,0.0
3,1979-01-01 03:00:00,AT,0.0
4,1979-01-01 04:00:00,AT,0.0
...,...,...,...
10063195,2019-12-31 19:00:00,GB,0.0
10063196,2019-12-31 20:00:00,GB,0.0
10063197,2019-12-31 21:00:00,GB,0.0
10063198,2019-12-31 22:00:00,GB,0.0


In [6]:
# Combine the two capacity-factor frames row by row. Both keys are required:
# matching on timestamp alone would pair every country with every other one.
# validate="one_to_one" makes pandas raise MergeError if a (timestamp, country)
# pair is duplicated on either side, instead of silently multiplying rows.
df_cp_all = df_cp.merge(
    df_cp_solar,
    on=["timestamp", "country"],
    how="inner",
    validate="one_to_one",
)




In [7]:
# Preview the merged frame: one row per (timestamp, country) with both
# capacity_factor_w and capacity_factor_s columns.
df_cp_all

Unnamed: 0,timestamp,country,capacity_factor_w,capacity_factor_s
0,1979-01-01 00:00:00,AT,0.159416,0.0
1,1979-01-01 00:00:00,CZ,0.078320,0.0
2,1979-01-01 00:00:00,LU,0.703548,0.0
3,1979-01-01 00:00:00,CH,0.827536,0.0
4,1979-01-01 00:00:00,SE,0.854483,0.0
...,...,...,...,...
10063195,2019-12-31 23:00:00,SE,0.603065,0.0
10063196,2019-12-31 23:00:00,LV,0.704257,0.0
10063197,2019-12-31 23:00:00,IT,0.152716,0.0
10063198,2019-12-31 23:00:00,HU,0.459178,0.0


In [10]:
# Repeated preview of the merged frame; the rendering below matches the
# previous display of df_cp_all.
df_cp_all

Unnamed: 0,timestamp,country,capacity_factor_w,capacity_factor_s
0,1979-01-01 00:00:00,AT,0.159416,0.0
1,1979-01-01 00:00:00,CZ,0.078320,0.0
2,1979-01-01 00:00:00,LU,0.703548,0.0
3,1979-01-01 00:00:00,CH,0.827536,0.0
4,1979-01-01 00:00:00,SE,0.854483,0.0
...,...,...,...,...
10063195,2019-12-31 23:00:00,SE,0.603065,0.0
10063196,2019-12-31 23:00:00,LV,0.704257,0.0
10063197,2019-12-31 23:00:00,IT,0.152716,0.0
10063198,2019-12-31 23:00:00,HU,0.459178,0.0


In [11]:
# Write the merged frame to disk as UTF-8 CSV; index=False leaves the row
# index out of the file.
df_cp_all.to_csv('full_dataset.csv', index = False, encoding='utf-8')


In [None]:
# --- Analysis configuration ---
data_resolution = "D"  # daily periods; uppercase alias (lowercase "d" is deprecated in recent pandas)
selected_country = ["GB"]
normalize_cp = True
smoothing = False
smoothing_duration = 6
normalization = "mean"

# get_cp_indices_df needs a timestamp-indexed frame with `country` and
# `capacity_factor` columns. Depending on which loading cell ran last, df_cp
# is either already timestamp-indexed or still carries a raw string
# `timestamp` column, and its value column is named `capacity_factor_w`.
# Normalise both here so this cell does not depend on execution order.
df_cp_prepared = df_cp if "timestamp" in df_cp.columns else df_cp.reset_index()
# Filter first so the timestamp parsing only touches the selected rows.
df_cp_prepared = df_cp_prepared[df_cp_prepared["country"].isin(selected_country)]
df_cp_prepared = (
    df_cp_prepared
    .assign(timestamp=pd.to_datetime(df_cp_prepared["timestamp"], format="%Y-%m-%d %H:%M:%S"))
    .set_index("timestamp")
    .rename(columns={"capacity_factor_w": "capacity_factor"})
)

df_cp_indices = get_cp_indices_df(
    df_cp_prepared,
    df_indices,
    selected_coutry=selected_country,
    data_resolution=data_resolution,
    normalization=normalization,
    normalize_cp=normalize_cp,
    smoothing=smoothing,
    smoothing_duration=smoothing_duration,
)

In [None]:
max_lag = 100
indices = ['nao', 'ao', 'mjo80e', 'mjo40w', 'mjo20e', 'mjo160e', 'mjo10w', 'nino34']
# Symmetric window -max_lag .. +max_lag; range() excludes its stop value, so
# the +1 is needed to include the +max_lag shift.
offsets = range(-max_lag, max_lag + 1)

# NOTE(review): `crosscorr` is not defined in the visible part of this
# notebook. It is called as crosscorr(series_a, series_b, lag) and presumably
# returns the correlation of the two series at that shift. Confirm it is
# defined before this cell runs on a fresh kernel.
rs = pd.DataFrame(
    {
        index: [
            crosscorr(df_cp_indices.capacity_factor, df_cp_indices[index], lag)
            for lag in offsets
        ]
        for index in indices
    },
    index=list(offsets),
)

fig = px.line(rs).update_layout(
    xaxis_title="shift (in days)",
    yaxis_title="correlation daily capacity_factor vs daily climate index",
)
fig.add_vline(0, annotation_text="center", annotation_position="top left")
# Every trace starts hidden; click a legend entry to show it.
fig.update_traces(visible='legendonly')
fig.show()
fig.write_html("TLCC_daily_cf_GB.html")





