In [13]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import datetime
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [14]:
# Time-of-day window applied to both data sources (both bounds inclusive).
start_time = datetime.time(7, 0, 0)
end_time = datetime.time(17, 0, 0)

# Generation power: keep only the timestamp and the 'Ptot (kW)' column,
# then drop every row whose time of day falls outside the window.
power_df = (
    pd.read_csv('pv_data_5minresample_concat.csv',
                usecols=['datetime', 'Ptot (kW)'],
                parse_dates=['datetime'])
    .loc[lambda frame: frame['datetime'].dt.time.between(start_time, end_time)]
)

# Measured irradiance: harmonise the column names with power_df so the two
# frames can be joined on 'datetime', then apply the same time-of-day window.
irradiance_df = (
    pd.read_csv('EE Station 1-20230101-20231231.csv',
                usecols=['Datetime', 'Irradiance_30 (W/m2)'],
                parse_dates=['Datetime'])
    .rename(columns={'Datetime': 'datetime', 'Irradiance_30 (W/m2)': 'irradiance'})
    .loc[lambda frame: frame['datetime'].dt.time.between(start_time, end_time)]
)

# Inner join: only timestamps present in BOTH sources survive.
# The result has the columns datetime, Actual irradiance, Power and month.
df = (
    power_df
    .merge(irradiance_df, on='datetime', how='inner')
    .loc[:, ['datetime', 'irradiance', 'Ptot (kW)']]
    .rename(columns={'irradiance': 'Actual irradiance', 'Ptot (kW)': 'Power'})
    .assign(month=lambda merged: merged['datetime'].dt.month)
)

In [15]:
# First subset: rows from calendar months before August (month < 8)
df1 = df.loc[df['month'].lt(8)]

In [16]:
# Second subset: rows from August onwards (month >= 8)
df2 = df.loc[df['month'].ge(8)]

In [17]:
# Discard every row that has a missing value in any column, for both subsets
df1, df2 = (subset.dropna() for subset in (df1, df2))

In [18]:
# Linear regression
from sklearn.linear_model import LinearRegression
import numpy as np

In [19]:
# Build (n_samples, 1) column arrays for each subset:
# irradiance is the single feature, power is the target.
X1 = df1[['Actual irradiance']].to_numpy()
y1 = df1[['Power']].to_numpy()

X2 = df2[['Actual irradiance']].to_numpy()
y2 = df2[['Power']].to_numpy()

In [20]:
# Fit one linear model per subset; predictions are made on the same rows
# the model was fitted on (in-sample).
reg1 = LinearRegression()
reg1.fit(X1, y1)
y_pred1 = reg1.predict(X1)

reg2 = LinearRegression()
reg2.fit(X2, y2)
y_pred2 = reg2.predict(X2)

In [21]:
# Show the merged frame (columns: datetime, Actual irradiance, Power, month).
# Note: this is the full frame, not the df1 / df2 subsets used for fitting.
df

Unnamed: 0,datetime,Actual irradiance,Power,month
0,2023-03-09 07:00:00,49.0,0.076115,3
1,2023-03-09 07:05:00,54.0,0.095170,3
2,2023-03-09 07:10:00,66.0,0.141439,3
3,2023-03-09 07:15:00,72.0,0.181216,3
4,2023-03-09 07:20:00,84.0,0.227722,3
...,...,...,...,...
35448,2023-12-31 16:40:00,98.0,0.105032,12
35449,2023-12-31 16:45:00,84.0,0.090773,12
35450,2023-12-31 16:50:00,79.0,0.095681,12
35451,2023-12-31 16:55:00,84.0,0.097808,12


In [22]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()

# All merged measurements, coloured by calendar month so that any seasonal
# separation between the two fitted subsets is visible.
fig.add_trace(go.Scatter(x=df['Actual irradiance'],
                         y=df['Power'],
                         mode='markers',
                         name='Data points',
                         marker=dict(size=3,
                                     color=df['month'],
                                     colorscale='tealrose',  # Colorscale used by the color bar
                                     colorbar=dict(title='Month'))))  # Color bar with title

# One entry per fitted model:
# (legend name, colour, x values, fitted y values, R-squared, annotation y position).
# R-squared is computed from the models instead of being typed in by hand, so the
# annotations cannot go stale when the data or the month split changes.
fits = [
    ('Linear regression 1', 'blue', df1['Actual irradiance'], y_pred1, reg1.score(X1, y1), 0.85),
    ('Linear regression 2', 'black', df2['Actual irradiance'], y_pred2, reg2.score(X2, y2), 0.1),
]

for fit_name, fit_color, fit_x, fit_y, r_squared, annotation_y in fits:
    # Each regression line gets its own explicit line colour so the two fits
    # can be told apart.
    fig.add_trace(go.Scatter(x=fit_x,
                             y=fit_y.flatten(),
                             mode='lines',
                             name=fit_name,
                             line=dict(color=fit_color, width=2)))

    # The annotation is positioned in paper coordinates and then shifted by a
    # fixed pixel offset; its font colour matches the line it belongs to.
    fig.add_annotation(
        x=0.1,
        y=annotation_y,
        xref='paper',
        yref='paper',
        text=f'R-squared: {r_squared:.2f}',
        showarrow=False,
        font=dict(size=30, color=fit_color),
        xshift=300,
        yshift=100
    )

fig.update_layout(title='Scatter Plot of Generation Power (kW) vs Actual irradiance',
                  legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01,
                              font=dict(size=18)),  # Legend font size
                  width=800,
                  height=800,
                  plot_bgcolor='white',
                  # Axis titles carry the units from the source column names.
                  xaxis=dict(title='Actual irradiance (W/m2)',
                             linecolor='black', showgrid=True, gridcolor='lightgrey'),
                  yaxis=dict(title='Generation power (kW)',
                             linecolor='black', showgrid=True, gridcolor='lightgrey'),
                  title_font=dict(size=24))

fig.show()
# fig.write_html('irradiance_to_pv_conversion.html')
# fig.write_image('irradiance_to_pv_conversion.pdf')

In [23]:
# Report for the first fit (reg1 on X1 / y1): coefficients and in-sample errors
mse1 = mean_squared_error(y1, y_pred1)
report1 = [
    ('Slope:', reg1.coef_[0][0]),
    ('Intercept:', reg1.intercept_[0]),
    ('Mean Absolute Error:', mean_absolute_error(y1, y_pred1)),
    ('Mean Squared Error:', mse1),
    ('Root Mean Squared Error:', np.sqrt(mse1)),
    ('R2 score:', reg1.score(X1, y1)),
]

print('Linear regression:')
for label, value in report1:
    print(label, value)


Linear regression:
Slope: 0.00436929074130436
Intercept: 0.027723465462420105
Mean Absolute Error: 0.26038693982131705
Mean Squared Error: 0.16161092543264932
Root Mean Squared Error: 0.4020086136299188
R2 score: 0.8929240606452344


In [24]:
# Report for the second fit (reg2 on X2 / y2): coefficients and in-sample errors
mse2 = mean_squared_error(y2, y_pred2)
report2 = [
    ('Slope:', reg2.coef_[0][0]),
    ('Intercept:', reg2.intercept_[0]),
    ('Mean Absolute Error:', mean_absolute_error(y2, y_pred2)),
    ('Mean Squared Error:', mse2),
    ('Root Mean Squared Error:', np.sqrt(mse2)),
    ('R2 score:', reg2.score(X2, y2)),
]

print('Linear regression:')
for label, value in report2:
    print(label, value)


Linear regression:
Slope: 0.0025168117331814737
Intercept: -0.04122701888731106
Mean Absolute Error: 0.14952539490352318
Mean Squared Error: 0.0518945360780341
Root Mean Squared Error: 0.2278037227045118
R2 score: 0.8838774339350601
