In [61]:
# Importing packages
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import sklearn
from plotly.subplots import make_subplots

In [51]:
# Reading pvwatt data
df = pd.read_csv("pvwatts_hourly.csv", skiprows=30)
df.head(5)

Unnamed: 0,Month,Day,Hour,Beam Irradiance (W/m2),Diffuse Irradiance (W/m2),Ambient Temperature (C),Wind Speed (m/s),Albedo,Plane of Array Irradiance (kW/m2),Cell Temperature (C),DC Array Output (W),AC System Output (W)
0,1,1,0,0,0,16.0,0.5,0.27,0.0,16.0,0.0,0.0
1,1,1,1,0,0,15.6,2.1,0.27,0.0,15.6,0.0,0.0
2,1,1,2,0,0,15.1,2.1,0.27,0.0,15.1,0.0,0.0
3,1,1,3,0,0,14.8,2.1,0.27,0.0,14.8,0.0,0.0
4,1,1,4,0,0,14.4,1.0,0.27,0.0,14.4,0.0,0.0


In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 12 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Month                              8760 non-null   int64  
 1   Day                                8760 non-null   int64  
 2   Hour                               8760 non-null   int64  
 3   Beam Irradiance (W/m2)             8760 non-null   int64  
 4   Diffuse Irradiance (W/m2)          8760 non-null   int64  
 5   Ambient Temperature (C)            8760 non-null   float64
 6   Wind Speed (m/s)                   8760 non-null   float64
 7   Albedo                             8760 non-null   float64
 8   Plane of Array Irradiance (kW/m2)  8760 non-null   float64
 9   Cell Temperature (C)               8760 non-null   float64
 10  DC Array Output (W)                8760 non-null   float64
 11  AC System Output (W)               8760 non-null   float

In [53]:
df.describe()

Unnamed: 0,Month,Day,Hour,Beam Irradiance (W/m2),Diffuse Irradiance (W/m2),Ambient Temperature (C),Wind Speed (m/s),Albedo,Plane of Array Irradiance (kW/m2),Cell Temperature (C),DC Array Output (W),AC System Output (W)
count,8760.0,8760.0,8760.0,8760.0,8760.0,8760.0,8760.0,8760.0,8760.0,8760.0,8760.0,8760.0
mean,6.526027,15.720548,11.5,261.977397,69.260731,27.135297,3.644521,0.241479,268.96017,31.379833,1684.112963,1608.241745
std,3.448048,8.796749,6.922582,343.125973,88.571262,7.394359,2.252985,0.025772,356.446872,12.609671,2216.293048,2125.999656
min,1.0,1.0,0.0,0.0,0.0,5.0,0.0,0.19,0.0,0.0,0.0,0.0
25%,4.0,8.0,5.75,0.0,0.0,21.5,2.1,0.21,0.0,22.39975,0.0,0.0
50%,7.0,16.0,11.5,0.0,10.0,27.0,3.1,0.25,9.248,29.805,35.86,2.8805
75%,10.0,23.0,17.25,605.0,132.0,32.5,5.1,0.26,582.44575,39.31425,3783.69525,3617.91575
max,12.0,31.0,23.0,975.0,538.0,47.0,24.2,0.27,1077.771,77.531,6897.414,6621.303


In [54]:
# Adding year column and datetime column
df['year'] = 2022
#df['datetime'] = pd.to_datetime(year = '2022', month = 'Month', day = 'Day', hour = 'Hour')
df['datetime'] = pd.to_datetime(df[['year', 'Month', 'Day', 'Hour']])
#df = df.set_index(['datetime'])
df.head(4)

Unnamed: 0,Month,Day,Hour,Beam Irradiance (W/m2),Diffuse Irradiance (W/m2),Ambient Temperature (C),Wind Speed (m/s),Albedo,Plane of Array Irradiance (kW/m2),Cell Temperature (C),DC Array Output (W),AC System Output (W),year,datetime
0,1,1,0,0,0,16.0,0.5,0.27,0.0,16.0,0.0,0.0,2022,2022-01-01 00:00:00
1,1,1,1,0,0,15.6,2.1,0.27,0.0,15.6,0.0,0.0,2022,2022-01-01 01:00:00
2,1,1,2,0,0,15.1,2.1,0.27,0.0,15.1,0.0,0.0,2022,2022-01-01 02:00:00
3,1,1,3,0,0,14.8,2.1,0.27,0.0,14.8,0.0,0.0,2022,2022-01-01 03:00:00


In [55]:
df.columns

Index(['Month', 'Day', 'Hour', 'Beam Irradiance (W/m2)',
       'Diffuse Irradiance (W/m2)', 'Ambient Temperature (C)',
       'Wind Speed (m/s)', 'Albedo', 'Plane of Array Irradiance (kW/m2)',
       'Cell Temperature (C)', 'DC Array Output (W)', 'AC System Output (W)',
       'year', 'datetime'],
      dtype='object')

In [59]:
var_cals_list = ['Beam Irradiance (W/m2)',
       'Diffuse Irradiance (W/m2)', 'Ambient Temperature (C)',
       'Wind Speed (m/s)', 'Albedo', 'Plane of Array Irradiance (kW/m2)',
       'Cell Temperature (C)', 'DC Array Output (W)', 'AC System Output (W)']
for col in var_cals_list:
    fig = px.line(df, x = 'datetime', y = col, title= str(col) + "days")
    fig.show()

## Hourly Analysis

In [60]:
# create scatter plot with subplots using Plotly Express
for col in var_cals_list :
    fig = px.line(df, x='datetime', y=col, color='Hour', facet_col='Hour', title='Subplots of '+str(col)+ ' Grouped Data')
    fig
    # show plot
    fig.show()

In [72]:
fig = make_subplots(rows=24, cols=1, horizontal_spacing=0.1, vertical_spacing=0.04)

for i, group in enumerate(df['Hour'].unique()):
    group_data = df[df['Hour'] == group]
    scatter = go.Scatter(x=group_data['datetime'], y=group_data['DC Array Output (W)'], mode='markers')
    fig.add_trace(scatter, row=i+1, col=1)

fig.update_layout(title='Subplots of Grouped Data')
fig.show()

In [80]:
for hr, df_hr in enumerate(df.groupby(['Hour'])):
        #fig = px.line(df, x='datetime', y=col, color='Hour', facet_col='Hour', title='Subplots of '+str(col)+ ' Grouped Data')
    fig = px.line(df_hr, x='datetime', y = 'DC Array Output (W)',color='Hour', title = str('DC Array Output (W)') + "VS Datetime")
    fig.show()





ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of [0] but received: datetime

In [65]:
df['Hour'].unique()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [82]:
for hr, df_hr in enumerate(df.groupby(['Hour'])):
    print(hr)
    print(df_hr.columns)

0






AttributeError: 'tuple' object has no attribute 'columns'