![Callysto.ca Banner](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-top.jpg?raw=true)

In [None]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from datetime import datetime


In [None]:
# Load the solar energy CSV straight from the callysto/data-files GitHub repo.
solar_data = pd.read_csv(
    "https://raw.githubusercontent.com/callysto/data-files/main/"
    "data-viz-of-the-week/solar-energy/solar_energy.csv"
)
solar_data

In [None]:
# Count the missing values in every column of the dataset.
null_count = solar_data.isna().sum()
print(null_count)

In [None]:
# Distinct station names and distinct addresses, kept for later use.
names = solar_data['name'].unique()
addresses = solar_data['address'].unique()

# Pair each station with its address row-wise. The two unique() arrays above
# are computed independently, so matching them up by position is only correct
# when every station has exactly one address and no address is shared.
station_addresses = solar_data[['name', 'address']].drop_duplicates()

for station_name, station_address in zip(station_addresses['name'],
                                         station_addresses['address']):
    print(f'{station_name} ({station_address})')

In [None]:
# Number of rows recorded for each station name, most frequent first.
counts_of_names = solar_data['name'].value_counts()
counts_of_names

In [None]:
# Parse the 'date' column and keep only the calendar date of each reading.
solar_data['date'] = pd.to_datetime(solar_data['date']).dt.date

# For every (station, date) pair compute the mean and the median kWh,
# naming the result columns average_kWh and median_kWh directly.
kwh_by_station_and_date = solar_data.groupby(['name', 'date'])['kWh']
avg_solar_df = kwh_by_station_and_date.agg(
    average_kWh='mean',
    median_kWh='median',
).reset_index()

avg_solar_df

In [None]:
station1 = 'Southland Leisure Centre'
station2 = 'Whitehorn Multi-Service Centre'


def _station_figure(stats_df, station):
    """Build a side-by-side mean/median kWh figure for one station.

    Args:
        stats_df: DataFrame with 'name', 'date', 'average_kWh' and
            'median_kWh' columns.
        station: Value of the 'name' column selecting the station to plot.

    Returns:
        A plotly figure with one row and two columns: the mean kWh trace on
        the left and the median kWh trace on the right, both plotted
        against 'date'.
    """
    # Filter once and reuse the rows for both traces.
    station_rows = stats_df[stats_df['name'] == station]

    mean_title = f'Mean kWh produced by {station}'
    median_title = f'Median kWh produced by {station}'

    figure = make_subplots(rows=1, cols=2, subplot_titles=(mean_title, median_title))

    # Each subplot title doubles as the legend name of its trace.
    panels = (('average_kWh', mean_title), ('median_kWh', median_title))
    for col, (value_column, title) in enumerate(panels, start=1):
        figure.add_trace(
            go.Scatter(
                x=station_rows['date'],
                y=station_rows[value_column],
                name=title
            ),
            row=1, col=col
        )

    return figure


station_fig1 = _station_figure(avg_solar_df, station1)
station_fig2 = _station_figure(avg_solar_df, station2)

station_fig1.show()
station_fig2.show()

In [None]:
# Ensure 'date' is a datetime column so the .dt accessor can extract the year.
solar_data['date'] = pd.to_datetime(solar_data['date'])
solar_data['year'] = solar_data['date'].dt.year

# Mean kWh across all rows of each year; 'year' stays a regular column.
mean_kWh_per_year = solar_data.groupby('year', as_index=False)['kWh'].mean()

mean_kWh_per_year

In [None]:
# Parse the installation dates so plotly treats the x axis as a date axis.
solar_data['installationDate'] = pd.to_datetime(solar_data['installationDate'])

# Mean kWh for each (station, installation date) pair; the value column is named 'mean'.
mean_avg_kWh = solar_data.groupby(['name', 'installationDate'])['kWh'].agg(['mean']).reset_index()

# One point per station, coloured by station name and labelled with its mean kWh.
fig = px.scatter(mean_avg_kWh, x='installationDate', y='mean', title='Installation Date vs Average kWh',
             text='mean', hover_data=['installationDate'], color='name', labels={'mean': 'Avg kWh'})

# Show the point labels with two decimal places.
fig.update_traces(texttemplate='%{text:.2f}')
fig.update_layout(
    # The x axis plots installationDate, so label it as such
    # (it was previously titled 'Solar Panel Station').
    xaxis_title='Installation Date',
    yaxis_title='Average kWh',
)

fig.show()