# Data Analysis with Time Series Data
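This notebook walks through analyzing a daily time series with pandas: loading the data and indexing it by date, visualizing seasonality, resampling to weekly means, and extracting trends with rolling means.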

In [None]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the daily CSV into a DataFrame (no index or date parsing yet)
dataset_path = './opsd_germany_daily.csv'
opsd_daily = pd.read_csv(filepath_or_buffer=dataset_path)

# Report (rows, columns) and the per-column dtypes, then preview the top rows
print(opsd_daily.shape, opsd_daily.dtypes, sep='\n')
opsd_daily.head(n=3)

In [None]:
# Promote the 'Date' column to the row index and preview the result.
# The column is moved out of the frame, so this cell only works once per load.
opsd_daily = opsd_daily.set_index(keys='Date')
opsd_daily.head(n=3)

In [None]:
# Re-read the CSV with its first column as a parsed date index, then derive
# calendar columns (year, month number, weekday name) from that index.
opsd_daily = pd.read_csv(dataset_path, parse_dates=True, index_col=0).assign(
    **{
        'Year': lambda frame: frame.index.year,
        'Month': lambda frame: frame.index.month,
        'Weekday Name': lambda frame: frame.index.day_name(),
    }
)

# Show 5 randomly chosen rows; the fixed seed keeps the selection reproducible
opsd_daily.sample(n=5, random_state=0)

In [None]:
# Apply seaborn's default theme and make 11x4 inches the default figure size.
# set_theme() is the preferred spelling; sns.set() is only an alias for it
# that the seaborn documentation says may be removed in the future.
sns.set_theme(rc={'figure.figsize': (11, 4)})

# Line plot of the full 'Consumption' series; a thin line keeps the dense
# daily data readable.
opsd_daily['Consumption'].plot(linewidth=0.5)
# Label the y-axis with the same text the trend plot uses for consumption,
# so the units are visible here as well.
plt.ylabel('Consumption (GWh)')
plt.show()

In [None]:
# One box plot per series, grouped by calendar month, stacked vertically on a
# shared x-axis so the monthly distributions can be compared across series.
fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(11, 10))

for ax, name in zip(axes, ('Consumption', 'Solar', 'Wind')):
    sns.boxplot(x='Month', y=name, data=opsd_daily, ax=ax)
    # Title each panel with the series name and replace the y-label that
    # seaborn derives from the column name with the unit.
    ax.set_title(name)
    ax.set_ylabel('GWh')

plt.show()

In [None]:
# Down-sample the selected columns to weekly bins, taking the mean of each bin
data_columns = ['Consumption', 'Wind', 'Solar', 'Wind+Solar']
opsd_weekly_mean = opsd_daily.loc[:, data_columns].resample(rule='W').mean()

# Overlay the daily and weekly-mean 'Solar' series for January-June 2017
fig, ax = plt.subplots()
ax.plot(
    opsd_daily.loc[slice('2017-01', '2017-06'), 'Solar'],
    label='Daily', linestyle='-', linewidth=0.5, marker='.',
)
ax.plot(
    opsd_weekly_mean.loc[slice('2017-01', '2017-06'), 'Solar'],
    label='Weekly Mean Resample', linestyle='-', marker='o', markersize=8,
)
ax.set(ylabel='Solar Production (GWh)')
ax.legend()
plt.show()

In [None]:
# Centered rolling mean over a 7-row window (7 days for daily data).
# With the default min_periods, rows lacking a full window are NaN.
opsd_7d = opsd_daily.loc[:, data_columns].rolling(window=7, center=True).mean()

# Preview the first 10 rows of the smoothed data
opsd_7d.head(n=10)

In [None]:
# Centered 365-row rolling mean used as the long-term trend. min_periods=360
# still yields a value when a window holds at least 360 valid observations,
# so a few missing days do not blank out the trend.
opsd_365d = opsd_daily[data_columns].rolling(window=365, center=True, min_periods=360).mean()

# Plot the trends in electricity consumption: raw daily points, the 7-day
# rolling mean, and the 365-day trend on one set of axes.
fig, ax = plt.subplots()
ax.plot(opsd_daily['Consumption'], marker='.', markersize=2, color='0.6', linestyle='None', label='Daily')
ax.plot(opsd_7d['Consumption'], linewidth=2, label='7-d Rolling Mean')
ax.plot(opsd_365d['Consumption'], color='0.2', linewidth=3, label='Trend (365-d Rolling Mean)')
# Use a date-aware locator so ticks fall on year boundaries, matching the
# 'Year' axis label. A numeric locator such as MaxNLocator works on the
# underlying float day-numbers and puts ticks at arbitrary dates.
ax.xaxis.set_major_locator(mdates.YearLocator())
# Pin the tick text to the four-digit year so label and position agree.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
ax.legend()
ax.set_xlabel('Year')
ax.set_ylabel('Consumption (GWh)')
ax.set_title('Trends in Electricity Consumption')
plt.show()