In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pybats.shared import load_us_inflation_forecasts
from pybats.define_models import define_dglm
from pybats.analysis import analysis
from pybats.latent_factor import dlm_coef_lf
from pybats.point_forecast import mean
from pybats.loss_functions import MAPE
from pybats.plot import plot_corr, plot_data_forecast

In [2]:
# Load the cleaned data set from the working directory; `data` is the frame
# every later cell in this notebook starts from.
data = pd.read_csv('out_clean.csv')
data

Unnamed: 0,Date,Total Generation (KWH),Insolation,Grid Failure,Inverter,Module Cleaning,Rainy day,No Module Cleaning,Transformer replacement and maintenance,Cable and Fuse maintenance,Plant Shutdown,Internet,Battery,Cloudy,Module Cleaning by rains,NO-OFF-CLEAN MODULES
0,2012-07-06,3218.00,9.37536,1,0,0,0,0,0,0,0,0,0,0,0,0.0
1,2012-07-07,3633.00,9.37536,1,0,0,0,0,0,0,0,0,0,0,0,0.0
2,2012-07-08,4377.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0,0.0
3,2012-07-09,4625.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0,0.0
4,2012-07-10,648.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2365,2018-12-27,4129.29,5.74700,1,0,1,0,0,0,0,0,0,0,0,0,971.0
2366,2018-12-28,4266.50,5.68900,0,0,1,0,0,0,0,0,0,0,0,0,0.0
2367,2018-12-29,3732.03,5.83400,1,0,1,0,0,0,0,0,0,0,0,0,720.0
2368,2018-12-30,4063.88,5.58400,0,1,0,0,0,0,0,0,0,0,0,0,0.0


In [3]:
# List the column labels so later cells can refer to them by exact name.
data.columns

Index(['Date', 'Total Generation (KWH)', 'Insolation', 'Grid Failure',
       'Inverter', 'Module Cleaning', 'Rainy day', 'No Module Cleaning',
       'Transformer replacement and maintenance', 'Cable and Fuse maintenance',
       'Plant Shutdown', 'Internet', 'Battery', 'Cloudy',
       'Module Cleaning by rains', 'NO-OFF-CLEAN MODULES'],
      dtype='object')

In [4]:

# Column-wise means. `numeric_only=True` makes the exclusion of the string
# 'Date' column explicit: relying on pandas to drop non-numeric columns
# silently is deprecated and raises a TypeError on pandas >= 2.0.
data.mean(axis=0, numeric_only=True)

  data.mean(axis=0)


Total Generation (KWH)                      3968.274734
Insolation                                     9.375360
Grid Failure                                   0.362025
Inverter                                       0.102532
Module Cleaning                                0.336287
Rainy day                                      0.018565
No Module Cleaning                             0.036287
Transformer replacement and maintenance        0.008017
Cable and Fuse maintenance                     0.027848
Plant Shutdown                                 0.000422
Internet                                       0.025316
Battery                                        0.011392
Cloudy                                         0.169198
Module Cleaning by rains                       0.000000
NO-OFF-CLEAN MODULES                       11330.055696
dtype: float64

In [5]:
# NOTE(review): `data` comes from pd.read_csv above and is therefore already a
# DataFrame; this re-wrap leaves its contents unchanged and could be removed.
data = pd.DataFrame(data)
data

Unnamed: 0,Date,Total Generation (KWH),Insolation,Grid Failure,Inverter,Module Cleaning,Rainy day,No Module Cleaning,Transformer replacement and maintenance,Cable and Fuse maintenance,Plant Shutdown,Internet,Battery,Cloudy,Module Cleaning by rains,NO-OFF-CLEAN MODULES
0,2012-07-06,3218.00,9.37536,1,0,0,0,0,0,0,0,0,0,0,0,0.0
1,2012-07-07,3633.00,9.37536,1,0,0,0,0,0,0,0,0,0,0,0,0.0
2,2012-07-08,4377.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0,0.0
3,2012-07-09,4625.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0,0.0
4,2012-07-10,648.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2365,2018-12-27,4129.29,5.74700,1,0,1,0,0,0,0,0,0,0,0,0,971.0
2366,2018-12-28,4266.50,5.68900,0,0,1,0,0,0,0,0,0,0,0,0,0.0
2367,2018-12-29,3732.03,5.83400,1,0,1,0,0,0,0,0,0,0,0,0,720.0
2368,2018-12-30,4063.88,5.58400,0,1,0,0,0,0,0,0,0,0,0,0,0.0


In [6]:
# Build `dat` as a copy of `data` without the 'NO-OFF-CLEAN MODULES' column;
# `data` itself is not modified.
dat = data.drop(columns=['NO-OFF-CLEAN MODULES'])
dat

Unnamed: 0,Date,Total Generation (KWH),Insolation,Grid Failure,Inverter,Module Cleaning,Rainy day,No Module Cleaning,Transformer replacement and maintenance,Cable and Fuse maintenance,Plant Shutdown,Internet,Battery,Cloudy,Module Cleaning by rains
0,2012-07-06,3218.00,9.37536,1,0,0,0,0,0,0,0,0,0,0,0
1,2012-07-07,3633.00,9.37536,1,0,0,0,0,0,0,0,0,0,0,0
2,2012-07-08,4377.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0
3,2012-07-09,4625.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0
4,2012-07-10,648.00,9.37536,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2365,2018-12-27,4129.29,5.74700,1,0,1,0,0,0,0,0,0,0,0,0
2366,2018-12-28,4266.50,5.68900,0,0,1,0,0,0,0,0,0,0,0,0
2367,2018-12-29,3732.03,5.83400,1,0,1,0,0,0,0,0,0,0,0,0
2368,2018-12-30,4063.88,5.58400,0,1,0,0,0,0,0,0,0,0,0,0


In [7]:
# Column-wise means of the reduced frame. `numeric_only=True` makes the
# exclusion of the string 'Date' column explicit: relying on pandas to drop
# non-numeric columns silently is deprecated and raises a TypeError on
# pandas >= 2.0.
dat.mean(axis=0, numeric_only=True)

  dat.mean(axis=0)


Total Generation (KWH)                     3968.274734
Insolation                                    9.375360
Grid Failure                                  0.362025
Inverter                                      0.102532
Module Cleaning                               0.336287
Rainy day                                     0.018565
No Module Cleaning                            0.036287
Transformer replacement and maintenance       0.008017
Cable and Fuse maintenance                    0.027848
Plant Shutdown                                0.000422
Internet                                      0.025316
Battery                                       0.011392
Cloudy                                        0.169198
Module Cleaning by rains                      0.000000
dtype: float64

In [8]:
def load_us_inflation_forecasts(path='out_clean.csv'):
    """Read the cleaned CSV and return it as a DataFrame.

    The name is kept so existing calls keep working, but note that this
    definition shadows the ``load_us_inflation_forecasts`` imported from
    ``pybats.shared`` at the top of the notebook and does not load the
    PyBATS inflation example: it only reads ``path``.

    The earlier body re-read the same file three more times to build
    ``agent_mean`` / ``agent_var`` / ``agent_dof`` intermediates that were
    never returned, and it could not reach its ``return``: assigning a
    one-element list to ``columns`` of a multi-column frame raises
    ``ValueError``, ``ndarray`` has no ``.div`` method, and
    ``set_index('Date')`` fails on a frame built from a bare array because
    its column labels are integers. Only the value that was actually
    returned is kept here.

    Parameters
    ----------
    path : str or path-like, default 'out_clean.csv'
        Location of the CSV file. The default preserves the original
        hard-coded file name, resolved against the working directory.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as ``pd.read_csv(path)`` parses them
        (no index is set and no column is converted).

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """
    return pd.read_csv(path)

In [None]:
data = load_us_inflation_forecasts()
# The date column is labelled 'Date' (see the `data.columns` output earlier in
# the notebook); there is no 'Data' column to look up.
dates = data['Date']
dates

In [9]:
from pybats.latent_factor import latent_factor

In [11]:
# NOTE(review): this cell needs `dates`, which is only defined by the preceding
# cell; that cell is marked `In [None]` (never executed), hence the NameError
# recorded in this cell's output.
k = 1 #Forecast horizon: one step ahead (the CSV rows shown above are daily, not quarterly)
forecast_start = 50
forecast_end = 149
agents=[1,2,3,4]
nagents = len(agents)
Y = data['Total Generation (KWH)']

# NOTE(review): 'model_mean' and 'model_var' are not among the columns listed
# by `data.columns` earlier in the notebook, so these lookups presumably expect
# per-agent forecast means/variances that have not been built yet — TODO
# confirm where they should come from before running this cell.
lf = latent_factor(
    mean = {d:m.astype('float64') for d, m in zip(dates, list(data['model_mean'].values))},
    var={d:np.diag(v).astype('float64') for d, v in zip(dates, list(data['model_var'].values))},
    forecast_mean={d:[m.astype('float64')] for d, m in zip(dates, list(data['model_mean'].values))},
    forecast_var={d:[np.diag(v).astype('float64')] for d,v in zip(dates, list(data['model_var'].values))},
    forecast_dates=dates,
    p = nagents,
    k = k)

NameError: name 'dates' is not defined