In [6]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import statsmodels.api as sm
import matplotlib.pyplot as plt
from datetime import datetime, date
import datetime
import psycopg2
#import MySQLdb
import pycountry
import pandas.io.sql as psql
from pandas import Series, DataFrame, Panel


In [11]:
import pymysql
MySQLdb=pymysql.install_as_MySQLdb()

In [14]:
from preprocessing import preprocess
import os
from os.path import isfile, join
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot

def parser(x):
	return datetime.strptime(x, '%Y-%m-%d')

# Get the data.
filename='433440-99999-merge.csv'

#filename='india_all_stations_comma.txt'
series = read_csv( filename, header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
    
series=series.dropna(axis=0)
#series = series.iloc[:1000,]  # independent variables
print(series.head())
series.describe()

YEARMODA
2000-01-01    76.0
2000-01-02    77.1
2000-01-03    76.0
2000-01-04    77.3
2000-01-05    76.9
Name: TEMP, dtype: float64


count    6201.000000
mean       84.337687
std         4.764565
min        69.000000
25%        80.200000
50%        85.000000
75%        88.300000
max        95.100000
Name: TEMP, dtype: float64

In [27]:

class Mixed_model:
    

    def __init__(self, stTrain, endTrain, stPred, endPred, target_country, baseline_country, order,
                 seasonal_order=None, paid=True, baseorg=True, view=True, rank=True):
        
        self.stTrain = stTrain
        self.endTrain = endTrain
        self.stPred = stPred
        self.endPred = endPred
        self.target = target_country
        self.baseline = baseline_country
        self.order = order
        self.seasonal_order = seasonal_order
        self.paid = paid
        self.baseorg = baseorg
        self.view = view
        self.rank = rank
        stT = date(int(self.stTrain.split('-')[0]), int(self.stTrain.split('-')[1]), int(self.stTrain.split('-')[2]))
        endT = date(int(self.endTrain.split('-')[0]), int(self.endTrain.split('-')[1]), int(self.endTrain.split('-')[2]))
        stP = date(int(self.stPred.split('-')[0]), int(self.stPred.split('-')[1]), int(self.stPred.split('-')[2]))
        self.endP = date(int(self.endPred.split('-')[0]), int(self.endPred.split('-')[1]), int(self.endPred.split('-')[2]))
        self.period = (self.endP - stT).days + 1
        self.pred_period = (self.endP - stP).days + 1
        
        
    def source_data(self):
        
        # Get the data.
        filename='433440-99999-merge.csv'

        #filename='india_all_stations_comma.txt'
        series = read_csv( filename, header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
    
        series=series.dropna(axis=0)
        
        return (series, series)
        
    def fit_model(self):
        
        endog, exog = self.source_data()
        if self.seasonal_order != None:
            mod = sm.tsa.statespace.SARIMAX(endog.ix[self.stTrain:self.endTrain], exog = exog.ix[self.stTrain:self.endTrain],
                                            order=self.order, trend = 'c', seasonal_order = self.seasonal_order, enforce_stationarity=False,
                                            enforce_invertibility=False)
        else:
            mod = sm.tsa.statespace.SARIMAX(endog.ix[self.stTrain:self.endTrain], exog = exog.ix[self.stTrain:self.endTrain],
                                            order=self.order, trend = 'c', enforce_stationarity=False, enforce_invertibility=False)
                                            
        fit_res = mod.fit(trend='c', disp=False,transparams=True)
        
        
        if self.seasonal_order != None:
            mod = sm.tsa.statespace.SARIMAX(endog, exog = exog, order=self.order, trend = 'c', 
                                            seasonal_order = self.seasonal_order, enforce_stationarity=False, enforce_invertibility=False)
        else:
            mod = sm.tsa.statespace.SARIMAX(endog, exog = exog, order=self.order, trend = 'c', enforce_stationarity=False, enforce_invertibility=False)
            
        mod.update(fit_res.params)
        res = mod.filter(fit_res.params)
        
        return (res, fit_res)
        
    def predict_ts(self):
        
        res, fit_res = self.fit_model()
        predict_res = res.get_prediction(full_results=True)#res.predict(full_results=True)
        predict = predict_res.predicted_mean#forecasts
        idx = res.data.predict_dates._mpl_repr()        
        predict_dy_res = res.predict(dynamic=self.period-self.pred_period-1, full_results=True)
        predict_dy = predict_dy_res.predicted_mean#forecasts
        cov = predict_res.forecasts_error_cov
        
        # 95% confidence intervals
        critical_value = norm.ppf(1 - 0.05 / 2.)
        std_errors = np.sqrt(cov.diagonal().T)
        ci = np.c_[
            (predict - critical_value*std_errors)[:, :, None],
            (predict + critical_value*std_errors)[:, :, None],
        ]
                
        # Dynamic predictions
        cov_dy = predict_dy_res.forecasts_error_cov
        
        # 95% confidence intervals
        critical_value = norm.ppf(1 - 0.05 / 2.)
        std_errors_dy = np.sqrt(cov_dy.diagonal().T)
        ci_dy = np.c_[
            (predict_dy - critical_value*std_errors_dy)[:, :, None],
            (predict_dy + critical_value*std_errors_dy)[:, :, None],
        ]        
        
        return (ci, predict, idx, ci_dy, predict_dy)
        
    def plot_ts(self):
        
        ci, predict, idx, ci_dy, predict_dy = self.predict_ts()
        endog, exog = self.source_data()

        fig, ax = plt.subplots(figsize=(11,7))
        npre = 7
        plt.title('%s Organic MMM' % self.target, fontsize=18)
        plt.ylabel('Organic Installs', fontsize=14)
        plt.xlabel('Date', fontsize=14)
        dates = pd.date_range(self.stTrain, self.endPred, freq='D')
        ax.plot(dates, endog[-self.period:], color="#3F5D7D",linewidth=2, label='Observed')
        
        
        ax.plot(idx[-self.pred_period-npre:], predict_dy[0, -self.pred_period-npre:], 'y',linewidth=1.5, label='Dynamic forecast')
        ax.plot(idx[-self.pred_period-npre:], ci_dy[0, -self.pred_period-npre:], 'y--', alpha=0.3)
        

        ax.spines["top"].set_visible(False)  
        ax.spines["right"].set_visible(False) 
        ax.get_xaxis().tick_bottom()  
        ax.get_yaxis().tick_left()  
        
        legend = ax.legend(loc='upper left',fontsize=12)
        legend.get_frame().set_facecolor('w')
        
    def mape(self):
        
        res, fit_res = self.fit_model()
        endog, exog = self.source_data()
        ci, predict, idx, ci_dy, predict_dy = self.predict_ts()
        print ("Model Params:")
        print (fit_res.params)
        print ('MAPE =', np.mean(np.abs((endog.iloc[-self.pred_period:] - predict_dy[0, -self.pred_period:])
                                        / endog.iloc[-self.pred_period:])) * 100)
  

In [28]:
m = Mixed_model(stTrain='2015-5-15', endTrain='2015-6-28', stPred='2015-6-29', endPred='2015-7-5', target_country='US',
                baseline_country='CA', order=(1,0,1), seasonal_order=None, paid=True, baseorg=True, view=False, rank=False)
                 
m.plot_ts()
m.mape()
#endog, exog= m.source_data()

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


AttributeError: 'Series' object has no attribute 'predicted_mean'