In [33]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from collections import Counter
from abc import ABC, abstractclassmethod, abstractmethod
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.arima_model import ARIMA
from sklearn import metrics

pd.set_option("display.max_rows",4999)

class FederalRates(ABC):
    
    def __init__(self, fundRate, inflation):
        try: 
            self.fundRate = fundRate
            self.inflation = inflation
            super(FederalRates, self).__init__()
        except AttributeError as e:
            print(e)
            
    @abstractmethod
    def predict(self):
        return self.__init__()   

    @classmethod
    def readData(cls, path):
        rawData = pd.read_csv(path)
        return (rawData)


class InterestRate(FederalRates):
    
    def filtered(self):
        rawInterestRate = self.fundRate["DATE"] > "2019-05-01"
        return self.fundRate[rawInterestRate].set_index("DATE")
        
    def StartARIMAForecasting(self, Actual, P, D, Q):
        """
            P — Auto regressive feature of the model
            D — Differencing order
            Q — Moving average feature of the model

        """
        model = ARIMA(Actual, order=(P, D, Q))
        model_fit = model.fit(disp=0)
        prediction = model_fit.forecast()[0]
        return prediction
    
    def testValues(self):
        values = sorted(self.predict()["FF"], reverse=True)
        freqTable = dict(Counter(values))
        pV = sum(values)/len(values)
        des = self.predict()["FF"].describe()
        print(des)
        #predict next value
        predicted = self.StartARIMAForecasting(values, 2,1,0)
        print('Predicted=%f' % (predicted))
        NumberOfElements = len(ActualData)
        
   def predict(self):
        interestRate = self.filtered()
        #Use 70% of data as training, rest 30% to Test model
        TrainingSize = int(NumberOfElements * 0.7)
        TrainingData = ActualData[0:TrainingSize]
        TestData = ActualData[TrainingSize:NumberOfElements]

        #new arrays to store actual and predictions
        Actual = [x for x in TrainingData]
        Predictions = list()


        #in a for loop, predict values using ARIMA model
        for timepoint in range(len(TestData)):
            ActualValue =  TestData[timepoint]
            #forcast value
            Prediction = StartARIMAForecasting(Actual, 3,1,0)    
            print('Actual=%f, Predicted=%f' % (ActualValue, Prediction))
            #add it in the list
            Predictions.append(Prediction)
            Actual.append(ActualValue)

        #Print MSE to see how good the model is
        Error = MeanSquaredError(TestData, Predictions)
        print('Test Mean Squared Error (smaller the better fit): %.3f' % Error)
        # plot
        pyplot.plot(TestData)
        pyplot.plot(Predictions, color='red')
        pyplot.show()
        return interestRate     

        




           

    
def main(rawInterestRateData, rawInflationRateData):
    interestRate = InterestRate(FederalRates.readData(rawInterestRateData), FederalRates.readData(rawInflationRateData))
    return interestRate.testValues()
 
if __name__ == "__main__":
    main("./FF.csv", "./index.csv")


count    29.000000
mean      2.130345
std       0.289772
min       1.550000
25%       1.850000
50%       2.130000
75%       2.390000
max       2.410000
Name: FF, dtype: float64
Predicted=1.517714
