### SECTION 1

This section contains the forecasting functions.

In [None]:
### SIMPLE MOVING AVERAGE ###

In [1]:
def SMA(uploadedFile, windowVal):
    """Forecast monthly cash inflows with a simple moving average.

    Parameters
    ----------
    uploadedFile : pandas.DataFrame
        Invoice log with the columns 'Customer', 'Invoice Date',
        'Paid Date' (dates in m/d/Y format) and 'Amount'.  The frame is
        copied before any column is converted, so the caller's data is
        left untouched.
    windowVal : int
        Number of months in the moving window; must be greater than zero.

    Returns
    -------
    pandas.DataFrame
        One row per month end ('calcDate') holding the cash received in
        that month ('actualCashInflows') and the mean of the last
        ``windowVal`` months ('SMA_forecast'), rounded to two decimals.
        Rows that do not yet have ``windowVal`` months of history carry
        NaN as forecast.

    Raises
    ------
    TypeError
        If ``windowVal`` is not an int or is not greater than zero
        (TypeError is kept for both cases so existing callers still work).
    """
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(windowVal, bool) or not isinstance(windowVal, int):
        raise TypeError("Only integers are allowed")
    if windowVal <= 0:
        raise TypeError("Only integers greater than zero are allowed")

    import pandas as pd
    from dateutil.relativedelta import relativedelta

    date_cols = ['Invoice Date', 'Paid Date']
    invoices = uploadedFile.copy()
    # For now we assume the user uploads dates in m/d/Y format.
    invoices[date_cols] = invoices[date_cols].apply(pd.to_datetime, format='%m/%d/%Y')

    rows = []
    for cust in invoices['Customer'].drop_duplicates():
        cust_invoices = invoices[invoices['Customer'] == cust]
        paid = cust_invoices['Paid Date']

        # relativedelta(day=31) snaps a date to the last day of its month.
        month_close = cust_invoices['Invoice Date'].min() + relativedelta(day=31)
        last_close = cust_invoices['Invoice Date'].max() + relativedelta(day=31)

        # NOTE(review): the window ends with the month of the customer's
        # last invoice, so payments received in later months are not
        # counted -- confirm this is intended.
        while month_close <= last_close:
            prev_close = month_close + relativedelta(months=-1, day=31)
            # Payments received in the month that ends on month_close.
            in_month = (paid > prev_close) & (paid <= month_close)
            rows.append((month_close, cust_invoices.loc[in_month, 'Amount'].sum()))
            month_close = month_close + relativedelta(months=+1, day=31)

    monthly = pd.DataFrame(rows, columns=['calcDate', 'actualCashInflows'])
    # Sum only the amount column: summing the whole frame would also try to
    # aggregate the customer names.
    monthly = monthly.groupby('calcDate')['actualCashInflows'].sum().reset_index()
    monthly['SMA_forecast'] = monthly['actualCashInflows'].rolling(window=windowVal).mean()

    return monthly.round(2)

In [2]:
### EXPONENTIAL SMOOTHING ###

In [3]:
def EMA(uploadedFile, smoothVal):
    """Forecast monthly cash inflows with exponential smoothing.

    Parameters
    ----------
    uploadedFile : pandas.DataFrame
        Invoice log with the columns 'Customer', 'Invoice Date',
        'Paid Date' (dates in m/d/Y format) and 'Amount'.  The frame is
        copied before any column is converted, so the caller's data is
        left untouched.
    smoothVal : float
        Smoothing constant, strictly between 0 and 1.

    Returns
    -------
    pandas.DataFrame
        One row per month end ('calcDate') holding the cash received in
        that month ('actualCashInflows') and its exponentially weighted
        mean ('ES_forecast', pandas ``ewm`` with its default settings),
        rounded to two decimals.

    Raises
    ------
    TypeError
        If ``smoothVal`` is not strictly between 0 and 1.
    """
    if not 0 < smoothVal < 1:
        raise TypeError("Only values between 0 and 1 are allowed")

    import pandas as pd
    from dateutil.relativedelta import relativedelta

    date_cols = ['Invoice Date', 'Paid Date']
    invoices = uploadedFile.copy()
    # For now we assume the user uploads dates in m/d/Y format.
    invoices[date_cols] = invoices[date_cols].apply(pd.to_datetime, format='%m/%d/%Y')

    rows = []
    for cust in invoices['Customer'].drop_duplicates():
        cust_invoices = invoices[invoices['Customer'] == cust]
        paid = cust_invoices['Paid Date']

        # relativedelta(day=31) snaps a date to the last day of its month.
        month_close = cust_invoices['Invoice Date'].min() + relativedelta(day=31)
        last_close = cust_invoices['Invoice Date'].max() + relativedelta(day=31)

        # NOTE(review): the window ends with the month of the customer's
        # last invoice, so payments received in later months are not
        # counted -- confirm this is intended.
        while month_close <= last_close:
            prev_close = month_close + relativedelta(months=-1, day=31)
            # Payments received in the month that ends on month_close.
            in_month = (paid > prev_close) & (paid <= month_close)
            rows.append((month_close, cust_invoices.loc[in_month, 'Amount'].sum()))
            month_close = month_close + relativedelta(months=+1, day=31)

    monthly = pd.DataFrame(rows, columns=['calcDate', 'actualCashInflows'])
    # Sum only the amount column: summing the whole frame would also try to
    # aggregate the customer names.
    monthly = monthly.groupby('calcDate')['actualCashInflows'].sum().reset_index()
    # Smooth the inflow series only; running ewm over the whole frame would
    # include the datetime 'calcDate' column.
    monthly['ES_forecast'] = monthly['actualCashInflows'].ewm(alpha=smoothVal).mean()

    return monthly.round(2)

In [4]:
### CORCORAN METHOD ###

In [5]:
def corcoran(uploadedFile,alphaVal):
    """Forecast monthly cash inflows from aged receivables (Corcoran method).

    For every month end the open receivables are split into six aging
    stages (0 = not yet due, 1-4 = overdue by up to 30/60/90/120 days,
    5 = overdue by more than 120 days, treated as bad debt).  The share of
    each stage that left the aging ladder during the following month is
    smoothed exponentially and applied to the current stage balances.

    Parameters
    ----------
    uploadedFile : pandas.DataFrame
        Invoice log with the columns 'Customer', 'Invoice Date',
        'Paid Date' (dates in m/d/Y format) and 'Amount'.  Not modified.
        Invoices are assumed to fall due 30 days after the invoice date.
    alphaVal : float
        Smoothing constant, strictly between 0 and 1.

    Returns
    -------
    pandas.DataFrame
        Columns 'calcDate' (month end), 'ActualCashFlowIn' (cash received
        in that month) and 'stochasticForecast', rounded to two decimals.
        The forecast is NaN until every stage 0-4 has produced at least
        one payment share.

    Raises
    ------
    TypeError
        If ``alphaVal`` is not strictly between 0 and 1.
    """
    if not 0 < alphaVal < 1:
        raise TypeError("alpha can only take values between 0 and 1")

    import numpy as np
    import pandas as pd
    from datetime import timedelta as td
    from dateutil.relativedelta import relativedelta

    stages = list(range(6))
    date_cols = ['Invoice Date', 'Paid Date']

    df = uploadedFile.copy()
    df[date_cols] = df[date_cols].apply(pd.to_datetime, format='%m/%d/%Y')
    df['Due Date'] = df['Invoice Date'] + td(days=30)

    def ARaging(ARlog, calcDate):
        """Return the open amount per aging stage (0-5) as of calcDate."""
        # Invoices issued on or before calcDate that are still unpaid on it.
        still_open = (ARlog['Invoice Date'] <= calcDate) & (ARlog['Paid Date'] > calcDate)
        open_items = ARlog.loc[still_open]
        days_remaining = (open_items['Due Date'] - calcDate).dt.days

        conditions = [
            (days_remaining < 0) & (days_remaining >= -30),
            (days_remaining < -30) & (days_remaining >= -60),
            (days_remaining < -60) & (days_remaining >= -90),
            (days_remaining < -90) & (days_remaining >= -120),
            (days_remaining < -120),  # overdue by more than 120 days: bad debt
            (days_remaining >= 0) & (days_remaining <= 31),  # current, not due yet
        ]
        # Integer labels with an explicit integer default: mixing string
        # choices with np.select's implicit default of 0 has no common dtype
        # on current NumPy.
        status = np.select(conditions, [1, 2, 3, 4, 5, 0], default=0)

        # Only 'Amount' is aggregated; stages without open invoices get 0.
        return open_items['Amount'].groupby(status).sum().reindex(stages, fill_value=0)

    def reporting(df_uploaded):
        """Build one aging row per customer and month end."""
        rows = []
        for cust in df_uploaded['Customer'].drop_duplicates():
            cust_log = df_uploaded[df_uploaded['Customer'] == cust]
            paid = cust_log['Paid Date']

            # relativedelta(day=31) snaps a date to the last day of its month.
            month_close = cust_log['Invoice Date'].min() + relativedelta(day=31)
            last_close = cust_log['Invoice Date'].max() + relativedelta(day=31)

            while month_close <= last_close:
                row = dict(zip(stages, ARaging(cust_log, month_close).to_numpy()))
                row['calcDate'] = month_close
                row['customer'] = cust

                # Payments received in the month that ends on month_close.
                prev_close = month_close + relativedelta(months=-1, day=31)
                in_month = (paid > prev_close) & (paid <= month_close)
                row['ActualCashFlowIn'] = cust_log.loc[in_month, 'Amount'].sum()

                rows.append(row)
                month_close = month_close + relativedelta(months=+1, day=31)

        # Collecting rows and building the frame once replaces repeated
        # DataFrame.append calls (removed in pandas 2.0).
        return pd.DataFrame(rows, columns=stages + ['calcDate', 'customer', 'ActualCashFlowIn'])

    def calcProbs(ARagingReport):
        """Share of each stage 0-4 that did not move on to the next stage.

        Entry k is (stage k now - stage k+1 next month) / stage k now; these
        correspond to T1..T4 in Corcoran (plus the current stage).
        """
        return tuple(
            (ARagingReport[stage] - ARagingReport[stage + 1].shift(-1)) / ARagingReport[stage]
            for stage in range(5)
        )

    def stochForecast(df_uploaded):
        """Aggregate the aging report over customers and attach the forecast."""
        aging = reporting(df_uploaded)
        # The customer names must not take part in the sum.
        agg = aging.drop(columns='customer').groupby('calcDate').sum()

        # shift(1): the share observed between t-1 and t becomes known at t.
        for stage, share in enumerate(calcProbs(agg)):
            agg['s{}ToP'.format(stage)] = share.shift(1)
        agg = agg.reset_index()

        # Exponentially smoothed payment shares.  Stage 5 is bad debt: once
        # a receivable reaches it, no payment is expected any more.
        for stage in range(5):
            agg['expProb_s{}ToP'.format(stage)] = (
                agg['s{}ToP'.format(stage)].ewm(alpha=alphaVal, adjust=False).mean()
            )
        agg['expProb_s5ToP'] = 0

        # Expected payment per stage = open balance * smoothed payment share.
        for stage in stages:
            agg['fore{}'.format(stage)] = agg[stage] * agg['expProb_s{}ToP'.format(stage)]

        forecast = agg['fore0']
        for stage in stages[1:]:
            forecast = forecast + agg['fore{}'.format(stage)]
        agg['stochasticForecast'] = forecast
        return agg

    result_corcoran = stochForecast(df)
    result = result_corcoran[['calcDate', 'ActualCashFlowIn', 'stochasticForecast']]
    return result.round(2)

In [6]:
### PATE-CORNELL METHOD ###

In [7]:
def pate(uploadedFile):
    """Forecast monthly cash inflows per invoice (Pate-Cornell method).

    On every month end the payment lead times (paid date - invoice date)
    of a customer's invoices issued so far give a minimum lead time
    ``gamma`` and a shape parameter.  Each invoice still open on that date
    is weighted with the probability of being paid within the next 30
    days, and the weighted amounts are summed.

    Parameters
    ----------
    uploadedFile : pandas.DataFrame
        Invoice log with the columns 'Customer', 'Invoice Date',
        'Paid Date' (dates in m/d/Y format) and 'Amount'.  Not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'calcDate' (month end), 'bayForecast' and
        'ActualCashflowIn' (cash received in the calendar month of
        calcDate, over all customers), rounded to two decimals.  Rows whose
        month lies outside the range of recorded payment months have no
        actual inflow and are dropped.
    """
    import numpy as np
    import pandas as pd
    from dateutil.relativedelta import relativedelta

    date_cols = ['Invoice Date', 'Paid Date']
    df = uploadedFile.copy()
    df[date_cols] = df[date_cols].apply(pd.to_datetime, format='%m/%d/%Y')

    def shapeOfYou(df_on_calcDate):
        """Return (shape, gamma) estimated from the payment lead times."""
        lead_time = df_on_calcDate['Paid Date'] - df_on_calcDate['Invoice Date']
        xbar = lead_time.mean().days
        gamma = lead_time.min().days
        shape = np.round(2*(xbar-gamma)/(np.pi)**0.5, 4)
        return shape, gamma

    def calcPayProb(df_monthly, shapeVal, gammaVal):
        """Probability that each open invoice is paid within 'delta t' days."""
        age = df_monthly['Invoice Sent days ago'].dt.days.to_numpy(dtype=float)
        horizon = df_monthly['delta t'].to_numpy(dtype=float)
        # Both formulas are evaluated for every row and the applicable one
        # is picked below; warnings from the non-applicable branch or from a
        # zero shape (all lead times identical) are silenced.
        with np.errstate(all='ignore'):
            past_gamma = 1 - np.exp(-(2*(age-gammaVal)*horizon + horizon**2)/shapeVal**2)
            crossing_gamma = 1 - np.exp(-((age + horizon - gammaVal)**2)/shapeVal**2)
        prob = np.where(
            age >= gammaVal,
            past_gamma,                      # already older than gamma
            np.where(age + horizon >= gammaVal,
                     crossing_gamma,         # passes gamma within the horizon
                     0.0),                   # cannot be paid before gamma
        )
        return np.round(prob, 4)

    def bayForecast(df_uploaded):
        """Build the forecast table over all customers."""
        rows = []
        for cust in df_uploaded['Customer'].drop_duplicates():
            cust_log = df_uploaded[df_uploaded['Customer'] == cust]

            # relativedelta(day=31) snaps a date to the last day of its month.
            calc_date = cust_log['Invoice Date'].min() + relativedelta(day=31)
            last_invoice = cust_log['Invoice Date'].max()

            while calc_date < last_invoice:
                calc_date = calc_date + relativedelta(months=+1, day=31)
                history = cust_log[cust_log['Invoice Date'] < calc_date]
                shape, gamma = shapeOfYou(history)

                # Forecast only the invoices not cleared by calc_date.
                # NOTE(review): invoices without a paid date fail this
                # comparison and are therefore never forecast -- confirm.
                unpaid = history[history['Paid Date'] > calc_date].copy()
                unpaid['Invoice Sent days ago'] = calc_date - unpaid['Invoice Date']
                unpaid['delta t'] = 30

                amounts = unpaid['Amount'].to_numpy(dtype=float)
                expected = (amounts * calcPayProb(unpaid, shape, gamma)).sum()
                rows.append((calc_date, expected))

        forecast = pd.DataFrame(rows, columns=['calcDate', 'bayForecast'])
        forecast = forecast.groupby('calcDate')['bayForecast'].sum().reset_index()

        # Actual inflows per payment month, computed on the local copy so the
        # caller's frame does not gain a helper column.
        paid_month = df_uploaded['Paid Date'].dt.to_period('M')
        inflows = df_uploaded['Amount'].groupby(paid_month).sum()
        if not inflows.empty:
            # Months without any payment inside the observed range count as 0.
            observed = pd.period_range(start=inflows.index.min(),
                                       end=inflows.index.max(), freq='M')
            inflows = inflows.reindex(observed, fill_value=0)

        # Attach inflows by calendar month, not by row position: a month
        # without payments would otherwise shift every later value.
        calc_month = pd.to_datetime(forecast['calcDate']).dt.to_period('M')
        forecast['ActualCashflowIn'] = inflows.reindex(pd.Index(calc_month)).to_numpy()
        return forecast

    result = bayForecast(df)
    return result.dropna().round(2)

In [8]:
### SFA METHOD ###

In [9]:
def SFA(uploadedFile,alphaVal,betaVal):
    """Blend the Corcoran and Pate-Cornell forecasts (SFA method).

    ``alphaVal`` is the smoothing constant handed to ``corcoran`` and
    ``betaVal`` the weight of the Corcoran forecast in the blend; both must
    lie strictly between 0 and 1, otherwise a TypeError is raised.

    Returns the ``pate`` result joined with the Corcoran forecast on
    'calcDate', plus the column 'SFA_Forecast':
    ``betaVal * stochasticForecast + (1 - betaVal) * bayForecast`` where a
    Corcoran forecast exists, and ``bayForecast`` alone where it is NaN.
    All values are rounded to two decimals.
    """
    if not 0 < alphaVal < 1:
        raise TypeError("alpha can only take values between 0 and 1")
    if not 0 < betaVal < 1:
        raise TypeError("beta can only take values between 0 and 1")

    import pandas as pd
    import numpy as np
    from datetime import timedelta as td

    invoices = uploadedFile.copy()
    for date_col in ('Invoice Date', 'Paid Date'):
        invoices[date_col] = pd.to_datetime(invoices[date_col], format='%m/%d/%Y')
    invoices['Due Date'] = invoices['Invoice Date'] + td(days=30)

    stochastic = corcoran(invoices, alphaVal)[['calcDate', 'stochasticForecast']]
    bayesian = pate(invoices)
    combined = bayesian.merge(stochastic, on='calcDate')

    # Convex combination of the two forecasts; rows without a Corcoran
    # forecast fall back to the Pate-Cornell forecast.
    blended = betaVal*combined['stochasticForecast'] + (1-betaVal)*combined['bayForecast']
    has_stochastic = combined['stochasticForecast'].notna()
    combined['SFA_Forecast'] = blended.where(has_stochastic, combined['bayForecast'])

    return np.round(combined, 2)

In [10]:
### SVR Method ###

In [11]:
def SVR_method(uploadedFile):
    """Forecast monthly cash inflows from aged receivables using SVR.

    Open receivables are split into six aging stages per month end
    (0 = not yet due, 1-4 = overdue by up to 30/60/90/120 days,
    5 = overdue by more than 120 days, treated as bad debt).  For each
    stage 0-4 the observed payment shares are extrapolated one month ahead
    with an RBF support vector regression on the time index, and the
    predicted shares are applied to the current stage balances.

    Parameters
    ----------
    uploadedFile : pandas.DataFrame
        Invoice log with the columns 'Customer', 'Invoice Date',
        'Paid Date' (dates in m/d/Y format) and 'Amount'.  Not modified.
        Invoices are assumed to fall due 30 days after the invoice date.

    Returns
    -------
    pandas.DataFrame
        Columns 'calcDate' (month end), 'ActualCashFlowIn' (cash received
        in that month) and 'svrForecast', rounded to two decimals.
    """
    import numpy as np
    import pandas as pd
    from datetime import timedelta as td
    from dateutil.relativedelta import relativedelta
    from sklearn.svm import SVR

    stages = list(range(6))
    prob_cols = ['s{}ToP'.format(stage) for stage in range(5)]
    date_cols = ['Invoice Date', 'Paid Date']

    df = uploadedFile.copy()
    df[date_cols] = df[date_cols].apply(pd.to_datetime, format='%m/%d/%Y')
    df['Due Date'] = df['Invoice Date'] + td(days=30)

    def ARaging(ARlog, calcDate):
        """Return the open amount per aging stage (0-5) as of calcDate."""
        # Invoices issued on or before calcDate that are still unpaid on it.
        still_open = (ARlog['Invoice Date'] <= calcDate) & (ARlog['Paid Date'] > calcDate)
        open_items = ARlog.loc[still_open]
        days_remaining = (open_items['Due Date'] - calcDate).dt.days

        conditions = [
            (days_remaining < 0) & (days_remaining >= -30),
            (days_remaining < -30) & (days_remaining >= -60),
            (days_remaining < -60) & (days_remaining >= -90),
            (days_remaining < -90) & (days_remaining >= -120),
            (days_remaining < -120),  # overdue by more than 120 days: bad debt
            (days_remaining >= 0) & (days_remaining <= 31),  # current, not due yet
        ]
        # Integer labels with an explicit integer default: mixing string
        # choices with np.select's implicit default of 0 has no common dtype
        # on current NumPy.
        status = np.select(conditions, [1, 2, 3, 4, 5, 0], default=0)

        # Only 'Amount' is aggregated; stages without open invoices get 0.
        return open_items['Amount'].groupby(status).sum().reindex(stages, fill_value=0)

    def reporting(df_uploaded):
        """Build one aging row per customer and month end."""
        rows = []
        for cust in df_uploaded['Customer'].drop_duplicates():
            cust_log = df_uploaded[df_uploaded['Customer'] == cust]
            paid = cust_log['Paid Date']

            # relativedelta(day=31) snaps a date to the last day of its month.
            month_close = cust_log['Invoice Date'].min() + relativedelta(day=31)
            last_close = cust_log['Invoice Date'].max() + relativedelta(day=31)

            while month_close <= last_close:
                row = dict(zip(stages, ARaging(cust_log, month_close).to_numpy()))
                row['calcDate'] = month_close
                row['customer'] = cust

                # Payments received in the month that ends on month_close.
                prev_close = month_close + relativedelta(months=-1, day=31)
                in_month = (paid > prev_close) & (paid <= month_close)
                row['ActualCashFlowIn'] = cust_log.loc[in_month, 'Amount'].sum()

                rows.append(row)
                month_close = month_close + relativedelta(months=+1, day=31)

        # Collecting rows and building the frame once replaces repeated
        # DataFrame.append calls (removed in pandas 2.0).
        return pd.DataFrame(rows, columns=stages + ['calcDate', 'customer', 'ActualCashFlowIn'])

    def calcProbs(ARagingReport):
        """Share of each stage 0-4 that did not move on to the next stage.

        Entry k is (stage k now - stage k+1 next month) / stage k now; these
        correspond to T1..T4 in Corcoran (plus the current stage).
        """
        return tuple(
            (ARagingReport[stage] - ARagingReport[stage + 1].shift(-1)) / ARagingReport[stage]
            for stage in range(5)
        )

    def svrForecast(df_uploaded):
        """Aggregate the aging report over customers and attach the forecast."""
        aging = reporting(df_uploaded)
        # The customer names must not take part in the sum.
        agg = aging.drop(columns='customer').groupby('calcDate').sum()

        # shift(1): the share observed between t-1 and t becomes known at t.
        for name, share in zip(prob_cols, calcProbs(agg)):
            agg[name] = share.shift(1)
        agg = agg.reset_index()

        agg['time_index'] = range(1, len(agg)+1)
        # Undefined shares (no balance in the stage) are treated as 0 so the
        # regressor only sees finite targets.
        agg = agg.replace(np.nan, 0)
        agg = agg.replace(-np.inf, 0)
        agg = agg.replace(np.inf, 0)

        # Columns are addressed by name rather than by position, so the
        # result does not depend on how many columns precede them.
        time_index = agg[['time_index']].values.astype(float)
        predicted = {}
        for name in prob_cols:
            observed = agg[name].values.astype(float)
            one_step_ahead = []
            for time in range(1, len(agg)+1):
                # Fit on the first `time` months, predict month `time + 1`.
                regressor = SVR(kernel='rbf')
                regressor.fit(time_index[:time], observed[:time])
                one_step_ahead.append(regressor.predict(np.array([[time+1]])).item())
            # A payment share cannot be negative.
            predicted[name] = [0 if p < 0 else p for p in one_step_ahead]

        for name in prob_cols:
            agg[name] = predicted[name]
        # Stage 5 is bad debt: once a receivable reaches it, no payment is
        # expected any more.
        agg['s5ToP'] = [0]*len(agg)
        del agg['time_index']

        # Expected payment per stage = open balance * predicted payment share.
        for stage in stages:
            agg['fore{}'.format(stage)] = agg[stage] * agg['s{}ToP'.format(stage)]

        forecast = agg['fore0']
        for stage in stages[1:]:
            forecast = forecast + agg['fore{}'.format(stage)]
        agg['svrForecast'] = forecast
        return agg

    result_svr = svrForecast(df)
    result_svr = result_svr[['calcDate', 'ActualCashFlowIn', 'svrForecast']]
    return result_svr.round(2)

### SECTION 2
This section shows how the forecasting functions are to be called. 

In [12]:
### SAMPLE CALCULATIONS ###

In [13]:
# "uploadedFile" is the file uploaded by the user, read into a dataframe.
# All other variables/arguments passed to the functions are user defined.

import pandas as pd 
uploadedFile = pd.read_csv('template_test.csv')

In [14]:
### Simple Moving Average (SMA) Method ###

# For users, "windowVal" will be called "Moving Window Size".

# RESULTS:
# The returned object is a dataframe.
# The "calcDate" column represents the date on which the forecast is calculated.
# The "actualCashInflows" column reports the actual cash inflow for each month. E.g., in the dataframe returned below, the row indexed 1 reports the actual cash inflows and
# cash inflow forecast calculated on calcDate = 2012-05-31. Note that the forecast in row i is the forecast for the month in row i+1. Consider the row indexed 2: calcDate = 2012-06-30,
# actualCashInflows = 31718.359856, SMA_forecast = 13279.198451. This means the actual cash that flowed into the firm in June 2012 (06-2012) is 31718.359856, and the
# forecast for the next month (July 2012, 07-2012) is 13279.198451.
windowVal = 3
SMA(uploadedFile,windowVal)

Unnamed: 0,calcDate,actualCashInflows,SMA_forecast
0,2012-04-30,0.0,
1,2012-05-31,8119.24,
2,2012-06-30,31718.36,13279.2
3,2012-07-31,21744.89,20527.49
4,2012-08-31,71524.52,41662.59
5,2012-09-30,30554.26,41274.56
6,2012-10-31,39161.62,47080.13
7,2012-11-30,56439.72,42051.87
8,2012-12-31,31078.1,42226.48
9,2013-01-31,33003.56,40173.79


In [15]:
### Exponential Smoothing Method ###

# For users, "smoothVal" will be called "Smoothing Constant".
# It must lie strictly between 0 and 1; EMA raises a TypeError otherwise.
smoothVal = 0.5
EMA(uploadedFile,smoothVal)

Unnamed: 0,calcDate,actualCashInflows,ES_forecast
0,2012-04-30,0.0,0.0
1,2012-05-31,8119.24,5412.82
2,2012-06-30,31718.36,20444.56
3,2012-07-31,21744.89,21138.07
4,2012-08-31,71524.52,47143.98
5,2012-09-30,30554.26,38717.45
6,2012-10-31,39161.62,38941.29
7,2012-11-30,56439.72,47724.81
8,2012-12-31,31078.1,39385.17
9,2013-01-31,33003.56,36191.25


In [16]:
### Corcoran Method ###

# For users, "alpha" will be called "Smoothing Constant".
# It must lie strictly between 0 and 1; corcoran raises a TypeError otherwise.
alpha = 0.4
corcoran(uploadedFile,alpha)

Unnamed: 0,calcDate,ActualCashFlowIn,stochasticForecast
0,2012-04-30,0.0,
1,2012-05-31,8119.24,
2,2012-06-30,31718.36,
3,2012-07-31,21744.89,
4,2012-08-31,71524.52,
5,2012-09-30,30554.26,30931.85
6,2012-10-31,39161.62,37710.25
7,2012-11-30,56439.72,33391.41
8,2012-12-31,31078.1,35423.35
9,2013-01-31,33003.56,33636.03


In [17]:
### Pate-Cornell Method ###

pate(uploadedFile)

Unnamed: 0,calcDate,bayForecast,ActualCashflowIn
0,2012-05-31,17068.85,8119.24
1,2012-06-30,30921.91,31718.36
2,2012-07-31,64359.98,21744.89
3,2012-08-31,38317.5,71524.52
4,2012-09-30,37331.02,30554.26
5,2012-10-31,41751.75,39161.62
6,2012-11-30,30878.17,56439.72
7,2012-12-31,33248.39,31078.1
8,2013-01-31,36792.87,33003.56
9,2013-02-28,28096.37,39240.24


In [18]:
### Stochastic Financial Analytics (SFA) Method ###

# For users, "alpha" will be called "Smoothing Constant".
# For users, "beta" will be called "Weighting Parameter".
# Both must lie strictly between 0 and 1; SFA raises a TypeError otherwise.
# SFA_Forecast = beta*stochasticForecast + (1-beta)*bayForecast; where stochasticForecast is NaN, SFA_Forecast = bayForecast.
alpha = 0.4
beta = 0.5
SFA(uploadedFile,alpha,beta)

Unnamed: 0,calcDate,bayForecast,ActualCashflowIn,stochasticForecast,SFA_Forecast
0,2012-05-31,17068.85,8119.24,,17068.85
1,2012-06-30,30921.91,31718.36,,30921.91
2,2012-07-31,64359.98,21744.89,,64359.98
3,2012-08-31,38317.5,71524.52,,38317.5
4,2012-09-30,37331.02,30554.26,30931.85,34131.44
5,2012-10-31,41751.75,39161.62,37710.25,39731.0
6,2012-11-30,30878.17,56439.72,33391.41,32134.79
7,2012-12-31,33248.39,31078.1,35423.35,34335.87
8,2013-01-31,36792.87,33003.56,33636.03,35214.45
9,2013-02-28,28096.37,39240.24,28815.23,28455.8


In [19]:
### Support Vector Regression Method ###
SVR_method(uploadedFile)

Unnamed: 0,calcDate,ActualCashFlowIn,svrForecast
0,2012-04-30,0.0,0.0
1,2012-05-31,8119.24,10742.4
2,2012-06-30,31718.36,29686.81
3,2012-07-31,21744.89,17704.76
4,2012-08-31,71524.52,38333.76
5,2012-09-30,30554.26,17442.93
6,2012-10-31,39161.62,30828.39
7,2012-11-30,56439.72,36511.34
8,2012-12-31,31078.1,30089.07
9,2013-01-31,33003.56,26161.58
