## Effect of Inflation and Deficits on Interest Rates

In [24]:

import math
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import statsmodels.stats.api as sms
from statsmodels.stats.outliers_influence import OLSInfluence
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.compat import lzip
import statsmodels.formula.api as smf
from IPython.display import Image
import plotly
import plotly.graph_objs as go
import matplotlib.pyplot as plt


In [25]:
def inspect_col_names(xls_name, data_dir='../../data'):
    """Print the data dictionary (``.des`` file) that accompanies a dataset.

    Parameters
    ----------
    xls_name : str
        Base name of the dataset (e.g. ``'INTDEF'``). The file that is read
        is ``<data_dir>/<xls_name>.des``.
    data_dir : str, optional
        Directory that holds the ``.des`` files. Defaults to ``'../../data'``,
        the location this notebook has always read from.

    Returns
    -------
    None
        The file contents are written to standard output via ``print``.

    Raises
    ------
    OSError
        If the ``.des`` file cannot be opened (e.g. it does not exist).
    """
    des_path = '{0}/{1}.des'.format(data_dir, xls_name)
    # The context manager closes the handle even if read() raises, so
    # re-running the cell does not accumulate open file descriptors.
    with open(des_path, 'r') as des_file:
        print(des_file.read())
    return None



In [26]:
# Print the INTDEF data dictionary (the contents of its .des file).
inspect_col_names(xls_name='INTDEF')

INTDEF.DES

year      i3        inf       rec       out       def       i3_1      inf_1    
def_1     ci3       cinf      cdef      y77

  Obs:    49

  1. year                     1948-1996
  2. i3                       3 mo. T bill rate
  3. inf                      CPI inflation rate
  4. rec                      federal receipts, % GDP
  5. out                      federal outlays, % GDP
  6. def                      out - rec (deficit as % GDP)
  7. i3_1                     i3[t-1]
  8. inf_1                    inf[t-1]
  9. def_1                    def[t-1]
 10. ci3                      i3 - i3_1
 11. cinf                     inf - inf_1
 12. cdef                     def - def_1
 13. y77                      =1 year >= 1977; change in FY




In [27]:

# Column names for every supported workbook, listed in spreadsheet column
# order. The workbooks are read with header=None, so names are assigned
# purely by position.
_COLUMN_NAMES = {
    'hprice1': ['price', 'assess', 'bdrms', 'lotsize', 'sqrft', 'colonial',
                'lprice', 'lassess', 'llotsize', 'lsqrft'],
    'saving': ['sav', 'inc', 'size', 'edu', 'age', 'black', 'cons'],
    '401k': ['prate', 'mrate', 'totpart', 'totelg', 'age', 'totemp',
             'sole', 'ltotemp'],
    '401ksubs': ['e401k', 'inc', 'marr', 'male', 'age', 'fsize', 'nettfa',
                 'p401k', 'pira', 'incsq', 'agesq'],
    'INTDEF': ['year', 'i3', 'inf', 'rec', 'out', 'def', 'i3_1', 'inf_1',
               'def_1', 'ci3', 'cinf', 'cdef', 'y77'],
}


def rename_cols_and_save(xls_name, data_dir='../../data', na_values=None):
    """Load a header-less ``.xls`` dataset, name its columns, and save a CSV copy.

    Parameters
    ----------
    xls_name : str
        Base name of the workbook. Must be one of the keys of
        ``_COLUMN_NAMES`` (``'hprice1'``, ``'saving'``, ``'401k'``,
        ``'401ksubs'``, ``'INTDEF'``).
    data_dir : str, optional
        Directory containing ``<xls_name>.xls``; the CSV is written next to
        it as ``<xls_name>.csv``. Defaults to ``'../../data'``.
    na_values : scalar, str, list-like or dict, optional
        Forwarded to ``pandas.read_excel``. The default (``None``) keeps the
        original behaviour, in which cells are read as-is. Pass e.g.
        ``na_values='.'`` if the workbook marks missing values with a
        placeholder string and those cells should become NaN.

    Returns
    -------
    pandas.DataFrame
        The loaded data with positional columns renamed.

    Raises
    ------
    ValueError
        If ``xls_name`` has no registered column names. Previously this case
        only failed after the workbook had been read, with an
        ``UnboundLocalError`` on ``names_dict``.
    """
    # Validate before touching the filesystem so a typo in the dataset name
    # produces a clear message instead of a late, confusing failure.
    if xls_name not in _COLUMN_NAMES:
        raise ValueError(
            "Unknown dataset {0!r}; expected one of {1}".format(
                xls_name, sorted(_COLUMN_NAMES)
            )
        )

    # read_excel labels header-less columns 0..n-1, so map position -> name.
    names_dict = dict(enumerate(_COLUMN_NAMES[xls_name]))

    df = pd.read_excel(
        "{0}/{1}.xls".format(data_dir, xls_name),
        index_col=None,
        header=None,
        na_values=na_values,
    )
    # Non-inplace rename keeps the cell idempotent and avoids mutating a
    # frame some other name might still reference.
    df = df.rename(columns=names_dict)
    df.to_csv("{0}/{1}.csv".format(data_dir, xls_name), index=False)

    return df

In [30]:
# Read the INTDEF workbook, apply its column names, write the CSV copy,
# and preview the first rows.
# NOTE(review): the preview shows '.' in the first row of the lagged and
# differenced columns — presumably a missing-value marker; confirm these are
# converted to NaN before the columns are used numerically.
df = rename_cols_and_save(xls_name='INTDEF')
df.head()

Unnamed: 0,year,i3,inf,rec,out,def,i3_1,inf_1,def_1,ci3,cinf,cdef,y77
0,1948,1.04,8.1,16.4,11.7,-4.7,.,.,.,.,.,.,0
1,1949,1.1,-1.2,14.6,14.4,-0.200001,1.04,8.1,-4.7,0.0600001,-9.3,4.5,0
2,1950,1.22,1.3,14.5,15.6,1.1,1.1,-1.2,-0.200001,0.12,2.5,1.3,0
3,1951,1.55,7.9,16.1,14.2,-1.900001,1.22,1.3,1.1,0.33,6.6,-3,0
4,1952,1.77,1.9,18.9,19.4,0.5,1.55,7.9,-1.9,0.22,-6,2.4,0


In [31]:
class EDA(object):
    """Small exploratory-data-analysis helper around a DataFrame and a target column.

    Each plotting method saves a PNG into the current working directory.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to explore.
    y : str
        Name of the column in ``df`` used as the y variable in the
        covariation plots.
    """

    def __init__(self, df, y):
        self.df = df          # full frame under analysis
        self.y_string = y     # target column name (used in file names and lmplot)
        self.y = df[y]        # target column values (used by the box plots)

    def inspect(self):
        """Return a dict with the frame's ``head``, transposed ``describe`` and ``dtypes``."""
        return {
            'head': self.df.head(),
            'describe': self.df.describe().T,
            'dtypes': self.df.dtypes,
        }

    def _numerical(self):
        """Return only the numeric columns of ``self.df``.

        Uses the pandas ``'number'`` selector, which is equivalent to
        ``np.number`` but does not require ``numpy`` to be imported in the
        notebook (it is not in the import cell).
        """
        return self.df.select_dtypes(include='number')

    def _dist_plot(self, df, var):
        """Plot the distribution of ``df[var]``, save it as ``dist_plot_<var>.png`` and return the figure."""
        plt.figure()
        # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
        # it is kept so the saved figures do not change. Consider histplot.
        sns_plot = sns.distplot(df[var], color='b').get_figure()
        sns_plot.savefig("dist_plot_{0}.png".format(var))
        return sns_plot

    def variation(self):
        """Save one distribution plot per numeric column."""
        numerical = self._numerical()
        for col in numerical.columns:
            self._dist_plot(df=numerical, var=col)
        return None

    def _scatter_matrix(self):
        """Save a pair plot of all numeric columns as ``scatter_matrix_plot.png``."""
        # pairplot builds its own figure, so no plt.figure() call is needed;
        # calling it first only left an empty extra figure behind.
        sns_plot = sns.pairplot(self._numerical())
        sns_plot.savefig("scatter_matrix_plot.png")
        return None

    def _box_plot(self, var_x):
        """Save a box plot of the target against ``var_x`` as ``box_plot_<var_x>_<y>.png``."""
        plt.figure()
        sns_plot = sns.boxplot(x=var_x, y=self.y, data=self.df).get_figure()
        sns_plot.savefig("box_plot_{0}_{1}.png".format(var_x, self.y_string))
        return None

    def _scatter_plot(self, var_x):
        """Save an ``lmplot`` of the target against ``var_x`` as ``correlation_<var_x>_<y>.png``."""
        sns_plot = sns.lmplot(x=var_x, y=self.y_string, data=self.df)
        sns_plot.savefig("correlation_{0}_{1}.png".format(var_x, self.y_string))
        return None

    def covariation(self):
        """Save the pair plot, a box plot per bool/category column, and an lmplot per numeric column."""
        self._scatter_matrix()

        categorical = self.df.select_dtypes(include=['bool', 'category'])
        for col in categorical.columns:
            self._box_plot(var_x=col)

        for col in self._numerical().columns:
            self._scatter_plot(var_x=col)
        return None

    def run(self):
        """Run the univariate (``variation``) and bivariate (``covariation``) plots in order."""
        self.variation()
        self.covariation()
        return None

In [32]:
# Wrap the INTDEF frame for exploration, with the 3-month T-bill rate (i3) as the y variable.
my_eda = EDA(df, y='i3')

In [33]:
# Show the frame's head, transposed describe() summary, and dtypes.
my_eda.inspect()

{'head':    year    i3  inf   rec   out       def  i3_1 inf_1     def_1        ci3  \
 0  1948  1.04  8.1  16.4  11.7 -4.700000     .     .         .          .   
 1  1949  1.10 -1.2  14.6  14.4 -0.200001  1.04   8.1      -4.7  0.0600001   
 2  1950  1.22  1.3  14.5  15.6  1.100000   1.1  -1.2 -0.200001       0.12   
 3  1951  1.55  7.9  16.1  14.2 -1.900001  1.22   1.3       1.1       0.33   
 4  1952  1.77  1.9  18.9  19.4  0.500000  1.55   7.9      -1.9       0.22   
 
   cinf cdef  y77  
 0    .    .    0  
 1 -9.3  4.5    0  
 2  2.5  1.3    0  
 3  6.6   -3    0  
 4   -6  2.4    0  ,
 'describe':       count         mean        std      min      25%          50%      75%  \
 year   49.0  1972.000000  14.288690  1948.00  1960.00  1972.000000  1984.00   
 i3     49.0     5.068980   2.965661     0.95     2.93     4.880000     6.69   
 inf    49.0     4.108163   3.182821    -1.20     1.70     3.200000     5.70   
 rec    49.0    17.838776   1.058855    14.50    17.50    17.800000  