# Introduction

# Research Question

# Literature Review

# Methods

$$r_{it} = B_1 T_{it} + I_{it} + P_t + M_t + e_i$$

# Data Collection

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
import pandas as pd
import statsmodels.formula.api as sm

## Macroeconomic Factors

In [11]:
macro = pd.read_csv('macro.csv')
macro.head()

Unnamed: 0,Date,equal_equity,bill_1m,bill_30y,consump,inflation,low_bond,oil,produc,sp500
0,2009,34.0,0.11,4.25,10202.2,1.640043,6.04,79.48,94.2238,47.22
1,2010,34.725072,0.04,3.91,10689.3,3.156842,5.66,94.88,97.1387,26.45
2,2011,35.756063,0.07,2.92,11050.6,2.069337,4.94,94.05,100.0,4.95
3,2012,59.440359,0.05,3.45,11361.2,1.464833,5.1,97.98,101.9753,22.25
4,2013,117.523283,0.03,3.34,11863.7,1.622223,4.85,93.17,105.1294,39.72


## Industry Returns

In [3]:
industry_data = pd.read_csv('bloomberg_data.csv')
industry_data.head()

Unnamed: 0,Date,Industry,Return
0,2008,comm,-41.26
1,2009,comm,73.76
2,2010,comm,33.61
3,2011,comm,11.61
4,2012,comm,36.26


## VC Activity

In [4]:
vc_data = pd.read_csv('vc_data_new.csv')
vc_data.head()

Unnamed: 0,Date,Industry,total,avg
0,2009,comm,13092.02,15.05
1,2010,comm,8252.16,14.66
2,2011,comm,8202.69,16.24
3,2012,comm,10708.05,21.33
4,2013,comm,19560.26,37.91


# Data Wrangling

In [5]:
data_temp = pd.merge(vc_data, macro, on='Date')
data = pd.merge(data_temp, industry_data, on=('Date', 'Industry'))
data.head()

Unnamed: 0,Date,Industry,total,avg,equal_equity,bill_1m,bill_30y,consump,inflation,low_bond,oil,produc,sp500,Return
0,2009,comm,13092.02,15.05,34.0,0.11,4.25,10202.2,1.640043,6.04,79.48,94.2238,47.22,73.76
1,2009,consumer,8232.73,15.56,34.0,0.11,4.25,10202.2,1.640043,6.04,79.48,94.2238,47.22,41.225
2,2009,energy,5382.27,13.26,34.0,0.11,4.25,10202.2,1.640043,6.04,79.48,94.2238,47.22,54.47
3,2009,tech,39075.09,9.085117,34.0,0.11,4.25,10202.2,1.640043,6.04,79.48,94.2238,47.22,80.55
4,2009,health,11745.67,10.422067,34.0,0.11,4.25,10202.2,1.640043,6.04,79.48,94.2238,47.22,38.53


In [6]:
r = data['Return']

total = data['total']
avg = data['avg']

inflation = data['inflation']
consump = data['consump']
produc = data['produc']
oil = data['oil']
bill_1m = data['bill_1m']
bill_30y = data['bill_30y']
low_bond = data['low_bond']
equal_equity = data['equal_equity']
sp500 = data['sp500']

# Summary Statistics

## VC Activity

In [7]:
vc_data.describe()

Unnamed: 0,Date,total,avg
count,40.0,40.0,40.0
mean,2012.5,15786.5705,18.129323
std,2.320477,8999.206916,8.580243
min,2009.0,3670.2,7.267166
25%,2010.75,8714.1875,11.248385
50%,2012.5,13011.7,15.552434
75%,2014.25,20949.505,23.2675
max,2016.0,39075.09,41.96


## Industry Returns

In [8]:
industry_data.describe()

Unnamed: 0,Date,Return
count,45.0,45.0
mean,2012.0,17.282333
std,2.611165,27.715829
min,2008.0,-42.56
25%,2010.0,5.22
50%,2012.0,18.51
75%,2014.0,34.24
max,2016.0,80.55


## Macroeconomic Factors

In [9]:
macro.describe()

Unnamed: 0,Date,equal_equity,bill_1m,bill_30y,consump,inflation,low_bond,oil,produc,sp500
count,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0
mean,2012.5,94.024398,0.18,3.27375,11714.175,1.541686,5.09375,75.28875,101.38735,21.965
std,2.44949,59.621841,0.280153,0.57587,1092.179388,0.870647,0.517547,23.66646,3.994098,15.713838
min,2009.0,34.0,0.03,2.59,10202.2,0.118627,4.44,43.29,94.2238,1.8
25%,2010.75,35.498315,0.04,2.8775,10960.275,1.196187,4.8175,50.265,99.284675,12.8325
50%,2012.5,88.481821,0.06,3.13,11612.45,1.543528,4.97,86.325,102.54055,20.06
75%,2014.25,144.411788,0.145,3.565,12454.4,1.747367,5.24,94.2575,104.57035,29.7675
max,2016.0,179.101266,0.85,4.25,13393.4,3.156842,6.04,97.98,105.1418,47.22


# Regression

In [10]:
reg = sm.ols(formula="r ~ total + avg + inflation + consump + produc + oil + bill_1m + bill_30y + low_bond + equal_equity + sp500", data=data).fit()
print(reg.summary())

                            OLS Regression Results                            
Dep. Variable:                      r   R-squared:                       0.797
Model:                            OLS   Adj. R-squared:                  0.736
Method:                 Least Squares   F-statistic:                     13.11
Date:                Thu, 29 Mar 2018   Prob (F-statistic):           3.50e-08
Time:                        19:17:27   Log-Likelihood:                -146.90
No. Observations:                  40   AIC:                             313.8
Df Residuals:                      30   BIC:                             330.7
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept        0.9795      0.804      1.218   

# Results

# Conclusion