In [3]:
# LIBRARIES
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

In [4]:
# DATA
## Two Stata datasets: LL is used by the OLS models, SE by the probit/logit models.
## Paths use forward slashes so they resolve on every OS; with backslashes,
## sequences such as '\L' and '\s' are parsed as (invalid) string escapes and the
## path only works on Windows.
LL = pd.read_stata('data/LL_train.dta')
SE = pd.read_stata('data/self_employment.dta')

## Display settings are global to pandas, so they are set once here.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 10)

In [5]:
# MANIPULATION

## Monthly income divided by one million
## Pattern: data['newvar'] = data['oldvar'] / 1_000_000
LL['income_mill'] = LL['income_month'] / 1_000_000

## Squared age term
## Pattern: data['newvar'] = data['oldvar'] ** 2
SE['agesqr'] = SE['age'] ** 2

## The OLS formulas fitted on LL also reference `agesqr`, so make sure the column
## is available there as well. The guard leaves an `agesqr` column that already
## came with the Stata file untouched.
if 'agesqr' not in LL.columns:
    LL['agesqr'] = LL['age'] ** 2

In [None]:
# OLS REGRESSION
# Four models on LL, all with regressors age, age squared and female.
# They differ in the dependent variable and in where the squared term comes from.

## Dependent variable: income_month
### Squared age taken from the agesqr column
ols1 = smf.ols(
    formula='income_month ~ age + agesqr + female',
    data=LL,
).fit()

### Squared age built inside the formula (for when agesqr does not exist)
ols2 = smf.ols(
    formula='income_month ~ age + I(age**2) + female',
    data=LL,
).fit()


## Dependent variable: income_mill
### Squared age taken from the agesqr column
ols3 = smf.ols(
    formula='income_mill ~ age + agesqr + female',
    data=LL,
).fit()

### Squared age built inside the formula (for when agesqr does not exist)
ols4 = smf.ols(
    formula='income_mill ~ age + I(age**2) + female',
    data=LL,
).fit()

In [None]:
# RESULT
## Summary tables in order: the two income_month models (ols1, ols2),
## then the two income_mill models (ols3, ols4).
for fitted_ols in (ols1, ols2, ols3, ols4):
    print(fitted_ols.summary())

In [None]:
# DUMMY REGRESSION
# Both models regress selfemployed on age, agesqr and female using SE.

## Probit
probit1 = smf.probit(
    formula='selfemployed ~ age + agesqr + female',
    data=SE,
).fit()

## Logit
logit1 = smf.logit(
    formula='selfemployed ~ age + agesqr + female',
    data=SE,
).fit()

In [None]:
# RESULT
## Summary tables: probit first, then logit.
for fitted_discrete in (probit1, logit1):
    print(fitted_discrete.summary())

In [None]:
## Marginal effect [Probit]
## Option meanings are described in the reference below (the page is for
## LogitResults; the same call is used on the probit result here).
## ref: https://www.statsmodels.org/dev/generated/statsmodels.discrete.discrete_model.LogitResults.get_margeff.html#statsmodels.discrete.discrete_model.LogitResults.get_margeff
probit_margeff = probit1.get_margeff(
    at='mean',
    method='dydx',
    atexog=None,
    dummy=True,
    count=False,
)
probit_margeff.summary()

In [None]:
## Marginal effect [Logit]
## Same settings as used for the probit marginal effects.
logit_margeff = logit1.get_margeff(
    at='mean',
    method='dydx',
    atexog=None,
    dummy=True,
    count=False,
)
logit_margeff.summary()