## Multiple Linear Regression 
### life expectancy

In [None]:
# Import packages
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

# The fundamental data structure of pandas is the DataFrame

In [None]:
# Load the gapminder CSV into a DataFrame and preview its first five rows
data = pd.read_csv(filepath_or_buffer='gapminder.csv')
data.head(n=5)

In [None]:
# Dimensions of the data frame as (number of rows, number of columns)
data.shape

In [None]:
# Column labels available in the data frame
data.columns

## Task: Find out how life expectancy in Asia depends on `age5_surviving`, `babies_per_woman`, `population`, and `gdp_per_day`.

In [None]:
# Keep only the rows whose region is Asia
Asia = data[data['region'].eq('Asia')]

### Build the multiple linear regression model 

In [None]:
# Set up the regression variables

# Response (dependent variable): life_expectancy
y = Asia.loc[:, 'life_expectancy']

# Predictors (independent variables): age5_surviving, babies_per_woman,
# population, gdp_per_day -- with a constant column added for the intercept
x = sm.add_constant(
    Asia.loc[:, ['age5_surviving', 'babies_per_woman', 'population', 'gdp_per_day']]
)

In [None]:
# Preview the first rows of the dependent variable
y.head()

In [None]:
# Preview the first rows of the design matrix (constant column plus predictors)
x.head()

In [None]:
# Create the OLS model (fitting happens in the next cell)
# NOTE: OLS takes the dependent variable (endog) first, then the design matrix (exog)
model = sm.OLS(endog=y, exog=x)

In [None]:
# Fit the model and print the regression summary table
# (coefficients, standard errors, p-values, R-squared, ...)
results = model.fit()
print(results.summary())

#### Population has a high p-value: 0.087 > 0.05. It is not statistically significant at the 5% level, so we drop it from the model.

In [None]:
# Remove population from the design matrix and preview the result.
# drop() is used without inplace so a new frame is assigned back to x, and
# errors='ignore' keeps the cell safe to re-run once the column is already gone
# (the in-place version raised KeyError on a second execution).
x = x.drop(columns='population', errors='ignore')
x.head()

In [None]:
# Re-create the OLS model on the current design matrix x (fitting happens in the next cell)
# NOTE: OLS takes the dependent variable (endog) first, then the design matrix (exog)
model = sm.OLS(endog=y, exog=x)

In [None]:
# Fit the current model and print its regression summary table
results = model.fit()
print(results.summary())

In [None]:
# Estimated model coefficients, labelled by the column names of x
results.params

In [None]:
# In-sample prediction: fitted values of the model for the rows it was trained on
yp = results.fittedvalues
print('predicted y:',yp,sep='\n')

In [None]:
# Same in-sample prediction, obtained by passing the design matrix to predict()
yp = results.predict(exog=x)
print('predicted y:', yp, sep='\n')

In [None]:
# Out-of-sample prediction: life expectancy of one hypothetical country,
# computed by hand from the fitted coefficients.
#
# The previous version of this cell was left over from a clock-auction example:
# it plugged a clock age and a bidder count into the first two slopes and
# ignored the gdp_per_day term altogether.
#
# NOTE(review): the input values below are illustrative placeholders; confirm
# the units of each column in gapminder.csv and substitute the scenario of interest.
age5_surviving = 95      # presumably % of children surviving to age 5 -- TODO confirm
babies_per_woman = 2.5
gdp_per_day = 20         # presumably GDP per person per day -- TODO confirm

# Coefficients are looked up by label ('const' is the column added by
# sm.add_constant) rather than by position, so the formula cannot silently
# pair a value with the wrong coefficient.
life_expectancy_pred = (
    results.params['const']
    + results.params['age5_surviving'] * age5_surviving
    + results.params['babies_per_woman'] * babies_per_woman
    + results.params['gdp_per_day'] * gdp_per_day
)
life_expectancy_pred