#### A quick introduction to linear regression using Python.

### Packages

In [None]:
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt
import seaborn as sns

### Data

Simulate some noisy data from a sine curve for this exercise.

In [None]:
# Set-up
# Number of simulated observations; later cells reuse it when drawing the data
# and when averaging the standardized products.
n_samples = 20
# Seed NumPy's global random generator so the simulated data are reproducible.
np.random.seed(314)

def true_fun(X):
    """Return the noise-free response -sin(2.2 * pi * X).

    X may be a scalar or a NumPy array; the function is applied element-wise
    and the result has the same shape as X.
    """
    angular_freq = 2.2 * np.pi
    phase = angular_freq * X
    return np.negative(np.sin(phase))

In [None]:
# Generate data
# Predictor: n_samples uniform draws, put into ascending order
x = np.random.rand(n_samples)
x.sort()

# Response: the true curve at x plus Gaussian noise scaled by 0.1
y = 0.1 * np.random.randn(n_samples) + true_fun(x)

In [None]:
# Plot the data
sns.set(style="darkgrid")
fig, ax = plt.subplots(figsize=(12, 9))

# Scatter of the simulated observations
ax.scatter(x, y, s=100, color='royalblue')

ax.set_xlabel('x', fontsize=14)
ax.set_ylabel('y', fontsize=14)
plt.show()

### Correlation coefficient

In [None]:
# Calculate the correlation coefficient using numpy.
# np.corrcoef returns the 2x2 correlation matrix of x and y; the off-diagonal
# entry is the correlation between the two variables.
corr_matrix = np.corrcoef(x, y)
corr_coef = corr_matrix[1, 0]

print(corr_coef)

Is this the same as the sum product of the standardized x's and y's, divided by the sample size?

In [None]:
# Standardize x and y using numpy: subtract the mean, divide by the standard
# deviation (numpy's default, which divides by n)
x_std = (x - x.mean()) / x.std()
y_std = (y - y.mean()) / y.std()

# Take the sum product of standardized x and y, and divide by sample size
print(x_std.dot(y_std) / n_samples)

### Linear Regression

In [None]:
# Create linear regression object (scikit-learn's LinearRegression with its
# default settings); nothing is fitted yet at this point.
linreg = linear_model.LinearRegression()

In [None]:
# Fit the model.
# The features are passed as a 2-D array, so x is reshaped into a single column.
linreg.fit(x.reshape(-1, 1), y)

In [None]:
# Plot the data with the regression line
sns.set(style="darkgrid")
fig, ax = plt.subplots(figsize=(12, 9))

# Observations
ax.scatter(x, y, s=100, color='royalblue')

# Fitted line, evaluated at the observed x values
ax.plot(x, linreg.predict(x[:, np.newaxis]), linewidth=3, color='indianred')

# Annotate the fitted equation at a fixed position in data coordinates
ax.text(.28, 0, f'y = {linreg.intercept_:.2f} + {linreg.coef_[0]:.2f}*x',
        color='indianred', fontsize=18, weight='semibold')

ax.set_xlabel('x', fontsize=14)
ax.set_ylabel('y', fontsize=14)
plt.show()

### Regression Slope

Calculate the regression slope from the correlation coefficient: $b = r \cdot s_y / s_x$, where $s_x$ and $s_y$ are the standard deviations of x and y.

In [None]:
# Slope = correlation coefficient scaled by the ratio of the standard
# deviations of y and x
reg_slope = corr_coef * y.std() / x.std()

print(reg_slope)