<a href="https://colab.research.google.com/github/ionut-banu/lr-stock-prices/blob/main/lr_stock_price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
from pandas_datareader import data
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
import matplotlib.dates as mdates
import pandas as pd
import numpy as np

# Utils

In [None]:
def predict(X, y, date, test_ratio=0.4):
    """Fit a linear regression on a positional split of the data and plot it.

    The first ``floor(m * test_ratio) + 1`` rows (``m`` = number of rows in
    ``X``) are held out as the test set; every remaining row is used for
    training. The split is positional, so it does not depend on the index
    labels of the inputs.

    Prints the fitted coefficients, the mean squared error on the test set
    and the model's ``score`` on the test set, then draws two charts via
    ``plot_data``: predictions vs. real values on the test rows, and on all
    rows. Both charts use the module-level ``symbol`` as their super-title.

    Args:
        X: pandas DataFrame of features, one row per example.
        y: pandas object with the target values, aligned row-for-row with X.
        date: pandas object with the dates, aligned row-for-row with X; used
            as the x axis of the charts.
        test_ratio: Fraction of rows, counted from the start, that defines
            the test set. Must be strictly between 0 and 1.

    Returns:
        The fitted ``LinearRegression`` model.

    Raises:
        ValueError: If ``test_ratio`` is outside (0, 1) or the split leaves
            no rows to train on.
    """
    if not 0 < test_ratio < 1:
        raise ValueError("test_ratio must be strictly between 0 and 1")

    # m - number of examples
    # n - number of features
    m, n = X.shape

    # Number of leading rows that go to the test set. The "+ 1" keeps the
    # row count identical to an inclusive label slice ending at
    # floor(m * test_ratio) on a default 0..m-1 index.
    split = int(np.floor(m * test_ratio)) + 1
    if split >= m:
        raise ValueError(
            "not enough rows (%d) to leave a training set with test_ratio=%s"
            % (m, test_ratio)
        )

    X_test, X_train = X.iloc[:split], X.iloc[split:]
    y_test, y_train = y.iloc[:split], y.iloc[split:]
    date_test = date.iloc[:split]

    # Create linear regression object
    lr = LinearRegression()

    # Train the model using the training set
    lr.fit(X_train, y_train)

    # Predict once and reuse the result for both the metric and the chart.
    test_prediction = lr.predict(X_test)

    # Work on plain arrays so the mean is a single scalar whether y is a
    # Series or a one-column DataFrame.
    mse = float(np.mean((np.asarray(test_prediction) - np.asarray(y_test)) ** 2))

    # The coefficients
    print('Coefficients: \n', lr.coef_)
    # The mean squared error and the model score on the test set
    print("Number of variables: %d; Mean squared error: %.2f; Variance score: %.2f"
          % (n, mse, lr.score(X_test, y_test)))

    # `symbol` is read from module scope; it is not a parameter of this function.
    plot_data(date_test, test_prediction, y_test, 'Historical chart', symbol)
    plot_data(date, lr.predict(X), y, 'Historical chart', symbol)
    return lr
    
def plot_data(x, predict, y, title, suptitle):
    """Show a date chart of the real values and, optionally, the predictions.

    Args:
        x: Values for the x axis, plotted as dates.
        predict: Predicted values to draw in red, or None to draw only the
            real values. (The name shadows the module-level ``predict``
            function inside this body.)
        y: Real values, drawn in blue.
        title: Chart title.
        suptitle: Figure super-title.
    """
    plt.xticks(rotation=45)

    # (values, line format, legend label) for each curve, in drawing order.
    curves = [(y, 'b-', 'Real value')]
    if predict is not None:
        curves.append((predict, 'r-', 'Predicted value'))

    # NOTE(review): plot_date is reported as deprecated in recent Matplotlib
    # releases - confirm against the installed version before upgrading.
    for values, line_format, legend_label in curves:
        plt.plot_date(x, values, fmt=line_format, xdate=True, ydate=False,
                      label=legend_label)

    plt.legend(loc='upper center')
    plt.ylabel('Close prices')
    plt.title(title, y=1.1)
    plt.suptitle(suptitle, y=0.97)
    plt.grid()
    plt.show()


# Import data

In [None]:
# Ticker, data provider and date range of the download.
symbol = 'FB'
data_source = 'yahoo'
start_date = '2019-11-01'
end_date = '2020-11-01'

# Download the quotes and move the index into a regular column so the frame
# ends up with a default integer index.
# NOTE(review): presumably the index is the 'Date' column used by later
# cells - confirm against the reader's output.
df = data.DataReader(symbol, data_source, start_date, end_date).reset_index(drop=False)

# Features and output variables

In [None]:
# Parse the dates and add Matplotlib's numeric date value as a feature column.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df['Date2num'] = df['Date'].apply(mdates.date2num)

# X is the full table (same object as df); y is the one-column 'Close' target.
X = df
y = df['Close'].to_frame(name='Close')

# One-column frame of dates, used as the x axis of the charts.
date = X[['Date']]

# Plot historical data

In [None]:
# Real closing prices only: no prediction curve is drawn.
plot_data(x=X['Date'], predict=None, y=y, title='Historical chart', suptitle=symbol)

# Train

In [None]:
# Model 1: the numeric date is the only feature.
features = ['Date2num']
X_input = X[features]
lr = predict(X_input, y, date)

In [None]:
# Model 2: numeric date plus the 'Open' column.
features = ['Date2num', 'Open']
X_input = X[features]
lr = predict(X_input, y, date)

In [None]:
# Model 3: numeric date plus the 'Open' and 'High' columns.
features = ['Date2num', 'Open', 'High']
X_input = X[features]
lr = predict(X_input, y, date)

In [None]:
# Model 4: numeric date plus the 'Open', 'High' and 'Low' columns.
features = ['Date2num', 'Open', 'High', 'Low']
X_input = X[features]
lr = predict(X_input, y, date)