<a href="https://colab.research.google.com/github/Valphai/StockAnalysis/blob/main/StockAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install yahoofinancials
!pip install plotly



In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
from yahoofinancials import YahooFinancials
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
import datetime

In [3]:
def month_delta(date, delta):
    m, y = (date.month+delta) % 12, date.year + ((date.month)+delta-1) // 12
    if not m: m = 12
    d = min(date.day, [31,
        29 if y%4==0 and (not y%100==0 or y%400 == 0) else 28,
        31,30,31,30,31,31,30,31,30,31][m-1])
    return date.replace(day=d,month=m, year=y)

def support_resistance_line(high_frame, low_frame, current):
    high, low = max(high_frame), min(low_frame)
    P = (high + low + current)/3
    R1 = (P*2)-low
    R2 = P+(high-low)
    S1=(P*2)-high
    S2 = P-(high-low)
    return max(S1,S2), min(R1,R2)

def moving_average(column, day=200):
    return column.rolling(day).mean()

In [4]:
PREDICTION_DAYS = 60
EPOCHS = 15
BATCH_SIZE = 32
MONTHS_FROM = -24
MONTHS_TO = -3

In [5]:
now = datetime.datetime.now().date()
ticker = "MSFT"
yf = YahooFinancials(ticker)

current_price = yf.get_stock_price_data()[ticker]["regularMarketPrice"]

history = yf.get_historical_price_data(str(month_delta(now, MONTHS_FROM)), 
                                       str(month_delta(now, MONTHS_TO)),
                                       "daily")[ticker]["prices"]

history_test = yf.get_historical_price_data(str(month_delta(now, MONTHS_TO)), 
                                       str(now), "daily")[ticker]["prices"]                            

In [6]:
from_history = lambda hist, string : [hist[i][str(string)] for i in range(len(hist))]

df = pd.DataFrame({"Date" : from_history(history, "formatted_date"),
                   "Open" : from_history(history, "open"),
                   "Close" : from_history(history, "close"),
                   "High" : from_history(history, "high"),
                   "Low" : from_history(history, "low")})
df["Day Average"] = [(df.High[i] + df.Low[i])/2 for i in range(df.shape[0])]
print(df)

df_test = pd.DataFrame({"Date" : from_history(history_test, "formatted_date"),
                        "Close" : from_history(history_test, "close")})
print(df_test.head())

           Date        Open       Close        High         Low  Day Average
0    2019-04-03  119.860001  119.970001  120.430000  119.150002   119.790001
1    2019-04-04  120.099998  119.360001  120.230003  118.379997   119.305000
2    2019-04-05  119.389999  119.889999  120.230003  119.370003   119.800003
3    2019-04-08  119.809998  119.930000  120.019997  118.639999   119.329998
4    2019-04-09  118.629997  119.279999  119.540001  118.580002   119.060001
..          ...         ...         ...         ...         ...          ...
437  2020-12-24  221.419998  222.750000  223.610001  221.199997   222.404999
438  2020-12-28  224.449997  224.960007  226.029999  223.020004   224.525002
439  2020-12-29  226.309998  224.149994  227.179993  223.580002   225.379997
440  2020-12-30  225.229996  221.679993  225.630005  221.470001   223.550003
441  2020-12-31  221.699997  222.419998  223.000000  219.679993   221.339996

[442 rows x 6 columns]
         Date       Close
0  2021-01-04  217.690002


In [7]:
SIX_MONTHS = -30*6
x = df.Date
ma_y = moving_average(df.Close)
s_y, r_y = support_resistance_line(df.High[SIX_MONTHS:], df.Low[SIX_MONTHS:], current_price)
supp_y = [s_y for _ in range(len(x[SIX_MONTHS:]))]

fig = go.Figure(data=[go.Candlestick(
                            x=x,
                            name=ticker,
                            open=df.Open,
                            high=df.High,
                            low=df.Low,
                            close=df.Close)])

fig.add_trace(go.Scatter(x=x, y=ma_y, 
                        mode='lines', marker = dict( color = '#E377C2'),
                        name='Moving average' ))

fig.add_trace(go.Scatter(x=x[SIX_MONTHS:], y=supp_y, 
                        mode='lines', marker = dict( color = 'blue'),
                        name='Support line' ))

fig.show()

In [8]:
# normalize data
scaler = MinMaxScaler(feature_range=(0,1))

fit_scaler = lambda x : scaler.fit_transform(x.values.reshape(-1,1))

normalized_open = fit_scaler(df["Open"])
normalized_close = fit_scaler(df["Close"])

normalized_test_close = fit_scaler(df_test["Close"])

In [9]:
def split_data(prediction_days):
  """
  Split data to predict 3 months in advance. This data comes from yahoo,
  which means its predicting already existing history.
  """
  end_point = len(normalized_close) - prediction_days
  inputs = np.array(normalized_close[:end_point])

  train_inputs = tf.expand_dims(inputs, -1)
  train_labels = np.array(normalized_close[prediction_days:])

  inputs = np.array(normalized_test_close[:])
  test_inputs = tf.expand_dims(inputs, -1)

  return train_inputs, train_labels, test_inputs

In [10]:
x_train, y_train, x_test = split_data(PREDICTION_DAYS)

In [35]:
def My_model():
    model = tf.keras.Sequential([
      tf.keras.layers.LSTM(256, return_sequences=True, input_shape=(x_train.shape[1],1)),
      tf.keras.layers.Dropout(0.25),
      tf.keras.layers.LSTM(512, return_sequences=True),
      tf.keras.layers.Dropout(0.25),
      tf.keras.layers.LSTM(128),
      tf.keras.layers.Dropout(0.25),
      tf.keras.layers.Dense(1),
    ])
    return model

model = My_model()
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 1, 256)            264192    
_________________________________________________________________
dropout_6 (Dropout)          (None, 1, 256)            0         
_________________________________________________________________
lstm_10 (LSTM)               (None, 1, 512)            1574912   
_________________________________________________________________
dropout_7 (Dropout)          (None, 1, 512)            0         
_________________________________________________________________
lstm_11 (LSTM)               (None, 128)               328192    
_________________________________________________________________
dropout_8 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                

In [36]:
model.compile(
  optimizer="adam",
  loss="mean_absolute_error"
)

In [37]:
model.fit(x=x_train,y=y_train,epochs=EPOCHS, batch_size=BATCH_SIZE)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7faff8f6dcd0>

In [38]:
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

In [39]:
predictions[:10]

array([[220.90479],
       [221.10828],
       [215.54636],
       [221.48553],
       [222.76509],
       [220.71072],
       [218.2063 ],
       [219.5903 ],
       [216.31438],
       [215.9457 ]], dtype=float32)

In [40]:
fig = go.Figure(data=go.Scatter(x=df["Date"], y=df["Close"],
                                name="Price before predictions"))
fig.add_trace(go.Scatter(x=df_test["Date"], y=df_test["Close"], 
                        mode="lines", name="Real values"))
fig.add_trace(go.Scatter(x=df_test["Date"], y=tf.squeeze(predictions), 
                        mode="lines", name="Predictions"))

fig.show()

In [41]:
def predict_future(days_into):
  predicted_prices = []
  input = np.array([normalized_test_close[-1]])
  input = tf.expand_dims(input, -1)

  for i in range(days_into):
    prediction = model.predict(input)

    prediction = tf.squeeze(prediction, 0)

    predicted_prices.append(prediction)

    input = prediction
    input = tf.expand_dims([input], -1)

  return predicted_prices

predictions = predict_future(5)
predictions = scaler.inverse_transform(predictions)

In [42]:
print(predictions)

[[242.64138869]
 [242.86864945]
 [243.04537919]
 [243.18249828]
 [243.28869263]]
