In [3]:
# We are going to build a stock price predictor that downloads data from Yahoo Finance and predicts the stock price for the next day, using an LSTM model.
# We will also compute the technical indicators of the stock and use them as features to predict the stock price.
# We will also collect news about the company and use it as a feature to predict the stock price.
# We will run sentiment analysis on the news and use the resulting sentiment as a feature to predict the stock price.
# Finally, we will do a time series analysis of the stock price to predict the price for the next day.
# We will use the stock of Apple Inc. (AAPL) as an example.

# Import the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader as web
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
import yfinance as yf
import requests
from bs4 import BeautifulSoup
from sklearn.preprocessing import MinMaxScaler

# Fetch the historical prices
# Daily bars for Apple Inc. (AAPL) are pulled from Yahoo Finance for the window below
start = dt.datetime(year=2012, month=1, day=1)
end = dt.datetime(year=2024, month=1, day=1)

# yfinance returns a DataFrame of price/volume columns indexed by date
data = yf.download('AAPL', end=end, start=start)

# Display the downloaded frame
print(data)

[*********************100%%**********************]  1 of 1 completed

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2012-01-03   14.621429   14.732143   14.607143   14.686786   12.416984   
2012-01-04   14.642857   14.810000   14.617143   14.765714   12.483714   
2012-01-05   14.819643   14.948214   14.738214   14.929643   12.622306   
2012-01-06   14.991786   15.098214   14.972143   15.085714   12.754258   
2012-01-09   15.196429   15.276786   15.048214   15.061786   12.734028   
...                ...         ...         ...         ...         ...   
2023-12-22  195.179993  195.410004  192.970001  193.600006  193.091385   
2023-12-26  193.610001  193.889999  192.830002  193.050003  192.542816   
2023-12-27  192.490005  193.500000  191.089996  193.149994  192.642548   
2023-12-28  194.139999  194.660004  193.169998  193.580002  193.071426   
2023-12-29  193.899994  194.399994  191.729996  192.529999  192.024185   

               Volume  
Date         




In [4]:
# Next we are going to get the technical indicators of the stock and use them as features to predict the stock price.
# We are going to get the following technical indicators:
# 1. Moving Average Convergence Divergence (MACD)
# 2. Relative Strength Index (RSI)
# 3. Stochastic Oscillator
# 4. Commodity Channel Index (CCI)
# 5. Average True Range (ATR)
# 6. Bollinger Bands
#
# Every indicator is written back into `data` as new columns, so this cell
# expects `data` to already hold the 'High', 'Low' and 'Close' columns.

# Calculate the technical indicators
# Calculate the Moving Average Convergence Divergence (MACD):
# the 12-period EMA minus the 26-period EMA of the close, plus a 9-period EMA signal line
data['EMA12'] = data['Close'].ewm(span=12, adjust=False).mean()
data['EMA26'] = data['Close'].ewm(span=26, adjust=False).mean()
data['MACD'] = data['EMA12'] - data['EMA26']
data['Signal Line'] = data['MACD'].ewm(span=9, adjust=False).mean()

# Calculate the Relative Strength Index (RSI)
# Gains and losses are averaged with a plain 14-row rolling mean
# (not Wilder's exponential smoothing).
delta = data['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
rs = gain / loss
data['RSI'] = 100 - (100 / (1 + rs))

# Calculate the Stochastic Oscillator
# %K locates the close inside the 14-row low/high range; %D is its 3-row mean
data['L14'] = data['Low'].rolling(window=14).min()
data['H14'] = data['High'].rolling(window=14).max()
data['%K'] = 100 * (data['Close'] - data['L14']) / (data['H14'] - data['L14'])
data['%D'] = data['%K'].rolling(window=3).mean()

# Calculate the Commodity Channel Index (CCI)
# CCI = (TP - SMA(TP)) / (0.015 * mean absolute deviation of TP).
# The denominator must be the mean absolute deviation over the window, not the
# standard deviation: the 0.015 constant is calibrated for the mean deviation,
# and the standard deviation yields systematically smaller CCI values.
TP = (data['High'] + data['Low'] + data['Close']) / 3
# NOTE: 'SMA20' holds the typical-price SMA only until the Bollinger Bands
# section below overwrites it with the SMA of the close. The column is still
# created here so the column order of `data` stays the same.
data['SMA20'] = TP.rolling(window=20).mean()
tp_mean_dev = TP.rolling(window=20).apply(
    lambda window: np.abs(window - window.mean()).mean(), raw=True
)
data['CCI'] = (TP - data['SMA20']) / (0.015 * tp_mean_dev)

# Calculate the Average True Range (ATR)
# True range is the largest of: high-low, |high - previous close|, |low - previous close|.
# On the first row the previous close is missing; max(axis=1) skips NaN, so TR falls back to high-low.
data['H-L'] = abs(data['High'] - data['Low'])
data['H-PC'] = abs(data['High'] - data['Close'].shift(1))
data['L-PC'] = abs(data['Low'] - data['Close'].shift(1))
data['TR'] = data[['H-L', 'H-PC', 'L-PC']].max(axis=1)
data['ATR'] = data['TR'].rolling(window=14).mean()

# Calculate the Bollinger Bands
# 20-row SMA of the close (replaces the typical-price SMA stored above) +/- two rolling standard deviations
data['SMA20'] = data['Close'].rolling(window=20).mean()
data['STD20'] = data['Close'].rolling(window=20).std()
data['Upper Band'] = data['SMA20'] + (data['STD20'] * 2)
data['Lower Band'] = data['SMA20'] - (data['STD20'] * 2)

# Show the data
print(data)

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2012-01-03   14.621429   14.732143   14.607143   14.686786   12.416984   
2012-01-04   14.642857   14.810000   14.617143   14.765714   12.483714   
2012-01-05   14.819643   14.948214   14.738214   14.929643   12.622306   
2012-01-06   14.991786   15.098214   14.972143   15.085714   12.754258   
2012-01-09   15.196429   15.276786   15.048214   15.061786   12.734028   
...                ...         ...         ...         ...         ...   
2023-12-22  195.179993  195.410004  192.970001  193.600006  193.091385   
2023-12-26  193.610001  193.889999  192.830002  193.050003  192.542816   
2023-12-27  192.490005  193.500000  191.089996  193.149994  192.642548   
2023-12-28  194.139999  194.660004  193.169998  193.580002  193.071426   
2023-12-29  193.899994  194.399994  191.729996  192.529999  192.024185   

               Volume       EMA12    

In [6]:
# Next we are going to get the news of the company and use it as a feature to predict the stock price.
# The headlines for Apple Inc. (AAPL) are scraped from the investing.com news listing page.

# Get the news of the company from investing.com
url = 'https://www.investing.com/equities/apple-computer-inc-news'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error (e.g. a blocked request) instead of silently
# parsing an error page and ending up with an empty news table.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
news = soup.find_all('article', class_='js-article-item articleItem')

# Collect one [date, title] pair per article
news_list = []
for article in news:
    title_tag = article.find('a', class_='title')
    date_tag = article.find('span', class_='date')
    # find() returns None when the element is absent; skip such articles
    # rather than crashing with AttributeError on `.text`.
    if title_tag is None or date_tag is None:
        continue
    title = title_tag.text
    date = date_tag.text
    news_list.append([date, title])

# Convert the news list to a DataFrame
news_df = pd.DataFrame(news_list, columns=['Date', 'Title'])

In [10]:
!pip install htmldate
!pip install twython
!pip3 install newspaper3k
     
import pandas as pd
import numpy as np
import time
import twython
import requests
import nltk
import warnings
warnings.filterwarnings('ignore')

from htmldate import find_date
from tqdm import tqdm
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.downloader.download('vader_lexicon')
from newspaper import Article




[notice] A new release of pip is available: 23.3.1 -> 24.1.1
[notice] To update, run: C:\Users\My-PC\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.3.1 -> 24.1.1
[notice] To update, run: C:\Users\My-PC\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip




[nltk_data] Downloading package vader_lexicon to C:\Users\My-
[nltk_data]     PC\AppData\Roaming\nltk_data...
[notice] A new release of pip is available: 23.3.1 -> 24.1.1
[notice] To update, run: C:\Users\My-PC\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip

[nltk_data]   Package vader_lexicon is already up-to-date!


In [11]:
# Set up Selenium

!pip install selenium
!apt-get update 
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
import sys
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')

from selenium import webdriver
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome('chromedriver',chrome_options=chrome_options)

Collecting selenium
  Downloading selenium-4.22.0-py3-none-any.whl.metadata (7.0 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.25.1-py3-none-any.whl.metadata (8.7 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting typing_extensions>=4.9.0 (from selenium)
  Downloading typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Collecting websocket-client>=1.8.0 (from selenium)
  Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Downloading attrs-23.2.0-py3-none-any.whl.metadata (9.5 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto

ERROR: Exception:
Traceback (most recent call last):
  File "C:\Users\My-PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pip\_vendor\urllib3\response.py", line 438, in _error_catcher
    yield
  File "C:\Users\My-PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pip\_vendor\urllib3\response.py", line 561, in read
    data = self._fp_read(amt) if not fp_closed else b""
  File "C:\Users\My-PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pip\_vendor\urllib3\response.py", line 527, in _fp_read
    return self._fp.read(amt) if amt is not None else self._fp.read()
  File "C:\Users\My-PC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pip\_vendor\cachecontrol\filewrapper.py", line 98, in

ModuleNotFoundError: No module named 'selenium'