<a href="https://colab.research.google.com/github/gdabrow/Investments/blob/main/stockAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Information on companies listed on stock exchanges obtained from:**
* https://seekingalpha.com/
* https://www.zacks.com/
* https://finance.yahoo.com/
* https://finviz.com/

In [None]:
# installation libraries
! pip install yfinance --upgrade --no-cache-dir &> /dev/null
! pip install beautifulsoup4 &> /dev/null
! pip install cloudscraper &> /dev/null

In [None]:
# import libraries
import os
import random
import urllib.parse

import cloudscraper
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import yfinance as yf
from bs4 import BeautifulSoup

# Shared HTTP client; the finviz and Zacks cells further down fetch pages through it
scraper = cloudscraper.create_scraper()

# Mount Google Drive at /content/drive so that the Excel paths used later
# ('/content/drive/MyDrive/...') resolve. Colab-only.
from google.colab import drive
drive.mount('/content/drive')

**List with the data we are interested in**

In [None]:
# Fields kept for every ticker, in the order they appear in the result table.
# The "[%]" columns are not downloaded directly: the download cell derives them
# from the analyst target prices relative to currentPrice.
columns = [
    # identification and analyst view
    'symbol', 'currentPrice', 'recommendationKey', 'recommendationMean',
    # analyst price targets, relative to the current price
    'targetLowPrice [%]', 'targetMeanPrice [%]',
    'targetMedianPrice [%]', 'targetHighPrice [%]',
    'numberOfAnalystOpinions',
    # valuation and growth
    'P/E', 'EPS next Y', 'EPS next 5Y', 'Profit Margin',
    # returns and liquidity
    'ROA', 'ROE', 'ROI', 'currentRatio', 'quickRatio',
    # ownership changes
    'Insider Trans', 'Inst Trans',
]

**Lists with tickers we are interested in**

In [None]:
# Watchlists of ticker symbols; one of them is selected for analysis further down.
# Each symbol must appear only once per list: a repeated symbol is downloaded twice
# and produces duplicate rows (a non-unique index) in the result table.
finansowa_edu = ['NFLX', 'MSFT', 'AAPL', 'NVDA', 'V', 'JNJ', 'AZN', 'SAP', 'RGEN', 'PG', 'AMT', 'GOOGL', 'AMZN']
Trela = ['TSM', 'INTU', 'NOW', 'PTC', 'ALGN', 'AMZN', 'QCOM', 'MA', 'ASML', 'MPWR', 'NVDA', 'AMD']
# 'TSM' was listed twice here; the duplicate has been removed.
Kupione = ['EXAS', 'META', 'NIO', 'PDD', 'CRWD', 'MKSI', 'MDT', 'TSM', 'NFLX', 'MSFT', 'AAPL', 'V', 'BABA']
DNA = ['ROKU', 'TWLO', 'SE', 'DIS', 'UUUU', 'DNN']

**Optional: load the tickers from a Seeking Alpha export**

In [None]:
# Read the Seeking Alpha export (an .xlsx file stored on the mounted drive)
name = 'TopHealthcare'
seekingalpha_path = '/content/drive/MyDrive/Colab Notebooks/' + name + '.xlsx'
seekingalpha = pd.read_excel(seekingalpha_path)

# Ticker symbols of the export, in file order
alpha = list(seekingalpha['Symbol'])

**Selection of a list of tickers for analysis**

In [None]:
# List of tickers processed by the download cells below.
# NOTE: the Seeking Alpha merge cell assigns the per-ticker Zacks list (one entry
# per element of Tickers) as a column of `seekingalpha`, so when that cell is
# going to be run Tickers has to line up with the rows of the export (i.e. `alpha`).
Tickers = DNA

**Downloading data from finviz and finance.yahoo**

In [None]:
# Derived "[%] versus current price" column -> Yahoo field holding the absolute target
PERCENT_TARGETS = {'targetLowPrice [%]': 'targetLowPrice',
                   'targetMeanPrice [%]': 'targetMeanPrice',
                   'targetMedianPrice [%]': 'targetMedianPrice',
                   'targetHighPrice [%]': 'targetHighPrice'}

# finviz metrics delivered as text such as '12.50%' that must become floats
FINVIZ_PERCENT_COLUMNS = ['EPS next Y', 'EPS next 5Y', 'Profit Margin', 'ROA', 'ROE', 'ROI',
                          'Insider Trans', 'Inst Trans']

# Placeholder stored when a field is not available for a ticker
MISSING = '-'


def fetch_finviz_snapshot(tic):
    """Scrape the finviz snapshot table of one ticker.

    Args:
      tic (str) - ticker symbol

    Returns:
      dict mapping the finviz label to its text value ('-' cells are stored as 0),
      or None when the page contains no snapshot rows (e.g. unknown ticker).
    """
    page = scraper.get('https://finviz.com/quote.ashx?t=' + tic)
    page_html = BeautifulSoup(page.content, 'html.parser')
    table = page_html.find_all('tr', class_='table-dark-row')
    if not table:
        return None

    labels = []
    values = []
    for tr in table:
        labels.extend(td.text for td in tr.find_all('td', class_='snapshot-td2-cp'))
        values.extend(0 if td.text == '-' else td.text
                      for td in tr.find_all('td', class_='snapshot-td2'))
    return dict(zip(labels, values))


def fetch_yahoo_info(tic, attempts=10):
    """Download the Yahoo Finance info dictionary of one ticker.

    The request is retried up to `attempts` times. When every attempt fails an
    empty dict is returned (and the problem is printed) so that the ticker is
    still reported with its finviz data instead of silently reusing the values
    of the previously processed ticker.

    Args:
      tic (str) - ticker symbol
      attempts (int) - maximum number of download attempts

    Returns:
      dict with the Yahoo Finance fields (possibly empty)
    """
    last_error = None
    for _ in range(attempts):
        try:
            # Ticker.info is the dictionary that carries currentPrice, the analyst
            # targets, recommendationMean, currentRatio, ... used in `columns`
            return dict(yf.Ticker(tic).info or {})
        except Exception as err:
            last_error = err
    print(f'{tic}: no Yahoo Finance data after {attempts} attempts ({last_error!r})')
    return {}


records = []

for tic in Tickers:

    # Downloading data from finviz (website source)
    finviz = fetch_finviz_snapshot(tic)
    if finviz is None:
        continue

    # Data connection finance.yahoo + finviz (finviz wins on duplicated keys)
    data = fetch_yahoo_info(tic)
    data.setdefault('symbol', tic)
    data.update(finviz)

    # Conversion of the absolute analyst targets to percentages of the current price
    for pct_col, price_col in PERCENT_TARGETS.items():
        try:
            data[pct_col] = round(100 * float(data[price_col]) / float(data['currentPrice']) - 100)
        except (KeyError, TypeError, ValueError, ZeroDivisionError, OverflowError):
            data[pct_col] = MISSING

    records.append({col: data.get(col, MISSING) for col in columns})

# Build the table once instead of appending row by row
# (DataFrame.append is no longer available in pandas 2.x)
asset = pd.DataFrame(records, columns=columns)

# Columns that need to be reformatted to number: strip the '%' sign and let
# anything that is not a number (e.g. the '-' placeholder) become NaN
for col in FINVIZ_PERCENT_COLUMNS:
    cleaned = asset[col].astype(str).str.replace('%', '', regex=False)
    asset[col] = pd.to_numeric(cleaned, errors='coerce').astype(float)

asset = asset.set_index('symbol')

**Downloading data from Zacks**

In [None]:
# In case Zacks realizes the page is scraped you can get help from scrapeops.io.
# The key is read from the SCRAPEOPS_API_KEY environment variable so that a real
# key never has to be stored in the notebook; 'xxxxx' is only a placeholder.
API_KEY = os.environ.get('SCRAPEOPS_API_KEY', 'xxxxx')

def get_scrapeops_url(url):
  """
    Wraps a target url into a scrapeops.io proxy request url.

    Args:
      url (str) - address of the page to download

    Returns:
      proxy url (str) carrying API_KEY, the target url and the cloudflare bypass flag
  """
  payload = {'api_key': API_KEY, 'url': url, 'bypass': 'cloudflare'}
  return 'https://proxy.scrapeops.io/v1/?' + urllib.parse.urlencode(payload)

#Fake user agent for scraper
user_agents_list = ['Mozilla/5.0 (iPad; CPU OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148',
                    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.83 Safari/537.36',
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36']

def parse_zacks_rank(page_html):
  """
    Extracts the Zacks rank from a parsed quote page.

    The rank is taken from the second character of the first 'rank_view'
    paragraph (spaces removed) and returned as an int, so that it matches the
    integer keys of the rank-to-score dictionary used in the scoring cells.

    Args:
      page_html - BeautifulSoup document of the quote page

    Returns:
      rank 1-5 (int), or 0 when no rank could be read
  """
  rank_tags = page_html.find_all('p', class_="rank_view")
  if not rank_tags:
    return 0
  compact_text = rank_tags[0].get_text().replace(" ", "")
  rank_char = compact_text[1:2]
  if rank_char in {'1', '2', '3', '4', '5'}:
    return int(rank_char)
  return 0

# One entry per ticker, in the order of Tickers; 0 means "no rank available"
zacks = []

for tic in Tickers:
  url = 'https://www.zacks.com/stock/quote/' + tic
  try:
    page = scraper.get(url, headers={'User-Agent': random.choice(user_agents_list)})
    # To go through the proxy instead use: page = scraper.get(get_scrapeops_url(url))
  except Exception as err:
    print(f'{tic}: Zacks page could not be downloaded ({err!r})')
    zacks.append(0)
    continue

  zacks.append(parse_zacks_rank(BeautifulSoup(page.content, 'html.parser')))

**Table formatting for future connection (if you use seekingalpha)**

In [None]:
# Attach the Zacks ranks to the Seeking Alpha table, drop its 'Rank' column
# and index it by ticker
seekingalpha['zacks'] = zacks
seekingalpha = seekingalpha.drop(columns='Rank').set_index('Symbol')

# Give the downloaded table the same index name so both can be joined on it
asset.index.name = "Symbol"

#Table joins (only tickers present in both tables are kept)
asset = seekingalpha.merge(asset, on='Symbol')

**Preparation of a table for scoring purposes**

In [None]:
# Empty table; the following cells add one score column per rated metric
scoring = pd.DataFrame()

In [None]:
def ratings_to_float(series):
  """
    Converts a rating column to floats.

    Text values may use a decimal comma. Everything that is not a number
    (for example the '-' placeholder) becomes NaN instead of raising or
    being turned into 0.

    Args:
      series (pd.Series) - column to convert

    Returns:
      pd.Series of floats
  """
  if series.dtype == 'O':
    # astype(str) keeps numbers stored inside an object column;
    # the .str accessor alone would turn them into NaN
    series = series.astype(str).str.replace(",", ".", regex=False)
  return pd.to_numeric(series, errors='coerce').astype(float)


# Changing the ranking to the "more the better" and rescaling it to values between 0 and 1.
# (5/x)/5 gives 1 for x = 1 and 0.2 for x = 5. Missing or non-positive values are
# replaced by 100 (score 0.01); previously a '-' placeholder became 0 and produced
# an infinite score through the division by zero.
recommendation = ratings_to_float(asset['recommendationMean'])
recommendation = recommendation.where(recommendation > 0, 100)
scoring['recommendationMean'] = (5/recommendation)/5


# Convert to float and rescale the ratings (presumably on a 0-5 scale, as the
# division by 5 suggests) to values between 0 and 1; a missing rating scores 0
for rating_col in ('Quant', 'SA Authors', 'Wall St.'):
  scoring[rating_col] = ratings_to_float(asset[rating_col]).fillna(0)/5

In [None]:
def conversion(x, thresholds):

  """
    Maps x onto one of five score levels using three ascending thresholds.

    Args:
      x (float) - value to convert
      thresholds - (length 3 array of int) - upper bounds of the 0.25, 0.5 and 0.75 levels

    Returns:
      0    for a missing value or x <= 0
      0.25 for 0 < x <= thresholds[0]
      0.5  for thresholds[0] < x <= thresholds[1]
      0.75 for thresholds[1] < x <= thresholds[2]
      1    for x > thresholds[2]
  """

  # Missing and non-positive values get no points
  if pd.isna(x) or x <= 0:
    return 0

  # First threshold that x does not exceed decides the level
  for position, level in enumerate((.25, .5, .75)):
    if x <= thresholds[position]:
      return level

  return 1

In [None]:
# Thresholds used to convert the corresponding columns with conversion():
# (column, upper bound of the 0.25 level, of the 0.5 level, of the 0.75 level).
# The order of the rows is the order in which the score columns are created.
_THRESHOLD_ROWS = (
    ('targetLowPrice [%]', 5, 10, 20),
    ('targetMeanPrice [%]', 20, 40, 60),
    ('targetMedianPrice [%]', 20, 40, 60),
    ('targetHighPrice [%]', 40, 60, 80),
    ('numberOfAnalystOpinions', 3, 10, 15),
    ('EPS next Y', 5, 10, 15),
    ('EPS next 5Y', 5, 15, 25),
    ('Profit Margin', 10, 20, 30),
    ('ROA', 10, 20, 30),
    ('ROE', 15, 30, 45),
    ('ROI', 15, 30, 40),
    ('currentRatio', 1, 3, 5),
    ('quickRatio', 1, 3, 5),
    ('Insider Trans', 5, 10, 20),
    ('Inst Trans', 5, 10, 20),
)

dictForConv = {column: [low, mid, high] for column, low, mid, high in _THRESHOLD_ROWS}

In [None]:
# Column conversion: every column listed in dictForConv is turned into a number
# and then scored with conversion() and the thresholds of that column.
# Text columns (decimal comma, '-' placeholder for missing data) go through the
# same scoring as numeric ones; values that are not numbers become NaN, which
# conversion() scores as 0. A leading minus sign is preserved.
for col, thresholds in dictForConv.items():
  raw = asset[col]
  if raw.dtype == 'O':
    raw = raw.astype(str).str.replace(",", ".", regex=False)
  numeric = pd.to_numeric(raw, errors='coerce')
  scoring[col] = numeric.apply(conversion, args=(thresholds,))

In [None]:
# Dictionary to convert the letter grades of the remaining columns to scores.
# 'C' was absent from the original table although every other letter has its
# plain grade; .35 places it between 'C+' and 'C-' without changing other values.
seekingalphaMap = {'A+':1, 'A': .9, 'A-': .8, 'B+': .7, 'B': .6, 'B-': .5,
                   'C+': .4, 'C': .35, 'C-': .3, 'D+': .2, 'D': .1, 'D-': 0, 'F': 0, '-':0}
colToConvert = ['Valuation', 'Growth', 'Profitability', 'Momentum']

# conversion (grades that are not in the dictionary become NaN)
for col in colToConvert:
  scoring[col] = asset[col].map(seekingalphaMap)

# Dictionary to convert 'zacks' column (0 stands for "no rank available")
zacksMap = {1:1, 2:.75, 3:.5, 4:.25, 5:0, 0:0}

# conversion: ranks may arrive as text ('3') or numbers (3), so they are
# normalised to integers first; otherwise text ranks never match the integer
# keys of zacksMap and end up as NaN
zacks_rank = pd.to_numeric(asset['zacks'], errors='coerce').fillna(0).astype(int)
scoring['zacks'] = zacks_rank.map(zacksMap)

In [None]:
# Score columns in the order in which the scoring table is built
_SCORED_COLUMNS = ['recommendationMean', 'Quant', 'SA Authors', 'Wall St.',
                   'targetLowPrice [%]', 'targetMeanPrice [%]',
                   'targetMedianPrice [%]', 'targetHighPrice [%]',
                   'numberOfAnalystOpinions',
                   'EPS next Y', 'EPS next 5Y', 'Profit Margin',
                   'ROA', 'ROE', 'ROI', 'currentRatio', 'quickRatio',
                   'Insider Trans', 'Inst Trans',
                   'Valuation', 'Growth', 'Profitability', 'Momentum',
                   'zacks']

# Columns that count 1.2 times; every other column has weight 1
_BOOSTED_COLUMNS = {'Quant', 'Wall St.', 'targetMeanPrice [%]',
                    'targetMedianPrice [%]', 'EPS next Y', 'zacks'}

weightDict = {column: (1.2 if column in _BOOSTED_COLUMNS else 1)
              for column in _SCORED_COLUMNS}

# Weights as a plain list, in the same order as the columns above
weight = list(weightDict.values())
  

**Weighting the scores and adding the total score**

In [None]:
# Multiply every score column by the weight registered under the same name in
# weightDict (alignment by column name, not by position) and add up each row.
# `scoring` itself is left untouched so this cell can be re-run safely.
weights_by_column = pd.Series(weightDict, dtype=float)

columns_without_weight = scoring.columns.difference(weights_by_column.index)
if len(columns_without_weight):
  raise ValueError(f"No weight defined in weightDict for: {list(columns_without_weight)}")

# NaN scores (missing data) are skipped by sum(), i.e. they contribute 0
asset['scoring'] = scoring.mul(weights_by_column, axis='columns').sum(axis=1)

**Saving data to disk**

In [None]:
# Write the result table next to the input file, prefixed with "output_"
name = 'TopHealthcare'
output_path = '/content/drive/MyDrive/Colab Notebooks/output_' + name + '.xlsx'
asset.to_excel(output_path)

**Data visualization**

In [None]:
def chart(symbol):

  """
    Plots the year-to-date closing price of a ticker together with horizontal
    lines for the low, mean and high analyst target prices.

    The absolute target is read from `asset` when the column exists. Otherwise
    it is reconstructed from 'currentPrice' and the corresponding '[%]' column;
    that value is approximate because the percentage is stored rounded.
    A target that is not available as a number is left out of the chart.

    Args:
      symbol (str) - ticker present in the index of `asset`
  """

  history = yf.Ticker(symbol).history(period = "ytd")

  fig = go.Figure()

  fig.add_trace(go.Scatter(x=history.index, y=history.Close,
                           mode='lines',
                           name='Close'))

  for target in ('targetLowPrice', 'targetMeanPrice', 'targetHighPrice'):
    percent = asset.loc[symbol, target + ' [%]']
    try:
      if target in asset.columns:
        price = float(asset.loc[symbol, target])
      else:
        price = float(asset.loc[symbol, 'currentPrice']) * (1 + float(percent) / 100)
    except (TypeError, ValueError):
      # e.g. the '-' placeholder: nothing meaningful to draw
      continue

    fig.add_trace(go.Scatter(x=history.index, y=[price] * len(history),
                             mode='lines',
                             name=f'{target} ({percent}%)'))
  fig.show()

In [None]:
# chart titles, keyed by the column of `asset` that each chart shows.
# The mapping is written out explicitly: pairing the titles with a slice of
# `columns` by position attached wrong titles as soon as the two lists stopped
# lining up (e.g. 'numberOfAnalystOpinions' was titled 'P/E').
col_tite = {'recommendationMean': 'Recommendation mean',
            'targetLowPrice [%]': 'Target low price [%]',
            'targetMeanPrice [%]': 'Target mean price [%]',
            'targetMedianPrice [%]': 'Target median price [%]',
            'targetHighPrice [%]': 'Target high price [%]',
            'P/E': 'P/E',
            'EPS next Y': 'EPS next Y',
            'EPS next 5Y': 'EPS next 5Y',
            'Profit Margin': 'Profit Margin',
            'ROA': 'ROA',
            'ROE': 'ROE',
            'ROI': 'ROI',
            'currentRatio': 'Current ratio',
            'quickRatio': 'Quick ratio'}

title = list(col_tite.values())

# One bar chart per statistic, one bar per ticker
for statistics, chart_title in col_tite.items():
  fig = px.bar(asset, x=asset.index, y=statistics, title=chart_title)
  fig.show()