# Stock Recommendation System

In [6]:
#importing libraries

import pandas as pd
from bs4 import BeautifulSoup as bs
import requests

In [26]:
# parsing data

def fundamental_metric(soup, metric):
    """Return the text of the value cell that follows a metric label.

    Looks up the first string in ``soup`` that equals ``metric`` and returns
    the ``.text`` of the next element carrying the ``snapshot-td2`` class.

    Parameters
    ----------
    soup : parsed page exposing BeautifulSoup's ``find`` API.
    metric : str
        Label exactly as it appears on the page, e.g. ``'P/E'``.

    Returns
    -------
    str
        The raw cell text (not stripped or converted).

    Raises
    ------
    AttributeError
        If the label or its value cell is absent. The type matches what the
        unguarded attribute chain raised, so existing handlers keep working.
    """
    # `string=` is the supported spelling; `text=` is the deprecated alias.
    label = soup.find(string=metric)
    if label is None:
        raise AttributeError('metric label %r not found in page' % (metric,))

    value_cell = label.find_next(class_='snapshot-td2')
    if value_cell is None:
        raise AttributeError('no snapshot-td2 value cell after label %r' % (metric,))

    return value_cell.text

def get_fundamental_data(df, timeout=10):
    """Fill ``df`` in place with values scraped from Finviz quote pages.

    For every label in ``df.index`` the function downloads
    ``https://finviz.com/quote.ashx?t=<symbol lower-cased>`` and, for every
    column name in ``df.columns``, stores the text returned by
    ``fundamental_metric`` in ``df.loc[symbol, column]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Index holds ticker symbols (strings); columns hold the metric labels
        to look up. The frame is modified in place.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up, so one
        stalled connection cannot hang the whole loop.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Notes
    -----
    This is best-effort: any failure for a symbol is reported on stdout and
    the loop moves on. Cells written before the failure keep their values;
    the rest of that row is left untouched.
    """
    for symbol in df.index:
        try:
            url = 'https://finviz.com/quote.ashx?t=' + symbol.lower()
            response = requests.get(
                url,
                headers={'User-Agent': 'Mozilla/5.0'},
                timeout=timeout,
            )
            # Surface HTTP errors (404, 429, ...) instead of parsing an error page.
            response.raise_for_status()
            # Name the parser explicitly so results do not depend on which
            # optional parsers happen to be installed.
            soup = bs(response.content, 'html.parser')

            for m in df.columns:
                df.loc[symbol, m] = fundamental_metric(soup, m)
        except Exception as e:
            # Keep going, but say why the symbol failed.
            print(symbol, 'not found:', repr(e))

    return df
            

In [27]:
# Load the S&P 500 constituents page from Wikipedia.
# read_html returns a list with one DataFrame per HTML table it parses;
# the constituents table is taken to be the first entry.
SP500_WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

sp500_list = pd.read_html(SP500_WIKI_URL)
first_table = sp500_list[0]

In [28]:
# Preview the first rows of the table selected from the Wikipedia page.
first_table.head()

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M Company,reports,Industrials,Industrial Conglomerates,"St. Paul, Minnesota",1976-08-09,66740,1902
1,ABT,Abbott Laboratories,reports,Health Care,Health Care Equipment,"North Chicago, Illinois",1964-03-31,1800,1888
2,ABBV,AbbVie Inc.,reports,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
3,ABMD,ABIOMED Inc,reports,Health Care,Health Care Equipment,"Danvers, Massachusetts",2018-05-31,815094,1981
4,ACN,Accenture plc,reports,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [29]:
# Keep only the rows whose GICS sector is Information Technology.
is_info_tech = first_table['GICS Sector'].eq('Information Technology')
df = first_table.loc[is_info_tech]

# Report the (rows, columns) size of the filtered table.
print(df.shape)

# Collect the ticker symbols of the filtered rows as a plain Python list.
tickers = df['Symbol'].tolist()


(71, 9)


### Stock Tickers

In [30]:
# Show the ticker symbols that will be looked up on Finviz.
print(tickers)

['ACN', 'ADBE', 'AMD', 'AKAM', 'APH', 'ADI', 'ANSS', 'AAPL', 'AMAT', 'ANET', 'ADSK', 'ADP', 'AVGO', 'BR', 'CDNS', 'CDW', 'CSCO', 'CTXS', 'CTSH', 'GLW', 'DXC', 'FFIV', 'FIS', 'FISV', 'FLT', 'FLIR', 'FTNT', 'IT', 'GPN', 'HPE', 'HPQ', 'INTC', 'IBM', 'INTU', 'IPGP', 'JKHY', 'JNPR', 'KEYS', 'KLAC', 'LRCX', 'LDOS', 'MA', 'MXIM', 'MCHP', 'MU', 'MSFT', 'MSI', 'NTAP', 'NLOK', 'NVDA', 'ORCL', 'PAYX', 'PAYC', 'PYPL', 'QRVO', 'QCOM', 'CRM', 'STX', 'NOW', 'SWKS', 'SNPS', 'TEL', 'TXN', 'TYL', 'VRSN', 'V', 'WDC', 'WU', 'XRX', 'XLNX', 'ZBRA']


### Metrics for analysis

In [31]:
# Labels to look up on each quote page; each label becomes one column of the
# results frame, so the strings must match the page text exactly.
metric = [
    'P/B', 'P/E', 'Forward P/E', 'PEG',   # valuation ratios
    'Debt/Eq',                            # leverage
    'EPS (ttm)', 'Dividend %',            # earnings and payout
    'ROE', 'ROI',                         # returns
]

In [32]:
# Start from an empty frame (one row per ticker, one column per metric) and
# let get_fundamental_data fill it; this issues one HTTP request per ticker.
data = get_fundamental_data(pd.DataFrame(index=tickers, columns=metric))

# Preview the scraped values, which are still raw strings at this point.
data.head()

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI
ACN,9.54,31.29,29.63,4.09,0.0,7.68,1.33%,32.40%,33.90%
ADBE,22.0,65.59,44.66,4.2,0.38,7.59,-,35.10%,20.60%
AMD,27.99,156.92,47.66,4.38,0.21,0.5,-,21.50%,12.40%
AKAM,4.53,32.99,19.88,2.98,0.0,3.3,-,14.70%,9.00%
APH,6.78,30.2,27.44,10.07,0.79,3.59,0.92%,24.70%,15.60%


### Parsing strings to numeric data

In [34]:
# Strip the percent sign from the percentage columns, then convert every
# column to a numeric dtype.
# astype(str) makes this cell safe to re-run: once the columns are numeric,
# the .str accessor alone would raise on them.
percent_columns = ['Dividend %', 'ROE', 'ROI']
for column in percent_columns:
    data[column] = data[column].astype(str).str.replace('%', '', regex=False)

# errors='coerce' turns anything non-numeric (e.g. the '-' placeholder shown
# for missing dividends) into NaN instead of raising.
data = data.apply(pd.to_numeric, errors='coerce')

# Show a sample rather than dumping the whole frame.
data.head(10)

Unnamed: 0,P/B,P/E,Forward P/E,PEG,Debt/Eq,EPS (ttm),Dividend %,ROE,ROI
ACN,9.54,31.29,29.63,4.09,0.00,7.68,1.33,32.4,33.9
ADBE,22.00,65.59,44.66,4.20,0.38,7.59,,35.1,20.6
AMD,27.99,156.92,47.66,4.38,0.21,0.50,,21.5,12.4
AKAM,4.53,32.99,19.88,2.98,0.00,3.30,,14.7,9.0
APH,6.78,30.20,27.44,10.07,0.79,3.59,0.92,24.7,15.6
ADI,3.63,38.84,20.92,4.60,0.47,2.99,2.14,9.5,9.2
ANSS,8.18,71.54,47.50,10.08,0.12,4.59,,12.1,11.2
AAPL,27.58,35.13,29.88,2.82,1.57,3.29,0.71,70.7,26.9
AMAT,5.47,16.61,12.46,0.83,0.57,3.44,1.54,35.9,20.4
ANET,5.15,21.97,21.36,3.26,0.00,9.39,,26.3,27.8


### To CSV

In [36]:
# Save the numeric fundamentals, with the ticker index, as CSV.
# The path is relative to the notebook's working directory so the cell runs on
# any machine; the previous absolute path existed only on one Windows desktop.
OUTPUT_CSV = 'stock_recomm.csv'
data.to_csv(OUTPUT_CSV)