In [1]:
import pandas as pd
import os
import quandl
import numpy as np
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import requests



In [2]:
# Path of the Quandl API-key file: <parent directory>/cfg/quandl_api_key
parent_path = os.path.abspath(os.pardir)
cfg = os.path.join(parent_path, 'cfg', 'quandl_api_key')


In [3]:
# quandl config
# The key is taken from the first line of the config file. It is stripped so
# the trailing newline (or stray spaces) does not end up inside the key.

with open(cfg, 'r') as key_file:
    api_key = key_file.readline().strip()

if not api_key:
    # Fail early with a clear message instead of configuring an empty key.
    raise ValueError('No API key found on the first line of {}'.format(cfg))

quandl.ApiConfig.api_key = api_key

In [4]:
# load metadata

metadata_file = 'FSE_metadata.csv' # as of Jan 9, 2019 NSE data is no longer a free data available on quandl
metadata_path = os.path.join(parent_path, 'dat', metadata_file)

# Parse `from_date` explicitly: `parse_dates=True` only attempts to parse the
# index, and no index column is requested here, so the date filter below would
# otherwise be a plain string comparison.
stocks_metadata = pd.read_csv(metadata_path, parse_dates=['from_date'])

# get list of stocks from metadata: keep the codes whose `from_date` is
# earlier than 2017-01-01

listed_before_cutoff = stocks_metadata['from_date'] < pd.Timestamp('2017-01-01')
quandl_stocks = stocks_metadata.loc[listed_before_cutoff, 'code']

# number of non-null codes that passed the filter
total_stocks = quandl_stocks.count()

In [5]:
# Show how many codes passed the filter, followed by the first few of them

print('Stocks in universe: {}'.format(total_stocks), quandl_stocks.head(), sep='\n')

Stocks in universe: 162
0    1COV_X
1     2HR_X
2     AAD_X
3     AB1_X
4     ADS_X
Name: code, dtype: object


In [6]:
# Build Yahoo! Finance style tickers: keep the part of each code before the
# first underscore and append '.DE'

yahoo_stocks = quandl_stocks.apply(lambda code: code.partition('_')[0] + '.DE')
print(yahoo_stocks.head())

0    1COV.DE
1     2HR.DE
2     AAD.DE
3     AB1.DE
4     ADS.DE
Name: code, dtype: object


In [7]:
# plain Python list of the Yahoo! tickers

y_stock = yahoo_stocks.tolist()

In [None]:
# get p/e ratio from Yahoo! Finance
#
# Best-effort scrape: a ticker whose page cannot be fetched, or whose page has
# no element tagged data-test="PE_RATIO-value", is skipped and recorded in
# `skipped_list`. `stock_list` and `pe_list` stay aligned index by index.

stock_list = list()
pe_list = list()
skipped_list = list()  # tickers for which no P/E text could be retrieved

for stock in y_stock:
    url = 'https://finance.yahoo.com/quote/{}/'.format(stock)

    try:
        # timeout so one unresponsive request cannot stall the whole loop
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # network-level failure only; KeyboardInterrupt etc. still propagate
        skipped_list.append(stock)
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    pe_tag = soup.find(attrs={'data-test': 'PE_RATIO-value'})
    if pe_tag is None:
        # page has no P/E element
        skipped_list.append(stock)
        continue

    stock_list.append(stock)
    pe_list.append(pe_tag.text)

In [None]:
# clean P/E values list 

def _cleanPEValues(x):
    """Convert one scraped P/E string to a number.

    Returns the sentinel -999 when the text is exactly 'N/A'; otherwise
    removes the comma separators and returns the value as a float.
    """
    if x != 'N/A':
        return float(x.replace(',', ''))
    return -999

# numeric P/E values, in the same order as pe_list
pe = [_cleanPEValues(raw_value) for raw_value in pe_list]

In [None]:
# Assemble tickers and their cleaned P/E values into one table

df_stocks_pe = pd.DataFrame(dict(stocks=stock_list, pe=pe))

In [None]:
# remove rows with improper values
# -999 is the sentinel the cleaning step assigns to 'N/A' P/E values.

rows_to_keep = df_stocks_pe.loc[:, 'pe'] != -999

# Filter with the mask defined above (the original referenced an undefined
# `rows_to_drop`), then sort ascending by P/E. Chaining returns a new frame
# instead of sorting a filtered slice in place.
df_stocks_pe = df_stocks_pe.loc[rows_to_keep, :].sort_values(by='pe')

df_stocks_pe.head(10)

In [None]:
# short-listed candidates: rows whose P/E is below 10

df_stocks_pe.query('pe < 10')
