In [1]:
# Library

import pandas as pd
import os
import numpy as np
from bs4 import BeautifulSoup
import requests

In [2]:
# load metadata

parent_path = os.path.abspath(os.path.pardir)
metadata_file = 'PEA_PME_List_20181227.csv' # PEA/PME list from euronext as of 20181227
metadata_path = os.path.join(parent_path, 'dat', metadata_file)

# get list of stocks from metadata (only the columns needed for filtering)

universe_raw = pd.read_csv(metadata_path, usecols=['Code','Delisted','Country of Incorporation'])

# keep stocks that are still listed, incorporated in France and have a code.
# A single boolean-mask selection returns a new frame, so nothing is dropped
# in place on a filtered slice, and the missing-code check runs only once
# (isna and isnull are the same test). The original row index is preserved.

is_listed = universe_raw['Delisted'] == 'No'
is_french = universe_raw['Country of Incorporation'] == 'France'
has_code = universe_raw['Code'].notna()

universe = universe_raw.loc[is_listed & is_french & has_code, ['Code']]

total_stocks = universe['Code'].count()
print('Size of universe: {}'.format(total_stocks))

Size of universe: 338


In [3]:
# preview the first five rows of the filtered universe

print(universe.iloc[:5])

    Code
0  ALMIL
1  2CRSI
3     AB
4   ABCA
5   ABEO


In [4]:
# append the '.PA' suffix to every code so they are compatible with Yahoo! finance

yahoo_stocks = universe + '.PA'
print(yahoo_stocks.iloc[:5])

       Code
0  ALMIL.PA
1  2CRSI.PA
3     AB.PA
4   ABCA.PA
5   ABEO.PA


In [5]:
# flatten the first column of the frame into a plain Python list of tickers

y_stock = yahoo_stocks.iloc[:, 0].tolist()

In [15]:
# get p/e ratio, EPS, open price and market cap from Yahoo! Finance
#
# Each ticker's quote page is downloaded and the four values are read from the
# elements tagged with the corresponding `data-test` attribute. A ticker is
# recorded only when all four values are found, so the five lists below always
# stay aligned with one another.

stock_list = list()
pe_list = list()
eps_list = list()
open_value_list = list()
mcap_list = list()

# seconds to wait for Yahoo! before giving up on a ticker; without a timeout a
# stalled connection would block the loop indefinitely
request_timeout = 10

for stock in y_stock:
    url = 'https://finance.yahoo.com/quote/{}/'.format(stock)
    try:
        response = requests.get(url, timeout=request_timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        # [0] raises IndexError when the page does not contain the field
        pe = soup.find_all(attrs={'data-test': 'PE_RATIO-value'})[0].text
        eps = soup.find_all(attrs={'data-test': 'EPS_RATIO-value'})[0].text
        open_value = soup.find_all(attrs={'data-test': 'OPEN-value'})[0].text
        mcap = soup.find_all(attrs={'data-test': 'MARKET_CAP-value'})[0].text

    except (requests.RequestException, IndexError):
        # skip tickers that cannot be downloaded or whose page lacks a field;
        # only these expected failures are caught so that KeyboardInterrupt
        # and genuine programming errors are no longer silently swallowed
        continue

    else:
        stock_list.append(stock)
        pe_list.append(pe)
        eps_list.append(eps)
        open_value_list.append(open_value)
        mcap_list.append(mcap)

In [33]:
# clean P/E values list 

def _clean_list_values(x):
    if x == 'N/A':
        return -999
    else:
        return float(''.join(x.split(',')))

# convert the scraped strings to numbers; all three lists go through
# _clean_list_values (the previous calls used the undefined name
# _cleanListValues and raised NameError on a fresh kernel)
pe = list(map(_clean_list_values, pe_list))
eps = list(map(_clean_list_values, eps_list))
open_value = list(map(_clean_list_values, open_value_list))

In [42]:
def _clean_MCap_values(x):
    if x == 'N/A':
        final_value = -999
        return final_value
    
    else:
        comma_stripped = ''.join(x.split(','))
        
        if comma_stripped[-1] == 'B':
            final_value = float(comma_stripped[:-1]) * 10**9
            
        elif comma_stripped[-1] == 'M': 
            final_value = float(comma_stripped[:-1]) * 10**6
            
        else:
            final_value = float(comma_stripped)
            
        return final_value


# numeric market caps, in the same order as mcap_list
mcap = [_clean_MCap_values(raw_cap) for raw_cap in mcap_list]

In [75]:
# gather the cleaned columns under the names used for the dataframe

data_dictionary = dict(
    stock_list=stock_list,
    mcap=mcap,
    pe=pe,
    eps=eps,
    open_value=open_value,
)

In [53]:
# create dataframe

df_data = pd.DataFrame( data_dictionary )

# Recompute the P/E ratio as open price / EPS.
# The cleaning step marks missing values with -999; dividing with that
# sentinel produces meaningless ratios (a missing price over a negative EPS
# even yields a large positive "P/E"), and an EPS of zero yields infinity.
# Those rows get NaN instead, so they cannot pass a later numeric filter.
missing_value = -999
has_inputs = (
    df_data['open_value'].ne(missing_value)
    & df_data['eps'].ne(missing_value)
    & df_data['eps'].ne(0)
)
df_data['pe_calculated'] = (df_data['open_value'] / df_data['eps']).where(has_inputs)
df_data.head()

Unnamed: 0,stock_list,mcap,pe,eps,open_value,pe_calculated
0,ALMIL.PA,72416000.0,18.72,1.47,27.7,18.843537
1,2CRSI.PA,127051000.0,48.48,0.18,8.84,49.111111
2,AB.PA,158919000.0,-999.0,-0.68,3.94,-5.794118
3,ABCA.PA,357339000.0,21.99,0.28,6.24,22.285714
4,ABEO.PA,232189000.0,28.27,1.09,30.8,28.256881


In [62]:
# keep only the rows whose recomputed P/E is strictly greater than 1

pe_above_one = df_data['pe_calculated'] > 1.0
df_cleaned_data = df_data[pe_above_one]
df_cleaned_data.head()

Unnamed: 0,stock_list,mcap,pe,eps,open_value,pe_calculated
0,ALMIL.PA,72416000.0,18.72,1.47,27.7,18.843537
1,2CRSI.PA,127051000.0,48.48,0.18,8.84,49.111111
3,ABCA.PA,357339000.0,21.99,0.28,6.24,22.285714
4,ABEO.PA,232189000.0,28.27,1.09,30.8,28.256881
7,ATI.PA,67178000.0,8.36,0.4,3.37,8.425


In [69]:
# rank by recomputed P/E (lowest first) and keep the 50 lowest

ranked_by_pe = df_cleaned_data.sort_values(by='pe_calculated', ascending=True)
df_value_list = ranked_by_pe.iloc[:50]
df_value_list.head(10)

Unnamed: 0,stock_list,mcap,pe,eps,open_value,pe_calculated
13,ALADM.PA,5211000.0,1.24,0.69,0.85,1.231884
54,MLONE.PA,845824.0,1.44,0.156,0.224,1.435897
267,PSB.PA,122112000.0,1.92,17.49,33.7,1.926815
163,IDIP.PA,281045000.0,2.16,18.12,39.5,2.179912
42,BUI.PA,4716000.0,2.27,3.08,7.0,2.272727
105,ALNN6.PA,27986000.0,4.64,0.73,3.3705,4.617123
258,PVL.PA,207971000.0,4.54,2.08,9.67,4.649038
225,ALMOU.PA,15918000.0,5.19,1.01,5.14,5.089109
159,HOP.PA,17207000.0,5.6,1.18,6.6,5.59322
97,ALDLS.PA,43841000.0,5.56,3.11,17.75,5.707395


In [71]:
# order the low-P/E stocks by market cap, smallest first

df_cap_list = df_value_list.sort_values(by='mcap')
df_cap_list.head(10)

Unnamed: 0,stock_list,mcap,pe,eps,open_value,pe_calculated
54,MLONE.PA,845824.0,1.44,0.156,0.224,1.435897
42,BUI.PA,4716000.0,2.27,3.08,7.0,2.272727
13,ALADM.PA,5211000.0,1.24,0.69,0.85,1.231884
320,MLVER.PA,5510000.0,8.15,1.52,12.4,8.157895
81,COUR.PA,9150000.0,11.96,10.54,126.0,11.954459
143,ALGEP.PA,11707000.0,9.1,0.195,1.72,8.820513
76,ALCOF.PA,12376000.0,6.97,45.9,322.0,7.015251
263,ALPRI.PA,12399000.0,5.77,1.64,9.46,5.768293
285,GIRO.PA,13574000.0,11.28,1.12,12.65,11.294643
256,ALPLA.PA,14389000.0,9.49,0.27,2.58,9.555556


In [74]:
# short list as of 17 Jan 2019: the 15 smallest market caps among the low-P/E stocks

df_short_listed_stocks = df_cap_list.iloc[:15]
print(df_short_listed_stocks)

    stock_list        mcap     pe     eps  open_value  pe_calculated
54    MLONE.PA    845824.0   1.44   0.156       0.224       1.435897
42      BUI.PA   4716000.0   2.27   3.080       7.000       2.272727
13    ALADM.PA   5211000.0   1.24   0.690       0.850       1.231884
320   MLVER.PA   5510000.0   8.15   1.520      12.400       8.157895
81     COUR.PA   9150000.0  11.96  10.540     126.000      11.954459
143   ALGEP.PA  11707000.0   9.10   0.195       1.720       8.820513
76    ALCOF.PA  12376000.0   6.97  45.900     322.000       7.015251
263   ALPRI.PA  12399000.0   5.77   1.640       9.460       5.768293
285    GIRO.PA  13574000.0  11.28   1.120      12.650      11.294643
256   ALPLA.PA  14389000.0   9.49   0.270       2.580       9.555556
225   ALMOU.PA  15918000.0   5.19   1.010       5.140       5.089109
159     HOP.PA  17207000.0   5.60   1.180       6.600       5.593220
175     ITE.PA  22110000.0   7.16   0.530       3.800       7.169811
297   ALSAS.PA  24078000.0   9.37 