In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import yfinance as yf
from datetime import datetime
import time

In [2]:
REQ = requests.get('https://globefunder.com/revolut-stocks-list/') 

In [3]:
soup = BeautifulSoup(REQ.content,'lxml')
table = soup.find('table')
str_tab = str(table)
df = pd.read_html(str_tab)[0].drop(columns=['#'])
df.to_csv('Revolut_stocks_'+datetime.now().strftime('%Y-%m-%d')+'.csv')
df

Unnamed: 0,Company name,Symbol,Stock price (USD),Sector,Industry,Market
0,Agilent Technologies,A,$154.75,Health Technology,Medical Specialties,NYSE
1,Alcoa,AA,$57.11,Non-Energy Minerals,Aluminum,NYSE
2,American Airlines,AAL,$20.02,Transportation,Airlines,NASDAQ
3,Advance Auto Parts,AAP,$220.99,Retail Trade,Specialty Stores,NYSE
4,Apple,AAPL,$144.52,Electronic Technology,Telecommunications Equipment,NASDAQ
...,...,...,...,...,...,...
941,Zynga,ZNGA,$7.37,Technology Services,Internet Software/Services,NASDAQ
942,China Southern Airlines,ZNH,$30.79,Transportation,Airlines,NYSE
943,Zscaler,ZS,$298.59,Technology Services,Packaged Software,NASDAQ
944,ZTO Express,ZTO,$31.02,Transportation,Air Freight/Couriers,NYSE


In [4]:
y_df = df.copy()
y_df

Unnamed: 0,Company name,Symbol,Stock price (USD),Sector,Industry,Market
0,Agilent Technologies,A,$154.75,Health Technology,Medical Specialties,NYSE
1,Alcoa,AA,$57.11,Non-Energy Minerals,Aluminum,NYSE
2,American Airlines,AAL,$20.02,Transportation,Airlines,NASDAQ
3,Advance Auto Parts,AAP,$220.99,Retail Trade,Specialty Stores,NYSE
4,Apple,AAPL,$144.52,Electronic Technology,Telecommunications Equipment,NASDAQ
...,...,...,...,...,...,...
941,Zynga,ZNGA,$7.37,Technology Services,Internet Software/Services,NASDAQ
942,China Southern Airlines,ZNH,$30.79,Transportation,Airlines,NYSE
943,Zscaler,ZS,$298.59,Technology Services,Packaged Software,NASDAQ
944,ZTO Express,ZTO,$31.02,Transportation,Air Freight/Couriers,NYSE


In [5]:
def hours_minutes_seconds(seconds):
    '''
    takes in a value for seconds and returns a string showing length of time in hours, minutes and seconds. 
    '''
    seconds = round(seconds)

    if seconds > 3600:
        hours = round(np.floor(seconds/3600))
        left_over_seconds = (seconds-hours*3600)
        minutes = round(np.floor(left_over_seconds/60))
        seconds = round(left_over_seconds%60)
        return str(hours)+' hours '+str(minutes)+' minutes '+str(seconds)+' seconds.'
        
    elif seconds>60:
        minutes = round(np.floor(seconds/60))
        seconds = round(seconds%60) 
        return str(minutes)+' minutes '+str(seconds)+' seconds.'
    
    else: return str(seconds)+' seconds.'


In [6]:
ld = []
timings_arr = []
for i in range(df.shape[0]):
    if i%115 == 0 or i == 0:
        print_condition = True
        print('Grabbing data for:',df.loc[i,'Company name'])
    else: print_condition = False
    
    start_time = time.time()
    try:
        ticker = df.Symbol[i]
        stock = yf.Ticker(ticker)
        info = stock.info
        enviro = stock.sustainability
        info_of_interest = ['profitMargins','fullTimeEmployees','trailingEps','lastDividendDate','dividendRate','trailingPE','marketCap','dividendYield','quickRatio','payoutRatio','shortPercentOfFloat','bookValue']
        enviromental_data_of_interest = ['environmentScore','palmOil','furLeather','catholic','animalTesting','highestControversy']
        
        try:
            upcoming_earnings_date = stock.calendar.T['Earnings Date'].values
        except:
            upcoming_earnings_date = np.nan
        
        d = {'Symbol':ticker,'nextEarningsDate':upcoming_earnings_date}

        #stats
        for col in info_of_interest:
            try:
                d[col] = info[col]
            except:
                d[col] = np.nan
        
        #sustainability
        for col in enviromental_data_of_interest:
            try:
                d[col] = enviro.Value[col]
            except:
                d[col] = np.nan

        #last quarter earnings and revenue
        try:
            d.update(stock.quarterly_earnings.iloc[-1].rename({'Revenue':'lastQuarterRevenue','Earnings':'lastQuarterEarnings'}).to_dict())
        except:
            d.update({'lastQuarterRevenue':np.nan,'lastQuarterEarnings':np.nan})

        ld.append(d) #add dictionaries to list of dictionaries
        end_time = time.time()
        time_taken = np.round(end_time - start_time,2)
        timings_arr.append(time_taken)
        average_time = np.round(np.mean(timings_arr),2)
        
        if print_condition:
            print('Data successfully added in: ',(time_taken),'seconds!',
            '\nAverage time per stock: ',average_time,
            '\nCompleted: ',str(i+1),'/',str(df.shape[0]),
            '\nEstimated time remaining: ',hours_minutes_seconds((df.shape[0]-i)*average_time),
            '\n_____________________________________________________\n')

    except:
        print('************************************************',
        'Failed to collect data on : ',df.loc[i,'Company_name'],
        '************************************************\n')
        


Grabbing data for: Agilent Technologies
Data successfully added in:  6.22 seconds! 
Average time per stock:  6.22 
Completed:  1 / 946 
Estimated time remaining:  1 hours 38 minutes 4 seconds. 
_____________________________________________________

Grabbing data for: Biogen
Data successfully added in:  6.38 seconds! 
Average time per stock:  6.24 
Completed:  116 / 946 
Estimated time remaining:  1 hours 26 minutes 25 seconds. 
_____________________________________________________

Grabbing data for: Chevron
Data successfully added in:  6.41 seconds! 
Average time per stock:  6.25 
Completed:  231 / 946 
Estimated time remaining:  1 hours 14 minutes 35 seconds. 
_____________________________________________________

Grabbing data for: TechnipFMC plc
Data successfully added in:  6.19 seconds! 
Average time per stock:  6.3 
Completed:  346 / 946 
Estimated time remaining:  1 hours 3 minutes 6 seconds. 
_____________________________________________________

Grabbing data for: Invesco Mort

In [None]:
stock_data_df = pd.DataFrame(ld)
stock_data_df

In [None]:
merged = pd.merge(y_df,stock_data_df,on='Symbol')
merged

In [None]:
merged.to_csv('all_stocks_with_pulled_data.csv')