In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from datetime import datetime

In [5]:
URL = "https://stockchase.com/opinions/recenttop/sort/date/page/1/direction/desc/max/15"
PARSER = 'html.parser'

In [22]:
def soup_maker(url, parser, timeout=30):
    """Download ``url`` and parse the response body into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    parser : str
        Name of the BeautifulSoup parser to use (e.g. ``'html.parser'``).
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; forwarded to
        ``requests.get``. Defaults to 30.

    Returns
    -------
    bs4.BeautifulSoup
        Document parsed from the raw response bytes.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection failure, timeout, ...).
    """
    # Without an explicit timeout ``requests`` waits indefinitely on a
    # stalled connection, which would freeze the notebook cell.
    response = requests.get(url, timeout=timeout)
    # NOTE: the HTTP status code is not checked here, so an error page is
    # parsed like any other document.
    return BeautifulSoup(response.content, parser)

In [28]:
# Two-character ticker endings grouped by market.  Nothing else in this cell
# reads these lists; TEMP_TRADE_US holds the same values as the default
# ``TRADE_US`` argument of ``modify_ticker``, and TEMP_TRADE_TSX holds the two
# endings that ``modify_ticker`` maps to a 'TSE:' prefix.
TEMP_TRADE_TSX = [
    '-T',
    'T ',
]
TEMP_TRADE_US = [
    '-Q',
    '-N',
    '-O',
    '-A',
]

def modify_ticker(my_ticker, TRADE_US = ['-Q','-N', '-O', '-A' ]):
    """Rewrite a scraped ticker according to its last two characters.

    Rules, checked in this order:

    * ending listed in ``TRADE_US``  -> ticker without its last 2 characters
    * ending ``'US'``                -> ticker without its last 3 characters
    * ending ``'-T'``                -> ``'TSE:'`` + ticker without its last 2 characters
    * ending ``'T '``                -> ``'TSE:'`` + ticker without its last 3 characters
    * anything else                  -> the literal string ``'CASH'``

    Parameters
    ----------
    my_ticker : str
        Ticker text as scraped from the page.
    TRADE_US : list of str, optional
        Two-character endings whose tickers are returned without any prefix.

    Returns
    -------
    str
        The rewritten ticker, or ``'CASH'`` when no rule matches.
    """
    ending = my_ticker[-2:]

    if ending in TRADE_US:
        return my_ticker[:-2]

    if ending == 'US':
        # Three characters are dropped: the two matched ones plus the
        # character in front of them.
        return my_ticker[:-3]

    if ending == '-T':
        return 'TSE:' + my_ticker[:-2]

    if ending == 'T ':
        # Same idea as '-T' but with a trailing space, hence three characters.
        return 'TSE:' + my_ticker[:-3]

    return 'CASH'

class StockChasePageInfo(object):
    """Scraper for a single StockChase opinions listing page.

    The page is downloaded and parsed once, in the constructor; every
    ``get_*`` method then reads from the cached ``self.soup``.  Each getter
    returns a ``pandas.Series`` holding one entry per matching element, in
    the order ``find_all`` reports them.

    Attributes are documented with comments in ``__init__``.
    """

    # Position, among the ``div`` descendants of a price cell, of the ``div``
    # whose text ``get_prices`` returns.
    _PRICE_DIV_INDEX = 3

    def __init__(self, url, parser, timeout=30):
        """Download ``url`` and parse it with BeautifulSoup.

        Parameters
        ----------
        url : str
            Address of the listing page.
        parser : str
            BeautifulSoup parser name (e.g. ``'html.parser'``).
        timeout : float or tuple, optional
            Seconds to wait for the server, forwarded to ``requests.get``.
            Defaults to 30 so that a stalled connection cannot hang the
            scrape forever.

        Raises
        ------
        requests.exceptions.RequestException
            Propagated from ``requests.get`` (connection failure, timeout, ...).
        """
        # Raw HTTP response; the status code is not checked.
        self.page = requests.get(url, timeout=timeout)
        # Parsed document that all getters search.
        self.soup = BeautifulSoup(self.page.content, parser)

    def _extract(self, tag_name, css_class, transform):
        """Return ``transform(tag)`` for every ``tag_name`` element of ``css_class``.

        The values are computed before the Series is built, so the Series
        only ever holds the transformed values and never the parsed tags.
        ``dtype=object`` keeps the result type stable when nothing matches.
        """
        matches = self.soup.find_all(tag_name, class_=css_class)
        return pd.Series([transform(tag) for tag in matches], dtype=object)

    def get_expert_names(self):
        """Return the stripped text of every ``a.expert-name`` element."""
        return self._extract('a', 'expert-name', lambda tag: tag.text.strip())

    def get_expert_titles(self):
        """Return the stripped text of every ``div.expert-title`` element."""
        return self._extract('div', 'expert-title', lambda tag: tag.text.strip())

    def get_expert_tickers(self):
        """Return the normalised ticker of every ``span.opinion-mini__symbol``.

        For each element: leading/trailing parentheses are removed from the
        text, the result is passed through the module-level
        ``modify_ticker``, and finally every ``'.'`` is replaced by ``'-'``.
        """
        def normalise(tag):
            # Only '(' and ')' are stripped here -- whitespace is kept, which
            # is why ``modify_ticker`` has a rule for a trailing space.
            symbol = tag.text.strip("()")
            return modify_ticker(symbol).replace('.', '-')

        return self._extract('span', 'opinion-mini__symbol', normalise)

    def get_expert_picks(self):
        """Return the stripped text of every ``span.opinion-mini__name`` element."""
        return self._extract('span', 'opinion-mini__name', lambda tag: tag.text.strip())

    def get_expert_dates(self):
        """Return every ``div.opinion-mini__date`` as a ``datetime.date``.

        Raises
        ------
        ValueError
            If a date does not match the ``'%B %d, %Y'`` format
            (e.g. ``'January 5, 2021'``).
        """
        def parse(tag):
            return datetime.strptime(tag.text.strip(), '%B %d, %Y').date()

        return self._extract('div', 'opinion-mini__date', parse)

    def get_expert_comments(self):
        """Return the stripped text of every ``div.opinion-mini__comment`` element."""
        return self._extract('div', 'opinion-mini__comment', lambda tag: tag.text.strip())

    def get_prices(self):
        """Return the price text found in every ``expert-cell d-none d-lg-block`` div.

        The value is the stripped text of the ``div`` at position
        ``_PRICE_DIV_INDEX`` among each cell's ``div`` descendants.

        Raises
        ------
        IndexError
            If a cell contains too few ``div`` descendants.
        """
        def price_of(cell):
            return cell.find_all('div')[self._PRICE_DIV_INDEX].text.strip()

        return self._extract('div', 'expert-cell d-none d-lg-block', price_of)

    def get_all(self):
        """Return all scraped fields of the page as one ``DataFrame``.

        Columns, in order: ``Expert Name``, ``Expert Title``, ``Top Pick``,
        ``Ticker``, ``Price``, ``Date``, ``Comments``.

        Columns are filled one at a time and matched up by position in
        their respective Series.
        NOTE(review): this presumes every opinion on the page has every
        field; if the getters return different lengths, rows will not line
        up -- confirm against the page layout.
        """
        columns = (
            ('Expert Name', self.get_expert_names),
            ('Expert Title', self.get_expert_titles),
            ('Top Pick', self.get_expert_picks),
            ('Ticker', self.get_expert_tickers),
            ('Price', self.get_prices),
            ('Date', self.get_expert_dates),
            ('Comments', self.get_expert_comments),
        )
        df = pd.DataFrame()
        for label, getter in columns:
            df[label] = getter()

        return df

In [9]:
from tqdm import tqdm
# Page count for a full scrape.  Presumably the number of listing pages the
# site had when this notebook was written -- confirm before re-running.
# Note that ``scrape_all_stock_chase_top_picks`` has a parameter of the same
# name, so inside that function this module-level value is shadowed.
LAST_PAGE = 1798


def scrape_all_stock_chase_top_picks(mod_ticker_func, LAST_PAGE,
                            URL = "https://stockchase.com/opinions/recenttop/sort/date/page/1/direction/desc/max/15",
                            PARSER = 'html.parser', save=True, filename = 'stockchase_all_top_picks'):
    """Scrape pages ``1 .. LAST_PAGE`` and optionally save them as one CSV.

    Parameters
    ----------
    mod_ticker_func : callable
        Kept for backward compatibility only; it is NOT used.  Ticker
        normalisation happens inside ``StockChasePageInfo``, which calls the
        module-level ``modify_ticker`` directly, so a function passed here
        has no effect on the result.
    LAST_PAGE : int
        Number of pages to fetch, starting at page 1.
    URL : str, optional
        Address of page 1.  Every ``'/1/'`` in it is replaced by the current
        page number to build the address of each page.
    PARSER : str, optional
        BeautifulSoup parser name handed to ``StockChasePageInfo``.
    save : bool, optional
        When true, all page frames are concatenated and written to
        ``filename + '.csv'``.
    filename : str, optional
        Output path without the ``.csv`` extension.

    Returns
    -------
    dict
        Maps ``'p1'``, ``'p2'``, ... to the ``DataFrame`` of that page.
    """
    info_dict = {}
    for i in tqdm(range(LAST_PAGE)):
        page_number = i + 1
        print("Loading Page {}...".format(page_number))
        p_URL = URL.replace('/1/', '/' + str(page_number) + '/')
        info_dict['p' + str(page_number)] = StockChasePageInfo(p_URL, PARSER).get_all()

    if save:
        if info_dict:
            print("Saving to CSV...")
            my_df = pd.concat(list(info_dict.values()))
            my_df.to_csv(filename + '.csv')
        else:
            # ``pd.concat`` raises ValueError on an empty collection, which
            # happens when LAST_PAGE is 0 or negative.
            print("No pages scraped; nothing to save.")
    return info_dict