# Weekly Digest

In [2]:
from requests_html import HTMLSession
from bs4 import BeautifulSoup
from datetime import datetime,timedelta
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import sys
import re 
import nums_from_string
import json
from email.utils import formataddr
import tweepy
from Google import Create_Service
import base64
from google.oauth2 import service_account
from googleapiclient.discovery import build
from newsapi.newsapi_client import NewsApiClient
import FormatFunctions
from GeneratePostFiles import generatePostFiles

In [3]:
def fetchSession(url):
    """Issue a GET request for ``url`` through a brand-new ``HTMLSession``.

    Returns whatever response object ``HTMLSession.get`` produces.
    """
    return HTMLSession().get(url)

def getTrades(r):
    """Return the rows of the first table on the page, minus the first row.

    ``r`` is a response whose ``html.find('table')`` yields the page's
    tables. An ``IndexError`` propagates when the page has no table at all
    (callers in this file treat that as "website is down").
    """
    tables = r.html.find('table')
    all_rows = tables[0].find('tr')
    # the first row is skipped (callers treat it as the header)
    return all_rows[1:]

def value_to_ints(value):
    """Parse a disclosed dollar range such as ``'$1,001 - $15,000'``.

    The two amounts are located with a regular expression, so the result
    does not depend on the exact whitespace around the dash (the previous
    implementation only worked when exactly two spaces remained after the
    dash was stripped).

    Returns:
        ``[low, high]`` as a list of two ints.

    Raises:
        ValueError: if ``value`` does not contain exactly two amounts.
    """
    # a run of digits, optionally containing thousands separators
    amounts = re.findall(r'\d[\d,]*', value)
    if len(amounts) != 2:
        raise ValueError(
            'expected a two-amount range, got %r' % (value,)
        )
    low, high = (int(a.replace(',', '')) for a in amounts)
    return [low, high]

def getTicker(t):
    """Return the text inside the first ``[...]`` pair in ``t``.

    Returns an empty string when ``t`` contains no bracketed section.
    """
    # raw string: '\[' in a normal literal is an invalid escape sequence
    match = re.search(r'\[(.*?)\]', t)
    return match.group(1) if match else ''

def getYahooInfo(ticker):
    """Scrape the two summary tables of a Yahoo Finance quote page.

    Returns:
        ``(left, right)``: two dicts mapping each table's item label to its
        displayed value, built from the first and second ``<table>`` on the
        page. Returns ``(-1, -1)`` when the page has fewer than two tables,
        which callers treat as an invalid ticker.
    """
    url = 'https://finance.yahoo.com/quote/{}'.format(ticker)
    response = fetchSession(url)
    tables = response.html.find('table')
    # handle invalid ticker: without both tables there is nothing to parse
    # (also covers a page with no table at all, which used to raise)
    if len(tables) < 2:
        return -1, -1

    def _cells_to_dict(table):
        # cells alternate label, value, label, value, ...
        texts = [cell.text for cell in table.find('td')]
        return dict(zip(texts[0::2], texts[1::2]))

    # each table is parsed on its own so a shorter table cannot truncate
    # the other one
    return _cells_to_dict(tables[0]), _cells_to_dict(tables[1])

def getCurrentSP500Price():
    """Scrape the SPY quote page and return its regular market price.

    The price is read from the first ``fin-streamer`` element tagged with
    ``data-symbol="SPY"`` and ``data-field="regularMarketPrice"`` and
    converted with ``float``.
    """
    page = fetchSession('https://finance.yahoo.com/quote/SPY/')
    wanted = {
        'data-symbol' : 'SPY',
        'data-field' : 'regularMarketPrice'
    }
    parsed = BeautifulSoup(page.text, 'html.parser')
    streamers = parsed.find_all('fin-streamer', attrs=wanted)
    first_price = streamers[0].text
    return float(first_price)

def isStock(right_table):
    """Return True when the first key of ``right_table`` is 'Market Cap'.

    An empty table yields False instead of raising ``IndexError``.
    """
    # peek at the first key without materialising every key
    return next(iter(right_table), None) == 'Market Cap'

def getMktCap(right_table):
    """Look up the 'Market Cap' entry of ``right_table``.

    Raises ``KeyError`` if the entry is missing.
    """
    market_cap = right_table['Market Cap']
    return market_cap

def getOpen(left_table):
    """Look up the 'Open' entry of ``left_table``.

    Raises ``KeyError`` if the entry is missing.
    """
    opening = left_table['Open']
    return opening

def getSectorIndustry(ticker):
    """Scrape the sector and industry from a Yahoo Finance profile page.

    Returns:
        ``(sector, industry)`` as strings. When the profile paragraph is
        missing (bad ticker) or does not have the expected layout,
        ``('', '')`` is returned so callers can always unpack two values.
    """
    url = 'https://finance.yahoo.com/quote/{}/profile?p={}'.format(ticker, ticker)
    r = fetchSession(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    paragraphs = soup.find_all('p', attrs={'class' : 'D(ib) Va(t)'})
    # bad ticker was given
    if not paragraphs:
        return '', ''
    sect_ind = paragraphs[0].text.strip()
    sector_match = re.search('\xa0(.*)Industry', sect_ind)
    industry_match = re.search('Industry:\xa0(.*)Full', sect_ind)
    # paragraph present but not in the expected shape
    if sector_match is None or industry_match is None:
        return '', ''
    return sector_match.group(1), industry_match.group(1)

def parseToMillions(mkt_cap):
    """Convert a market-cap string to a number expressed in millions.

    The first number found in ``mkt_cap`` is scaled by its final character:
    'B' multiplies by 1000 and 'T' by 1000000; anything else leaves the
    number unchanged. An ``IndexError`` propagates when no number is found.
    """
    suffix = mkt_cap[-1:]
    amount = nums_from_string.get_nums(mkt_cap)[0]
    # keep in units of millions
    scale_to_millions = {'B': 1000, 'T': 1000000}
    if suffix in scale_to_millions:
        amount = amount * scale_to_millions[suffix]
    return amount

def cleanQuery(t):
    """Build a news search phrase from a trade's description.

    Every run of non-alphanumeric characters in ``t['trade']`` becomes a
    single space, everything from the first 'Common' onward is dropped, and
    'Stock' is appended.
    """
    alnum_only = re.sub('[^0-9a-zA-Z]+', ' ', t['trade'])
    before_common, _, _ = alnum_only.partition('Common')
    return before_common + 'Stock'

def getTradesNews(t, key_path):
    """Fetch up to three news articles related to trade ``t``.

    Args:
        t: trade dict; its 'trade' entry is turned into the search phrase
            by ``cleanQuery``.
        key_path: path of a text file holding the NewsAPI key.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts (at most three), or
        ``-1`` when no article is found.
    """
    with open(key_path) as f:
        # key files usually end with a newline; it must not reach the API
        key = f.read().strip()
    newsapi = NewsApiClient(api_key=key)

    search = cleanQuery(t)
    try:
        articles = newsapi.get_everything(
            q=search, language='en', sort_by='relevancy'
        )['articles'][:3]
    except IndexError:
        return -1
    if not articles:
        return -1
    return [
        {
            'title' : article['title'],
            'url' : article['url']
        }
        for article in articles
    ]

def getLastname(senator):
    """Return the last space-separated word before any comma in ``senator``.

    Anything after the first comma is discarded before the final word is
    taken.
    """
    before_comma = senator.split(',')[0]
    return before_comma.split(' ')[-1]

def getContactInfo(senator):
    """Look up a senator's contact page via the senate.gov contact form.

    The senator's last name is matched against the first word of each
    ``<option>`` label in the fourth ``<form>`` of the page. The matching
    option's URL with ``/contact`` appended is returned when that page
    answers with HTTP 200; otherwise the senator's base URL is returned.

    Returns:
        ``{'lastname': ..., 'url': ...}``. Both values are empty strings
        when the senator is not listed or the contact page cannot be
        fetched.
    """
    lastname = getLastname(senator)
    form_url = 'https://www.senate.gov/senators/senators-contact.htm'
    r = fetchSession(form_url)
    form = r.html.find('form')[3]
    # first row header
    options = form.find('option')[1:]

    base_url = None
    for option in options:
        markup = option.html
        # label text starts right after the opening tag's '>'
        if markup.split('>')[1].split(' ')[0] == lastname:
            # first double-quoted attribute value of the option tag
            base_url = markup.split('"')[1]
            break
    if base_url is None:
        return {
            'lastname':'',
            'url':''
        }

    # normalise so exactly one '/' separates the base and 'contact'
    if base_url.endswith('/'):
        base_url = base_url[:-1]
    contact_url = base_url + '/contact'
    try:
        status = fetchSession(contact_url).status_code
    except Exception:
        # best effort: any fetch failure means "no contact info", but
        # KeyboardInterrupt/SystemExit are no longer swallowed
        return {
            'lastname':'',
            'url':''
        }
    # fall back to the remembered base URL rather than splitting on
    # '/contact', which would truncate bases that contain that substring
    return {
        'lastname':lastname,
        'url':contact_url if status == 200 else base_url
    }

def getPartyState(senator):
    """Look up a senator's party and state from Wikipedia's senator list.

    Rows of the sixth table on the page (header row skipped) are scanned
    for a ``<th>`` whose last word equals the senator's last name. A row
    with 11 ``<td>`` cells holds the state in its first cell and the party
    in its fourth; any other matching row takes the party from its third
    cell and the state from the previous row's first cell.

    Returns ``[party, state]``; both are '' when nothing matches. The scan
    does not stop at the first hit, so the last matching row wins.
    """
    lastname = getLastname(senator)
    party_info = 'https://en.wikipedia.org/wiki/List_of_current_United_States_senators'
    page = fetchSession(party_info)
    senatorRows = page.html.find('table')[5].find('tr')[1:]
    party, state = '', ''
    for row, s in enumerate(senatorRows):
        for n in s.find('th'):
            if n.text.split(' ')[-1] != lastname:
                continue
            cells = s.find('td')
            if len(cells) == 11:
                # this row carries the state cell itself
                party = cells[3].text.split('[')[0].split('\n')[0]
                state = cells[0].text
            else:
                # NOTE(review): for row == 0 this index is -1, i.e. the last row
                party = cells[2].text.split('[')[0].split('\n')[0]
                state = senatorRows[row-1].find('td')[0].text
    return [party, state]

def writeTradeToFile(trade, path):
    """Write the entries of ``trade`` to ``path``, one per line.

    The file is overwritten. Each entry is written as ``key : value``,
    except the 'Yahoo!' entry, which is written as its value alone. A blank
    line is written after the last entry.
    """
    with open(path, 'w') as out:
        for field, content in trade.items():
            if field != 'Yahoo!':
                line = '%s : %s\n' % (field, content)
            else:
                line = '%s\n' % content
            out.write(line)
        out.write('\n')

def scrapeImportantTrades(today=None, onlyToday=False, backtest=False, backtestDate='2022-04-01'):
    """Scrape Senate stock disclosures and keep the "important" purchases.

    A purchase of a stock is important when the company is small cap
    (market cap below 2,000 million), mid cap (2,000-10,000 million) with a
    disclosed lower bound of at least 50,000, or large cap (above 10,000
    million) with a lower bound of at least 100,000.

    Args:
        today: date compared (via ``str``) with each filing date when
            ``onlyToday`` is set. Defaults to the current date, resolved on
            every call rather than once at definition time.
        onlyToday: stop at the first row whose filing date is not ``today``.
        backtest: stop at the first row filed before ``backtestDate``.
        backtestDate: ``'YYYY-MM-DD'`` cut-off used when ``backtest`` is set.

    Returns:
        A list of dicts, one per important trade.

    Exits the process with status 1 when the disclosures table cannot be
    found (website is down).
    """
    if today is None:
        today = datetime.today().date()

    r = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    # if website is down
    try:
        trades = getTrades(r)
    except IndexError:
        sys.exit(1)

    all_trades = []
    dt_backtest = datetime.strptime(backtestDate, '%Y-%m-%d').date()
    # company debt, funds (a fund may list a mkt cap instead of an NAV) and
    # options plays are not plain equity purchases
    non_equity_markers = ('Notes', 'Matures', 'Fund', 'Option')

    # every trade spans two consecutive rows; stopping at len - 1 ignores a
    # dangling unpaired row instead of raising IndexError
    for i in range(0, len(trades) - 1, 2):
        l1_elements = trades[i].find('td')
        l2_elements = trades[i+1].find('td')[:-1]

        # make sure trade happened today before doing anything
        file_date, trade_date = l1_elements[0].text.split('\n')
        if file_date != str(today) and onlyToday:
            break

        if backtest:
            file_dt = datetime.strptime(file_date, '%Y-%m-%d').date()
            if file_dt < dt_backtest:
                break

        # ensure trade is a purchase, otherwise continue to next trade
        trade_type = l2_elements[0].text.split('\n', 1)[0]
        if trade_type != 'Purchase':
            continue

        trade = l1_elements[1].text
        senator = l1_elements[2].text.split(' [')[0]

        ticker = getTicker(trade)
        # if no ticker is found, not an equity trade
        if ticker == '':
            continue

        # test every marker; `('Notes' or 'Matures' or 'Fund') in trade`
        # only ever tested 'Notes'. Checked before any network request.
        if any(marker in trade for marker in non_equity_markers):
            continue

        left_table, right_table = getYahooInfo(ticker)
        # invalid ticker given
        if left_table == -1:
            continue
        # if the ticker is an ETF, not a stock
        if not isStock(right_table):
            continue

        # tolerate a non-pair result (e.g. '') when the profile is missing
        sector_industry = getSectorIndustry(ticker)
        if isinstance(sector_industry, tuple) and len(sector_industry) == 2:
            sect, ind = sector_industry
        else:
            sect, ind = '', ''

        try:
            mkt_cap = parseToMillions(getMktCap(right_table))
        except IndexError:
            continue

        # parsed only for rows that survived the filters above
        value = value_to_ints(l2_elements[1].text)

        # any small caps, medium purchase medium caps, large purchase large cap
        if 0 < mkt_cap < 2000:
            cap_string = 'small'
        elif 2000 <= mkt_cap <= 10000 and value[0] >= 50000:
            cap_string = 'medium'
        elif mkt_cap > 10000 and value[0] >= 100000:
            cap_string = 'large'
        else:
            continue

        url = 'https://finance.yahoo.com/quote/{}/'.format(ticker)
        all_trades.append({
            'trade date' : trade_date,
            'file date' : file_date,
            'senator' : senator,
            'trade' : trade,
            'trade type' : trade_type,
            'value' : value,
            'mkt cap' : cap_string,
            'sector' : sect,
            'industry' : ind,
            'yahoo finance' : url
        })

    return all_trades

## Find all trades filed over the last week

In [4]:
def getLastWeekTrades(today=None, lastWeek=None):
    """Collect every disclosed trade filed on or after ``lastWeek``.

    Args:
        today: accepted for backward compatibility; not used.
        lastWeek: earliest filing date (a ``date``) to include. Defaults to
            seven days before the current date, resolved on every call
            rather than once at definition time.

    Returns:
        A list of dicts with the keys 'file date', 'trade date', 'trade',
        'trade type', 'senator' and 'value' (``[low, high]`` ints).

    Exits the process with status 1 when the disclosures table cannot be
    found (website is down).
    """
    if lastWeek is None:
        lastWeek = datetime.today().date() - timedelta(days=7)

    r = fetchSession('https://sec.report/Senate-Stock-Disclosures')
    # if website is down
    try:
        trades = getTrades(r)
    except IndexError:
        sys.exit(1)

    all_trades_wk = []
    # every trade spans two consecutive rows; stopping at len - 1 ignores a
    # dangling unpaired row instead of raising IndexError
    for i in range(0, len(trades) - 1, 2):
        l1_elements = trades[i].find('td')
        l2_elements = trades[i+1].find('td')[:-1]

        # stop at the first trade filed before the cut-off date
        file_date, trade_date = l1_elements[0].text.split('\n')
        file_dt = datetime.strptime(file_date, '%Y-%m-%d').date()
        if file_dt < lastWeek:
            break

        all_trades_wk.append({
            'file date' : file_date,
            'trade date' : trade_date,
            'trade' : l1_elements[1].text,
            # keep only the text before ' (' on the first line
            'trade type' : l2_elements[0].text.split('\n', 1)[0].split(' (')[0],
            'senator' : l1_elements[2].text.split(' [')[0],
            'value' : value_to_ints(l2_elements[1].text)
        })
    return all_trades_wk

In [5]:
# Scrape the disclosures page (network request) using the default cut-off.
all_trades_wk = getLastWeekTrades()

In [6]:
# Display the scraped trades as the cell's output.
all_trades_wk

[{'file date': '2022-10-28',
  'trade date': '2022-10-20',
  'trade': 'Alabama Highway Finance Auth Revenue Bond Rate/Coupon: 5.0% Matures: 09/01/2026',
  'trade type': 'Purchase',
  'senator': 'Rick Scott',
  'value': [500001, 1000000]},
 {'file date': '2022-10-28',
  'trade date': '2022-09-28',
  'trade': 'Washington County Oregon School District General Obligation Bond Rate/Coupon: 5.0% Matures: 06/15/2027',
  'trade type': 'Purchase',
  'senator': 'Rick Scott',
  'value': [500001, 1000000]},
 {'file date': '2022-10-28',
  'trade date': '2022-09-28',
  'trade': 'Washington County Oregon School District General Obligation Bond Rate/Coupon: 5.0% Matures: 06/15/2027',
  'trade type': 'Purchase',
  'senator': 'Rick Scott',
  'value': [250001, 500000]}]

## Organize, count metrics, and get other data

In [19]:
# Weekly tallies: what was traded, how much, and in which direction.
securities = []
trade_values = []
buy_sell = []  # +1 for a purchase, -1 for every other trade type
buy_value = 0
sell_value = 0

for t in all_trades_wk:
    securities.append(t['trade'])
    low, high = t['value'][0], t['value'][1]
    # midpoint of the disclosed range, rounded to the nearest hundred
    value = int(round((low + high) / 2, -2))
    trade_values.append(value)
    is_purchase = t['trade type'] == 'Purchase'
    buy_sell.append(1 if is_purchase else -1)
    if is_purchase:
        buy_value += value
    else:
        sell_value += value

num_buys = sum(1 for side in buy_sell if side == 1)
num_sell = len(buy_sell) - num_buys
total_value = sum(trade_values)
print(securities, total_value, sep='\n')
print(num_buys, num_sell)
print(buy_value, sell_value)

['Alabama Highway Finance Auth Revenue Bond Rate/Coupon: 5.0% Matures: 09/01/2026', 'Washington County Oregon School District General Obligation Bond Rate/Coupon: 5.0% Matures: 06/15/2027', 'Washington County Oregon School District General Obligation Bond Rate/Coupon: 5.0% Matures: 06/15/2027']
1875000
3 0
1875000 0
