# Quantitative Trading Strategies

In [38]:
import pandas as pd
import numpy as np
import requests 
import datetime as dt
import typing
from bs4 import BeautifulSoup
import statsmodels.formula.api as smf
import time
import logging
import random
from requests_html import HTMLSession

from typing import Any, Dict, List, Optional

from fake_useragent import UserAgent

In [20]:

class CapitolTrades():
    """A class for interacting with the API which supports https://capitoltrades.com."""

    def __init__(self):
        """init varz"""
        self.__url = 'https://www.capitoltrades.com/trades?assetType=abs&assetType=corporate-bond&assetType=etf&assetType=indices&assetType=stock&assetType=reit&mcap=4&mcap=3&mcap=2&mcap=1&tradeSize=2&tradeSize=3&tradeSize=4&tradeSize=5&tradeSize=6&tradeSize=7&tradeSize=8&tradeSize=9&tradeSize=10'
        self.__ua = UserAgent()
        self.__session = requests.Session()
        self.__session.get('https://www.capitoltrades.com/trades?assetType=abs&assetType=corporate-bond&assetType=etf&assetType=indices&assetType=stock&assetType=reit&mcap=4&mcap=3&mcap=2&mcap=1&tradeSize=2&tradeSize=3&tradeSize=4&tradeSize=5&tradeSize=6&tradeSize=7&tradeSize=8&tradeSize=9&tradeSize=10')
        try:
            data = self.__get_data()
        except Exception as e:
            raise Exception("Error initializing: " + str(e))
        self.__politicians = self.__parse_data(data)
    
    @property
    def politicians(self) -> Dict[str, str]:
        """Returns the map of politician ID to politician name of all known
        politicians on https://capitoltrades.com. Useful for debugging."""
        return self.__politicians

    def __get_headers(self) -> Dict[str, Any]:
        """Generates headers for the Capitol Trades API."""
        return {
            "User-Agent": self.__ua.random,
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "en-US,en;q=0.5",
            "Content-Type": "application/json",
            "Origin": "https://bff.capitoltrades.com",
            "DNT": "1",
            "Connection": "keep-alive",
            "Referer": "https://bff.capitoltrades.com/",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
            "Sec-GPC": "1",
            "Cache-Control": "max-age=0",
            "TE": "trailers",
        }

    def __get_data(self) -> Optional[Dict]:
        """Gather data on all known politicians from https://capitoltrades.com"""
        logging.debug("Getting seed data")

        seed_data = []
        page_num = 1
        paginating = True
        while paginating:
            params = (
                ("page", page_num),
                # 100 is the max return size of the API.
                ("pageSize", 100),
            )
            r = self.__session.get(
                self.__url + "/politicians",
                headers=self.__get_headers(),
                params=params,
            )
            r.raise_for_status()

            response_json = r.json()
            data = response_json["data"]
            seed_data.extend(data)

            if len(seed_data) >= response_json["meta"]["paging"]["totalItems"] or not data:
                paginating = False
            else:
                page_num += 1

        return seed_data

    def __parse_data(self, data: Dict) -> Dict[str, str]:
        """Reformat the API data into a hash map we can use to search for politicians by name."""
        logging.debug("Parsing list of politicians")
        return {
            p["_politicianId"]: p["fullName"] for p in data
        }

    def get_politician_id(self, name: str) -> Optional[str]:
        """Search for the politician ID of the provided name."""
        for pid in self.__politicians.keys():
            if name.lower() == self.__politicians[pid].lower():
                return pid
            if name.lower() == self.__politicians[pid].split(",")[0].lower():
                return pid
        return None


    def trades(self, politician_id: str) -> List[Dict]:
        """Returns all of the trades for the provided politician ids."""
        assert politician_id in self.__politicians.keys()
        
        all_trades = []
        page_num = 1
        paginating = True
        while paginating:
            params = (
                ("page", page_num),
                # 100 is the max return size of the API.
                ("pageSize", 100),
                ("txDate", "all"),
                ("politician", politician_id)
            )
            r = self.__session.get(
                self.__url,
                headers=self.__get_headers(),
                params=params,
            )
            r.raise_for_status()

            response_json = r.json()
            data = response_json["data"]
            all_trades.extend(data)

            if len(all_trades) >= response_json["meta"]["paging"]["totalItems"] or not data:
                paginating = False
            else:
                page_num += 1

        return all_trades

In [11]:
url_template = 'https://www.capitoltrades.com/trades?mcap=3&mcap=2&mcap=1&assetType=abs&assetType=corporate-bond&assetType=indices&assetType=etf&assetType=etn&assetType=reit&assetType=stock&page={}'



In [60]:
trade_transactions = pd.read_csv('C:/Users/dcste/OneDrive/Economics_Research/Economics_Research/trade_transactions.csv')

In [52]:
v = 'https://www.capitoltrades.com/trades?mcap=3&mcap=2&mcap=1&assetType=abs&assetType=corporate-bond&assetType=indices&assetType=etf&assetType=etn&assetType=reit&assetType=stock&page=1'
r = session.get(v)


In [43]:
session = HTMLSession()

In [45]:
for pg in range(1,5):
    ur = f'https://www.capitoltrades.com/trades?mcap=3&mcap=2&mcap=1&assetType=abs&assetType=corporate-bond&assetType=indices&assetType=etf&assetType=etn&assetType=reit&assetType=stock&page={pg}'
    print(ur)
    


RuntimeError: Cannot use HTMLSession within an existing event loop. Use AsyncHTMLSession instead.

In [95]:
trade_dataset = pd.DataFrame(data = None, columns = ['Politician','Asset_Name','Date_Traded','Filed_After','Owner','Trade_Type','Notional_Value','Price'])

In [6]:
def filter_tradesize(trade_size):
    value = trade_size.strip()
    if value == '<1K' or value == '< 1K':
        value = 500
    elif value == '1K\u201315K':
        value = np.mean([1e3,15e3])
    elif value == '15K\u201350K':
        value = np.mean([15e3,50e3])
    elif value == '50K\u2013100K':
        value = np.mean([50e3,100e3])
    elif value == '100K\u2013250K':
        value = np.mean([100e3,250e3])
    elif value == '250K\u2013500K':
        value = np.mean([250e3,500e3])
    elif value == '1M\u20135M':
        value = np.mean([1e6,5e6])
    elif value == '5M\u201325M':
        value = np.mean([5e6,25e6])
    elif value == '25M\u201350M':
        value = np.mean([25e6,50e6])
    
    return value


In [8]:
def filter_dates(dates: str):
    DATE = dates
    d_format = "%Y %d %b"
    output_format = "%m/%d/%Y"
    date_ob = dt.datetime.strptime(DATE,d_format)
    final_ob = date_ob.strftime(output_format)
    return final_ob
    

In [24]:
def sleep(a,b):
    return time.sleep(random.randint(a,b))

In [None]:
session = HTMLSession()

In [96]:

row_index = 0
page_num = 1
for pg in range(1,4):
    if pg == 1:
        url = f'https://www.capitoltrades.com/trades'
    else:
        url = f"https://www.capitoltrades.com/trades?page={pg}"
   
    session = HTMLSession()
    response = session.get(url)
    soup = BeautifulSoup(response.html.html, 'lxml')
    table = soup.find('table')
    rows = table.find_all('tr')


    #if len(rows) == 1:
        #break

    for row in rows[1:]:
        cols = row.find_all('td')

        if len(cols) > 0:
        # Extract the data from the columns
            #temp_df = pd.DataFrame(data = None,columns = ['Politician','Asset_Name','Date_Traded','Filed_After','Owner','Trade_Type','Notional_Value','Price'])
            Politician = cols[0].text.strip()
            Asset_Name = cols[1].text.strip()
            #published = cols[2].text.strip()
            Date_Traded = filter_dates(cols[3].text.strip())
            try:
                Filed_After = float(cols[4].text.strip()[-2:])
            except ValueError:
                Filed_After = np.nan
            Owner= cols[5].text.strip()
            Trade_Type = cols[6].text.strip()
            Notional_Value = filter_tradesize(cols[7].text.strip())

            try:
                Price = float(cols[8].text.strip())
            except ValueError:
                Price = np.nan
            #print(politician,asset_name,date_traded, filed_after, owner, trade_type, trade_size, price)
                       # Append the extracted data to the DataFrame
            trade_dataset.loc[row_index] = [Politician,Asset_Name,Date_Traded,Filed_After,Owner,Trade_Type,Notional_Value,Price]

            row_index += 1
            sleep(2,8)
            
            
         



    
            



In [97]:
trade_dataset.drop_duplicates('Price')

Unnamed: 0,Politician,Asset_Name,Date_Traded,Filed_After,Owner,Trade_Type,Notional_Value,Price
0,Debbie Wasserman SchultzDemocratHouseFL,ALAMOS GOLD INCAGI:US,01/24/2023,27.0,Child,sell,8000.0,11.37
2,Jeff JacksonDemocratHouseNC,ANKR $ANKR,01/29/2023,19.0,Undisclosed,sell,8000.0,0.028
3,Jeff JacksonDemocratHouseNC,CARDANO$ADA,01/29/2023,19.0,Undisclosed,sell,8000.0,0.396
4,Neal DunnRepublicanHouseFL,KeyCorpKEY:US,01/05/2023,43.0,Undisclosed,sell,8000.0,22.04
5,Neal DunnRepublicanHouseFL,Regions Financial CorpRF:US,01/05/2023,43.0,Undisclosed,sell,8000.0,
6,Neal DunnRepublicanHouseFL,The Southern CoSO:US,01/05/2023,43.0,Undisclosed,sell,32500.0,70.38
8,Josh GottheimerDemocratHouseNJ,AbbVie IncABBV:US,01/27/2023,21.0,Joint,sell,8000.0,146.28
9,Josh GottheimerDemocratHouseNJ,AbbVie IncABBV:US,01/20/2023,28.0,Joint,sell,8000.0,149.59
10,Josh GottheimerDemocratHouseNJ,Adobe IncADBE:US,01/31/2023,17.0,Joint,buy,8000.0,370.34
11,Josh GottheimerDemocratHouseNJ,Align Technology IncALGN:US,01/27/2023,21.0,Joint,buy,8000.0,269.2


In [98]:
for pg in range(1,3):
    url = f"https://www.capitoltrades.com/trades?mcap=3&mcap=2&mcap=1&assetType=abs&assetType=corporate-bond&assetType=etf&assetType=stock&assetType=reit&assetType=indices&assetType=etn&page={pg}"
   
    print(url)

https://www.capitoltrades.com/trades?mcap=3&mcap=2&mcap=1&assetType=abs&assetType=corporate-bond&assetType=etf&assetType=stock&assetType=reit&assetType=indices&assetType=etn&page=1
https://www.capitoltrades.com/trades?mcap=3&mcap=2&mcap=1&assetType=abs&assetType=corporate-bond&assetType=etf&assetType=stock&assetType=reit&assetType=indices&assetType=etn&page=2


In [14]:
t.politicians()

TypeError: 'dict' object is not callable