In [17]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import numpy as np
import datetime

from datetime import datetime
from dateutil.relativedelta import relativedelta

## Pulling all SEC 13 F links

In [50]:
## Most Famous 13F investors
investor_dict = {
    "1536411": "Stanley_Druckenmiller",
    "1336528": "Bill_Ackman",
    "1067983": "Warren_Buffet",
    "1649339": "Michael_Burray",
    #"921669": "Carl_Icahn",
    "1656456": "David_Tepper",
    "1040273": "Daniel_Loeb",
    "1345471": "Nelson_Peltz"

}

## Pulling reported porfolio values

In [37]:
def cik_data_pull2(acik, filing_name):
    # Constants
    CIK = acik
    SEC_API_URL = f"https://data.sec.gov/submissions/CIK000{CIK}.json"

    # Headers for the SEC API request
    headers = {'User-Agent': "bfassnacht17@gmail.com"}

    base_url = "https://sec.gov"

    # Function to get 13F filings
    def get_13f_filings(cik):
        url = f"https://data.sec.gov/submissions/CIK000{cik}.json"
        response = requests.get(url, headers=headers)
        
        if response.status_code == 200:
            data = response.json()
            
            # Extract recent filings data
            forms = data['filings']['recent']['form']
            accession_numbers = data['filings']['recent']['accessionNumber']
            filing_dates = data['filings']['recent']['filingDate']
            
            # Create a list to store filing data
            filings_data = []
            
            # Loop through filings and filter for 13F-HR forms
            for i, form in enumerate(forms):
                if form == "13F-HR":
                    accession_number = accession_numbers[i]
                    filing_date = filing_dates[i]
                    
                    # Construct the link to the filing's index page
                    filing_url = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession_number}/{accession_number}-index.htm"
                    
                    # Parse the filing index page to find the 13F XML file link
                    form_13f_url = get_form_13f_url(filing_url)
                    
                    # Append the data to the list
                    filings_data.append({
                        'Form': form,
                        'Filing Date': filing_date,
                        'Filing Index URL': filing_url,
                        'Form 13F URL': form_13f_url
                    })
            
            # Convert the list to a DataFrame
            df_filings = pd.DataFrame(filings_data)
            return df_filings
        else:
            print("Failed to retrieve data.")
            return None

    # Function to extract the Form 13F XML file URL from the index page
    def get_form_13f_url(index_url):
        response = requests.get(index_url, headers=headers)
        
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Look for the link to the Form 13F XML file
            links = soup.find_all('a')
            
            for link in links:
                href = link.get('href')
                if href and ('slform13f' in href.lower() and href.endswith('.xml') and 'primary_doc' not in href.lower()):
                    # Construct the full URL
                
                    full_url = f"{base_url}/{href}"
                    return full_url
        
        return None

    # Get 13F filings
    df_13f = get_13f_filings(CIK)

    
    
    
    
    
    if df_13f is not None:
        df_13f_filtered = df_13f[df_13f["Form 13F URL"].notna()]

        dfs = []

        for index, row in df_13f_filtered.iterrows():

            url = row["Form 13F URL"]
            filing_date = row["Filing Date"]
            
            # Fetching the XML content from the URL
            response = requests.get(url, headers=headers)
            xml_content = response.content

            # Create a BeautifulSoup object
            soup = BeautifulSoup(xml_content, 'html.parser')

            # Find the table element containing the data
            table = soup.find('table', summary="Form 13F-NT Header Information")

            # Extract the column headers
            header_row = table.find('tr')
            header_cells = header_row.find_all('td')
            column_headers = [cell.text.strip() for cell in header_cells]

            # Extract the data rows
            data_rows = table.find_all('tr')[1:]  # Skip the header row
            data = []
            for row in data_rows:
                cells = row.find_all('td')
                row_data = [cell.text.strip() for cell in cells]
                data.append(row_data)   

            # Create a pandas DataFrame
            df = pd.DataFrame(data[2:], columns=data[1])

            # Handle non-breaking space characters (if present)
            df = df.replace('\xa0', '', regex=True)

            df = df.rename(columns={
                '(to the nearest dollar)': 'VALUE', 
                '(x$1000)': 'VALUE'
            })
            
            dfs.append([filing_date, df])



        ## Pulling data frame of historical holdings
        filing_dates = [date[0] for date in dfs]
        columns = filing_dates

        all_holdings = pd.concat([df[1].loc[:, ["NAME OF ISSUER", "CUSIP", "CALL"]] for df in dfs]).drop_duplicates()
        all_holdings_values = all_holdings.copy()

        for i in range(len(dfs)):
            new = dfs[i][1][["NAME OF ISSUER", "CUSIP", "CALL", "PRN AMT"]].copy()

            new["PRN AMT"] = new["PRN AMT"].str.replace(',', '')  # Replace commas in entire column
            
            new.columns = ["NAME OF ISSUER", "CUSIP", "CALL", columns[i]]
            new[columns[i]] = new[columns[i]].astype(np.int64)  # Convert entire column to integer

            new1 = new.groupby(["NAME OF ISSUER", "CUSIP", "CALL"])[columns[i]].sum().reset_index()
            all_holdings = all_holdings.merge(new1, on=["NAME OF ISSUER", "CUSIP", "CALL"], how='left')

        all_holdings = all_holdings.fillna(0).sort_values(by="2024-08-14", ascending=False)

        all_holdings.to_csv(f"C:\\Users\\bfass\\OneDrive\\Desktop\\Fin tools\\BigMoney13F\\Holdings_shares\\{filing_name}.csv")




        

        for i in range(len(dfs)):
            new = dfs[i][1][["NAME OF ISSUER", "CUSIP", "CALL", "VALUE"]].copy()


            new["VALUE"] = new["VALUE"].str.replace(',', '')  # Replace commas in entire column
            new.columns = ["NAME OF ISSUER", "CUSIP", "CALL", columns[i]]


            new[columns[i]] = new[columns[i]].astype(np.int64)  # Convert entire column to integer

            new1 = new.groupby(["NAME OF ISSUER", "CUSIP", "CALL"])[columns[i]].sum().reset_index()
            all_holdings_values = all_holdings_values.merge(new1, on=["NAME OF ISSUER", "CUSIP", "CALL"], how='left')

        all_holdings_values = all_holdings_values.fillna(0).sort_values(by="2024-08-14", ascending=False)

        all_holdings_values.to_csv(f"C:\\Users\\bfass\\OneDrive\\Desktop\\Fin tools\\BigMoney13F\\Holdings_value\\{filing_name}.csv")


    else:
        return None



In [38]:
for cik, whale in investor_dict.items():
    #print(whale)
    cik_data_pull2(cik, whale)

Stanley_Druckenmiller
Bill_Ackman
Warren_Buffet
Michael_Burray
David_Tepper
Daniel_Loeb
Nelson_Peltz


## Downloading Zip files from web

In [6]:

downloadUrl = 'https://www.sec.gov/data-research/sec-markets-data/fails-deliver-data'

headers = {'User-Agent': "bfassnacht17@gmail.com"}

req = requests.get(downloadUrl, headers=headers)
filename = req.url[downloadUrl.rfind('/')+1:]


In [9]:
req.content

b'\n\n<!DOCTYPE html>\n<html lang="en" dir="ltr" class="no-js">\n  <head>\n    <meta charset="utf-8" />\n<meta name="description" content="This text file contains the date, CUSIP numbers, ticker symbols, issuer name, price, and total number of fails-to-deliver (i.e., the balance level outstanding) recorded in the National Securities Clearing Corporation&#039;s (&quot;NSCC&quot;) Continuous Net Settlement (CNS) system aggregated over all NSCC members." />\n<meta name="abstract" content="This text file contains the date, CUSIP numbers, ticker symbols, issuer name, price, and total number of fails-to-deliver (i.e., the balance level outstanding) recorded in the National Securities Clearing Corporation&#039;s (&quot;NSCC&quot;) Continuous Net Settlement (CNS) system aggregated over all NSCC members." />\n<meta name="keywords" content="foia-freq-doc, SEC Markets and Data" />\n<meta name="MobileOptimized" content="width" />\n<meta name="HandheldFriendly" content="true" />\n<meta name="viewpo

In [33]:
from datetime import datetime
from dateutil.relativedelta import relativedelta


# Get the current date and time
now = datetime.now()

# Get the previous month by subtracting one month
previous_month_date = now - relativedelta(months=1)
previous_month_number = previous_month_date.strftime("%m")



# Set the SEC page URL
download_url = 'https://www.sec.gov/data-research/sec-markets-data/fails-deliver-data'

headers = {'User-Agent': "bfassnacht17@gmail.com"}

# Make a GET request to fetch the page content
req = requests.get(download_url, headers=headers)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(req.content, 'html.parser')

# Find all the links to ZIP files (assuming they end with .zip)
zip_links = []
for link in soup.find_all('a', href=True):
    if link['href'].endswith('.zip') and ((previous_month_number + "a" in link['href']) or (previous_month_number + "b" in link['href'])):
        zip_links.append(link['href'])

# Check if any ZIP links were found
if not zip_links:
    print("No ZIP file links found on the page.")
else:
    print(f"Found {len(zip_links)} ZIP files.")

# Create a directory to save the ZIP files if it doesn't exist
output_dir = r'C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker'
os.makedirs(output_dir, exist_ok=True)

# Download each ZIP file
for zip_link in zip_links:
    # If the link is relative, convert it to an absolute URL
    zip_url = zip_link if zip_link.startswith('http') else f'https://www.sec.gov{zip_link}'
    
    # Get the ZIP file name
    zip_file_name = os.path.join(output_dir, zip_url.split('/')[-1])
    
    # Download the ZIP file
    print(f"Downloading {zip_file_name} ...")
    zip_req = requests.get(zip_url, headers=headers)
    
    # Save the ZIP file to the output directory
    with open(zip_file_name, 'wb') as f:
        f.write(zip_req.content)
    
    print(f"Saved {zip_file_name}")

print("All files downloaded.")


Found 31 ZIP files.
Downloading C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202408a.zip ...
Saved C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202408a.zip
Downloading C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202308b.zip ...
Saved C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202308b.zip
Downloading C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202308a.zip ...
Saved C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202308a.zip
Downloading C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202208b.zip ...
Saved C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202208b.zip
Downloading C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker\cnsfails202208a.zip ...
Saved C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_

In [47]:
# Directory where the text files are stored
directory_path = r'C:\Users\bfass\OneDrive\Desktop\Fin tools\BigMoney13F\Cusip_to_ticker'
dataframes = []

# Loop through each ZIP file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith('.zip'):  # Ensure we only process ZIP files
        zip_path = os.path.join(directory_path, filename)
        
        # Open the ZIP file
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Get the list of files in the ZIP (assuming one text file per ZIP)
            text_files = zip_ref.namelist()
            
            # Make sure there is exactly one text file in the ZIP
            if len(text_files) == 1:
                # Extract the text file directly into memory
                with zip_ref.open(text_files[0]) as f:
                    # Try using 'ISO-8859-1' encoding to handle non-UTF-8 characters
                    with io.TextIOWrapper(f, encoding="ISO-8859-1") as text_file:
                        df = pd.read_csv(text_file, delimiter='|')
                    
                        # Append the DataFrame to the list
                        dataframes.append(df)

# Concatenate all DataFrames in the list into a single DataFrame
unioned_df = pd.concat(dataframes, ignore_index=True)

# Optionally, save the final unioned DataFrame to a CSV file
# unioned_df.to_csv('unioned_output.csv', index=False)

print("DataFrames successfully unioned!")


DataFrames successfully unioned!


In [66]:
unioned_df2 = unioned_df[unioned_df["SETTLEMENT DATE"].str.len() == 8]

unioned_df2["SETTLEMENT DATE"] = pd.to_datetime(unioned_df2["SETTLEMENT DATE"])

recent_cusip = unioned_df2.groupby(["SYMBOL", "CUSIP"])["SETTLEMENT DATE"].max().reset_index()
recent_cusip.columns = ["SYMBOL", "CUSIP", "Recent_Date"]

unioned_df2 = unioned_df2.merge(recent_cusip, on=["SYMBOL", "CUSIP"], how='inner')

unioned_df2.to_csv("CUSIP_Mapping.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unioned_df2["SETTLEMENT DATE"] = pd.to_datetime(unioned_df2["SETTLEMENT DATE"])


## 