In [None]:
from bs4 import BeautifulSoup
from datetime import date
import os
import pandas as pd
import requests
import re
import time
import traceback
    
# Returns a dataframe with urls for any odd lot forms filed for the given CIK_num
def get_odd_lot_form_url(CIK_num):
    """Find the SC TO-I filings of one company whose text mentions "odd lot".

    Queries the EDGAR company browse page for ``CIK_num`` restricted to form
    type SC TO-I (``count=100`` in the query), follows every result row to its
    filing detail page, downloads the "Complete submission text file" linked
    there, and keeps the filing when that text contains "odd lot"
    (case-insensitive).

    Args:
        CIK_num: The company's CIK, as a string or an int.  Surrounding
            whitespace (for example the newline left by ``readlines()``) is
            stripped.

    Returns:
        A ``pandas.DataFrame`` with the columns ``cik`` (the CIK wrapped in
        literal double quotes), ``date`` (filing date as ``datetime.date``)
        and ``url`` (the filing detail page), or ``None`` when the CIK is
        blank, the search request fails, or no filing qualifies.

    A filing that cannot be fetched or parsed is logged and skipped; it does
    not discard the filings already collected for this CIK.
    """
    # Accept ints and raw file lines alike; a blank CIK cannot be searched.
    cik = str(CIK_num).strip()
    if not cik:
        return None

    try:
        # Search for the company with CIK number CIK_num
        search_results_url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=" + \
            cik + "&type=SC+TO-I&dateb=&owner=exclude&count=100"
        # NOTE(review): no User-Agent header or request throttling is set here;
        # confirm this complies with SEC EDGAR's current access policy.
        # The timeout keeps a stalled connection from hanging the whole run.
        result_site = requests.get(search_results_url, timeout=30)
        # Do not parse an HTTP error page as if it were a result listing.
        result_site.raise_for_status()
        EDGAR_results_page = BeautifulSoup(result_site.text, 'html.parser')

        # Get list of tags with the string "SC TO-I", this signals a tender offer document.
        sc_to_i_tag_list = EDGAR_results_page.find_all("td", string="SC TO-I")
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Exception occured while attempting to produce submission text file, returning None")
        traceback.print_exc()
        return None

    final_dates = []
    final_urls = []
    for tag in sc_to_i_tag_list:
        try:
            # Get the url to the details page.  The href already starts with
            # "/", so the result contains "sec.gov//Archives/..."; this is kept
            # unchanged so the emitted URLs stay the same as before.
            detail_url = "https://www.sec.gov/" + tag.find_next("a")['href']
            print(detail_url)

            # Get the html of the details page
            details_site = requests.get(detail_url, timeout=30)
            details_site.raise_for_status()
            FILING_detail_page = BeautifulSoup(details_site.text, 'html.parser')

            # Get the tender offer/tender amendment
            tender_document_tag = FILING_detail_page.find('td', string="Complete submission text file")
            if tender_document_tag is None:
                print("No complete submission text file listed, skipping " + detail_url)
                continue
            tender_document_url = "https://www.sec.gov/" + tender_document_tag.find_next("a")['href']
            form = requests.get(tender_document_url, timeout=30)
            form.raise_for_status()

            # If this tender offer has no odd lot provision, then skip this
            # iteration and do not add it.
            # NOTE(review): the pattern needs exactly one space, so "odd-lot"
            # or a line-wrapped "odd\nlot" is not matched -- confirm intended.
            if re.search(r'(odd lot)', form.text, re.IGNORECASE) is None:
                continue

            # Get date of tender offer: third <td> after the form-type cell.
            # Parse strictly so a bad value raises here instead of yielding a
            # plain string that has no .date().
            row_cells = tag.find_next_siblings("td")
            filing_date = pd.to_datetime(row_cells[2].get_text().strip(), format='%Y-%m-%d').date()
        except Exception:
            print("Exception occured while processing a filing, skipping it")
            traceback.print_exc()
            continue

        # Add the information
        final_dates.append(filing_date)
        final_urls.append(detail_url)

    if not final_urls:
        return None
    # Return the information as a dataframe
    return pd.DataFrame({'cik': '"' + cik + '"', 'date': final_dates, 'url': final_urls})

# Returns a dataframe with general information about tender offers
def get_tender_offer_data(CIK_nums):
    """Collect every SC TO-I filing listed on EDGAR for a batch of companies.

    For each CIK the EDGAR company browse page is queried with the form type
    restricted to SC TO-I (``count=100`` in the query).  One output row is
    produced per result row.  Unlike ``get_odd_lot_form_url`` the filing text
    is not downloaded, so no odd-lot filtering is applied here.

    Args:
        CIK_nums: Iterable of CIKs (strings or ints).  Surrounding whitespace,
            such as the newline left by ``readlines()``, is stripped and
            blank entries are skipped without issuing a request.

    Returns:
        A ``pandas.DataFrame`` with the columns ``cik`` (the CIK wrapped in
        literal double quotes), ``date`` (filing date as ``datetime.date``)
        and ``url`` (the filing detail page).  It is empty when nothing was
        found.

    Failures are logged and skipped (per CIK for the search request, per row
    for parsing), so one bad entry does not lose the rest of the batch.
    """
    ciks = []
    dates = []
    urls = []
    for CIK_num in CIK_nums:
        cik = str(CIK_num).strip()
        if not cik:
            continue

        try:
            # Search for the company with CIK number CIK_num
            search_results_url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=" + \
                cik + "&type=SC+TO-I&dateb=&owner=exclude&count=100"
            # NOTE(review): no User-Agent header or request throttling is set
            # here; confirm this complies with SEC EDGAR's current access policy.
            # The timeout keeps a stalled connection from hanging the whole run.
            result_site = requests.get(search_results_url, timeout=30)
            # Do not parse an HTTP error page as if it were a result listing.
            result_site.raise_for_status()
            EDGAR_results_page = BeautifulSoup(result_site.text, 'html.parser')

            # Get list of tags with the string "SC TO-I", this signals a tender offer document.
            sc_to_i_tag_list = EDGAR_results_page.find_all("td", string="SC TO-I")
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C can stop a long batch.
            print("Exception occured while searching SC TO-I filings for CIK " + cik + ", skipping it")
            traceback.print_exc()
            continue

        for tag in sc_to_i_tag_list:
            try:
                # Get the url to the details page.  The href already starts
                # with "/", so the result contains "sec.gov//Archives/...";
                # kept unchanged so the emitted URLs stay the same as before.
                detail_url = "https://www.sec.gov/" + tag.find_next("a")['href']
                print(detail_url)

                # Get date of tender offer: third <td> after the form-type
                # cell.  Parse strictly so a bad value raises here instead of
                # yielding a plain string that has no .date().
                row_cells = tag.find_next_siblings("td")
                filing_date = pd.to_datetime(row_cells[2].get_text().strip(), format='%Y-%m-%d').date()
            except Exception:
                print("Exception occured while reading a filing row for CIK " + cik + ", skipping it")
                traceback.print_exc()
                continue

            # Add the information
            ciks.append('"' + cik + '"')
            dates.append(filing_date)
            urls.append(detail_url)

    return pd.DataFrame({'cik': ciks, 'date': dates, 'url': urls})

    
def main(cik_path="CIK_Numbers.txt", start=0, stop=100000,
         output_path="tender_offer_data_1.csv"):
    """Scrape SC TO-I filings for one batch of CIKs and write them to a CSV.

    Args:
        cik_path: Text file with one CIK per line.
        start: Index of the first line of the batch (inclusive).
        stop: Index one past the last line of the batch.
        output_path: CSV file that receives the result, written without the
            index column.

    The defaults reproduce the previous hard-coded run: the first 100000
    lines of ``CIK_Numbers.txt`` written to ``tender_offer_data_1.csv``.
    The odd-lot workflow (one ``get_odd_lot_form_url`` call per CIK) is not
    run from here.
    """
    print("Beginning running")
    # Only hold the file open while reading it, not for the whole scrape.
    with open(cik_path, "r") as cik_numbers_file:
        cik_numbers = cik_numbers_file.readlines()
    print(len(cik_numbers))

    # Slice on the raw lines first so batch boundaries keep referring to line
    # positions in the file, then drop newlines and blank lines.
    batch = [line.strip() for line in cik_numbers[start:stop]]
    batch = [cik for cik in batch if cik]

    try:
        tender_offer_data = get_tender_offer_data(batch)
        tender_offer_data.to_csv(output_path, index=False)
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C still stops the program.
        print("get_tender_offer_data failed")
        traceback.print_exc()

            
# Start the scrape only when this file is executed directly, not when imported.
if __name__ == "__main__": 
    main()

Beginning running
708280
https://www.sec.gov//Archives/edgar/data/946738/000102140803008270/0001021408-03-008270-index.htm
https://www.sec.gov//Archives/edgar/data/1062273/000141588915003983/0001415889-15-003983-index.htm
https://www.sec.gov//Archives/edgar/data/1062273/000114420413033994/0001144204-13-033994-index.htm
https://www.sec.gov//Archives/edgar/data/1396016/000114420409062311/0001144204-09-062311-index.htm
https://www.sec.gov//Archives/edgar/data/898441/000114544302000594/0001145443-02-000594-index.htm
https://www.sec.gov//Archives/edgar/data/1476719/000095012311015917/0000950123-11-015917-index.htm
https://www.sec.gov//Archives/edgar/data/1000401/000090873710000327/0000908737-10-000327-index.htm
https://www.sec.gov//Archives/edgar/data/1000401/000119312509212861/0001193125-09-212861-index.htm
https://www.sec.gov//Archives/edgar/data/1000401/000111667908001864/0001116679-08-001864-index.htm
https://www.sec.gov//Archives/edgar/data/1000401/000090873707000225/0000908737-07-0002

https://www.sec.gov//Archives/edgar/data/1156202/000089968115000201/0000899681-15-000201-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968114000695/0000899681-14-000695-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968114000253/0000899681-14-000253-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968113000621/0000899681-13-000621-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968113000207/0000899681-13-000207-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968112000404/0000899681-12-000404-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968112000109/0000899681-12-000109-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968111000325/0000899681-11-000325-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968111000107/0000899681-11-000107-index.htm
https://www.sec.gov//Archives/edgar/data/1156202/000089968110000485/0000899681-10-000485-index.htm
https://ww

https://www.sec.gov//Archives/edgar/data/1506707/000114420418064891/0001144204-18-064891-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000114420418049926/0001144204-18-049926-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000114420418034821/0001144204-18-034821-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000114420418015858/0001144204-18-015858-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000114420417063693/0001144204-17-063693-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000114420417048702/0001144204-17-048702-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000157104917006037/0001571049-17-006037-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000089968117000147/0000899681-17-000147-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000089968116001897/0000899681-16-001897-index.htm
https://www.sec.gov//Archives/edgar/data/1506707/000089968116001747/0000899681-16-001747-index.htm
https://ww

https://www.sec.gov//Archives/edgar/data/1634452/000119312518335500/0001193125-18-335500-index.htm
https://www.sec.gov//Archives/edgar/data/1634452/000119312518335500/0001193125-18-335500-index.htm
https://www.sec.gov//Archives/edgar/data/1091587/000091205701507640/0000912057-01-507640-index.htm
https://www.sec.gov//Archives/edgar/data/882289/000089256908001062/0000892569-08-001062-index.htm
https://www.sec.gov//Archives/edgar/data/1125052/000119312504015079/0001193125-04-015079-index.htm
https://www.sec.gov//Archives/edgar/data/1551152/000104746918003373/0001047469-18-003373-index.htm
https://www.sec.gov//Archives/edgar/data/846676/000110465918034909/0001104659-18-034909-index.htm
https://www.sec.gov//Archives/edgar/data/846676/000110465910023644/0001104659-10-023644-index.htm
https://www.sec.gov//Archives/edgar/data/846676/000091205702000254/0000912057-02-000254-index.htm
https://www.sec.gov//Archives/edgar/data/846676/000110465918034909/0001104659-18-034909-index.htm
https://www.sec

https://www.sec.gov//Archives/edgar/data/1472595/000147259517000054/0001472595-17-000054-index.htm
https://www.sec.gov//Archives/edgar/data/1087243/000089161802000350/0000891618-02-000350-index.htm
https://www.sec.gov//Archives/edgar/data/817979/000104746910008807/0001047469-10-008807-index.htm
https://www.sec.gov//Archives/edgar/data/817979/000091205701531384/0000912057-01-531384-index.htm
https://www.sec.gov//Archives/edgar/data/1295721/000134100408003090/0001341004-08-003090-index.htm
https://www.sec.gov//Archives/edgar/data/1295721/000120621208000116/0001206212-08-000116-index.htm
https://www.sec.gov//Archives/edgar/data/1295721/000120621207000316/0001206212-07-000316-index.htm
https://www.sec.gov//Archives/edgar/data/935036/000091205701526168/0000912057-01-526168-index.htm
https://www.sec.gov//Archives/edgar/data/910524/000119312509098032/0001193125-09-098032-index.htm
https://www.sec.gov//Archives/edgar/data/910524/000119312508121628/0001193125-08-121628-index.htm
https://www.sec

https://www.sec.gov//Archives/edgar/data/718877/000104746908008183/0001047469-08-008183-index.htm
https://www.sec.gov//Archives/edgar/data/718877/000110465907046226/0001104659-07-046226-index.htm
https://www.sec.gov//Archives/edgar/data/1085621/000119312516761333/0001193125-16-761333-index.htm
https://www.sec.gov//Archives/edgar/data/1085621/000095010902003765/0000950109-02-003765-index.htm
https://www.sec.gov//Archives/edgar/data/1085621/000095010901504003/0000950109-01-504003-index.htm
https://www.sec.gov//Archives/edgar/data/1062478/000095013408019483/0000950134-08-019483-index.htm
https://www.sec.gov//Archives/edgar/data/1062478/000101287002003202/0001012870-02-003202-index.htm
https://www.sec.gov//Archives/edgar/data/1062478/000101287001500532/0001012870-01-500532-index.htm
https://www.sec.gov//Archives/edgar/data/1062478/000095013408019483/0000950134-08-019483-index.htm
https://www.sec.gov//Archives/edgar/data/1062478/000101287002003202/0001012870-02-003202-index.htm
https://www.