In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from Py_Files import bankruptcy_google_search as google_search
from Py_Files import credentials
from openai import OpenAI
import ast
import tqdm
import time
import duckduckgo_search




# 0. Example Internet Search Using Google and DuckDuckGo


In [None]:
# Example Google search: query Chapter 11 news for one company and flatten
# every hit into a single "title -- description -- url" string.
company_name = 'Tupperware'
google_query = f'{company_name} declared bankruptcy chapter 11'
results = google_search.google_bankruptcy_search(google_query, num_results=12)
collection = [' -- '.join((hit.title, hit.description, hit.url)) for hit in results]
collection

In [None]:
# Same kind of lookup through DuckDuckGo: print title, link and snippet for
# each hit, followed by a blank separator line.
results = duckduckgo_search.DDGS().text("Tupperware declared chapter 11 bankruptcy", max_results=10)
for hit in results:
    print(hit["title"], hit["href"], hit["body"], "", sep="\n")


In [None]:
from duckduckgo_search import DDGS

# Run one unrelated query against the 'html' backend and dump the raw result
# list (presumably a quick check that this backend responds -- confirm).
search_client = DDGS()
results = search_client.text("WHen did napoleon die?", max_results=5, backend='html')
print(results)

In [None]:
def _parse_llm_verdict(llm_result):
    """Split a pipe-delimited LLM reply into ``(verdict, date, confidence)``.

    Each field is stripped of surrounding whitespace and quote characters and
    the verdict is lower-cased, so a reply such as ``' Yes | 2020-01-02 | 5'``
    parses the same as ``'yes|2020-01-02|5'``. Fields missing from the reply
    (or a ``None`` reply) come back as ``"N/A"`` / an empty verdict.
    """
    fields = [part.strip().strip('"\'') for part in (llm_result or "").split("|")]
    # Pad short replies so indexing below can never raise IndexError.
    fields += ["N/A"] * (3 - len(fields))
    return fields[0].lower(), fields[1], fields[2]


def bankruptcy_search_pipeline(company_name, max_results=5, openai_client=None):
    """Search DuckDuckGo for a company's bankruptcy and confirm hits with an LLM.

    Parameters
    ----------
    company_name : str
        Name interpolated into the search query and into the LLM prompt.
    max_results : int, default 5
        Maximum number of search results to request and analyze.
    openai_client : object
        Client exposing ``chat.completions.create`` (e.g. ``openai.OpenAI``).
        It is used as soon as the search returns at least one result.

    Returns
    -------
    list
        Always seven elements: ``[company_name, is_bankruptcy, confidence,
        bankruptcy_date, title, body, url]``. The row for the first search
        result the LLM answers "yes" to is returned; when no result is
        confirmed, every field after ``is_bankruptcy="no"`` is ``"N/A"``.
    """
    
    # query using duckduckgo, honouring the caller's result limit
    results = duckduckgo_search.DDGS().text(f"Did {company_name} ever declare bankruptcy", max_results=max_results)

    # iterate over each search engine result
    for hit in results or []:

        chat_gpt_query = f'''I want you to help me analyze the following search engine result. It is a paragraph of text.
        I want to know if the text indicates that the company "{company_name}" went into bankruptcy. Make sure that the company went into bankruptcy and not a different company.
        If it did go into bankruptcy, I also want to know the date of the bankruptcy. Return your answer as text with only the following three elements: 
        The first element is either "yes" or "no". The second element is the date of the bankruptcy in "yyyy-mm-dd" format. If the company
        did not go into bankruptcy, the second element should be "N/A". If you cannot determine the date of the bankruptcy the second element
        should be "N/A". The third element is a score between 1 and 5 of how confident you are in your answer. 5 is the most confident and 1 is
          the least confident. Return your answer as text with "|" pipe delimited to separate the three elements. Here is the search engine result:
          {hit["title"]} {hit["body"]}'''
        
        # analyze search engine results using chatgpt
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "system", "content": "You are a helpful assistant that analyzes search engine results."}, 
                      {"role": "user", 
                       "content": chat_gpt_query}]
        )

        is_bankruptcy, bankruptcy_date, bankruptcy_confidence = _parse_llm_verdict(
            completion.choices[0].message.content
        )

        # stop at the first result that confirms a bankruptcy
        if is_bankruptcy == 'yes':
            return [company_name, is_bankruptcy, bankruptcy_confidence, bankruptcy_date,
                    hit["title"], hit["body"], hit["href"]]

    # no confirmation found: same seven-field layout as the positive row
    return [company_name, "no", "N/A", "N/A", "N/A", "N/A", "N/A"]

def batch_bankruptcy_search_pipeline(company_list:list, max_results:int=9, openai_client=None, pause_seconds:float=1):
    """Run ``bankruptcy_search_pipeline`` over many companies.

    Parameters
    ----------
    company_list : list
        Company names to look up, processed in order.
    max_results : int, default 9
        Forwarded to ``bankruptcy_search_pipeline`` for every company.
    openai_client : object
        Forwarded to ``bankruptcy_search_pipeline`` for every company.
    pause_seconds : float, default 1
        Delay after each company, whether the lookup succeeded or failed.

    Returns
    -------
    tuple
        ``(results_frame, error_list)``. ``results_frame`` holds one row per
        company whose lookup succeeded, in processing order. ``error_list``
        holds the names whose lookup raised; those companies have NO row in
        ``results_frame``, so the frame can be shorter than ``company_list``.
    """
    columns = ['company_name', 'is_bankruptcy', 'confidence', 'bankruptcy_date', 'title', 'body', 'url']

    collection = []
    error_list = []
    for this_company in tqdm.tqdm(company_list):
        
        try:
            result = list(bankruptcy_search_pipeline(company_name=this_company,
                                                     max_results=max_results,
                                                     openai_client=openai_client))
            # Pad short rows so one malformed row cannot make the DataFrame
            # constructor raise and discard the whole batch.
            result += ["N/A"] * (len(columns) - len(result))
            collection.append(result)

        except Exception as exc:
            # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt
            # working during a long batch; the reason is reported, not dropped.
            error_list.append(this_company)
            tqdm.tqdm.write(f"bankruptcy search failed for {this_company!r}: {exc!r}")

        # Throttle after failures too: a failed request counts against rate limits.
        time.sleep(pause_seconds)

    return pd.DataFrame(collection, columns=columns), error_list


# Client used for every LLM call below; without this line a fresh kernel
# fails with NameError when `openai_client` is passed to the batch run.
openai_client = OpenAI(api_key=credentials.OPENAI_API_KEY)
# bankruptcy_search_pipeline(company_name='Worlcom', max_results=9, openai_client=openai_client)
high_pd = pd.read_csv('/Users/joeybortfeld/Downloads/high_pd_no_default_5.csv', encoding='latin_1')
review_list = high_pd['name1'].tolist()
fsym_list = high_pd['fsym_id'].tolist()

results, error_list = batch_bankruptcy_search_pipeline(company_list=review_list,
                                           max_results=9,
                                           openai_client=openai_client)

# Companies whose lookup raised are listed in `error_list` and have no row in
# `results`, so slicing `fsym_list` by position would shift every id after the
# first failure. Keep only the ids of companies that did not fail; if the
# lengths still disagree (e.g. a duplicated name failed once and succeeded
# once) the assignment raises ValueError instead of silently misaligning.
failed_names = set(error_list)
results['fsym_id'] = [fsym_id for name, fsym_id in zip(review_list, fsym_list)
                      if name not in failed_names]
results



In [None]:
# Write the search results to a dated CSV, leaving out the row index.
output_csv = '/Users/joeybortfeld/Downloads/bankruptcy_search_results_20250217.csv'
results.to_csv(output_csv, index=False)