# Run Instructions

1. Make sure conda is installed.
2. In a terminal, run `conda create -n myproject`.
3. Activate your environment by running `conda activate myproject`.
4. Run `conda install pip`.
5. To install the dependencies, run `pip install -r requirements.txt`.

After these steps, you should be ready to run the notebooks. You can select your environment in the upper-right corner of the Jupyter notebook.

In [1]:
%cd ../../

/Users/efang/Desktop/coding/research


In [4]:
from src.functions.matching.matching_agent import MatchingAgent
from src.abstract_classes.attribute import DocumentAttr
from src.functions.decompose_transcript import extract_presentation_section, extract_qa_section, clean_spoken_content


  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to /Users/efang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [5]:
# Function to load an xml file into a DocumentAttr object

def load_sample_document(file_path: str) -> DocumentAttr:
    """
    Build a DocumentAttr from an XML earnings call transcript.

    The presentation and Q&A sections are extracted from the file with the
    decompose_transcript helpers, joined with a blank line between them, and
    passed through clean_spoken_content to remove speaker tags and separators.

    Args:
        file_path: Path to the XML transcript file.

    Returns:
        A DocumentAttr wrapping the cleaned transcript text. If any step
        raises, the error is printed and a DocumentAttr with an empty
        document is returned instead (the exception is not re-raised).
    """
    try:
        # Pull the two spoken parts of the call, presentation first.
        presentation = extract_presentation_section(file_path)
        qa = extract_qa_section(file_path)

        # Join the sections with a blank line, then clean the combined text.
        spoken = clean_spoken_content(presentation + "\n\n" + qa)

        result = DocumentAttr(document=spoken)
    except Exception as err:
        # Best-effort fallback: report the problem and hand back an empty document.
        print(f"Error loading document: {err}")
        result = DocumentAttr(document="")
    return result

In [6]:
# Initialize the matching agent from the test keyword CSV and the ex1 sample
# transcript (both paths are relative to the current working directory).

agent = MatchingAgent(
    keywords_file="src/functions/matching/test_keywords.csv",
    document=load_sample_document("data/earnings_calls/ex1.xml")
)

Using device: cpu


In [7]:
# Run the agent's cosine-similarity matching with match_type="word" and keep
# the result in `matches` so it can be printed in the next cell.

matches = agent.cos_similarity(match_type="word")

In [8]:
# Print the match result held in `matches`.
print(matches)

Cosine Similarity Threshold: 0.7

-------------------- Summary --------------------
Total keywords searched: 2
Total keywords with matches: 2
Total direct matches: 19
Total cosine matches: 13
Total unique matches: 6
Unique matches: ['impacts', 'uncertainty', 'impacting', 'uncertain', 'impact', 'impacted']


Keyword: 'uncertainty' (6 total matches)
  Direct Matches (4):
    - Text: 'uncertainty', Context: 'There's a lot more uncertainty.', Position: 5948
    - Text: 'uncertainty', Context: 'And just given the uncertainty, could you talk about the month-to-month trends that you saw in rental?', Position: 6696
    - Text: 'uncertainty', Context: 'These statements are based on Management's current expectations and are subject to uncertainty and changes in circumstances.', Position: 116
    - Text: 'uncertainty', Context: 'Although commercial rental delivered solid growth, due to a high level of uncertainty regarding the macro environment and somewhat less robust demand conditions with rent

In [None]:
# Second agent: same ex1 sample transcript, but the keywords come from
# political_words.csv instead of the test keyword file.
agent2 = MatchingAgent(
    keywords_file="data/paper_word_sets/political_words.csv",
    document=load_sample_document("data/earnings_calls/ex1.xml")
)

In [None]:
# Run agent2's cosine-similarity matching with match_type="hybrid".
# NOTE(review): this rebinds `matches`, so the match_type="word" result
# computed earlier from `agent` is no longer reachable under that name.
matches = agent2.cos_similarity(match_type="hybrid")

In [None]:
# Print the current `matches` value (agent2's "hybrid" result when the cells
# are run in order).
print(matches)