In [2]:
import os
import sys 
import logging 
import numpy as np 
# Load environment variables
from dotenv import load_dotenv
load_dotenv()

from elastic_helpers import ESBulkIndexer, ESQueryMaker
from embedding_model import EmbeddingModel
from llamaindex_processor import LlamaIndexProcessor
from nltk_processor import NLTKProcessor
from chunker import Chunker
from elastic_config import BASIC_CONFIG
from llm import LLMProcessor
from elastic_helpers import ESBulkIndexer
from elastic_config import BASIC_CONFIG
from document_enricher import DocumentEnricher
from entity_extractor import EntityExtractor

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
[nltk_data] Downloading package punkt to /Users/han/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/han/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to /Users/han/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/han/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Improvements
1. Vector enrichment: recombine metadata embeddings (keyphrases, generated questions) with the chunk embedding using weights
2. Sentence-wise, token-level chunking with window overlaps
3. Match OR string
4. HyDE
5. Reverse Packing

<!-- ![](./assets/rag_pipe.png) -->

# Step One: Initialize services, Initial Setup

In [3]:


# Configure logging.
# force=True replaces any handlers already attached to the root logger. Without it,
# basicConfig silently does nothing once a handler exists and the format below is
# ignored (the log lines recorded in this notebook use the default
# "LEVEL:name:message" layout rather than this one).
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    force=True)
logger = logging.getLogger(__name__)

# Initialize Embedding Model (model name comes from the environment / .env file)
HUGGINGFACE_EMBEDDING_MODEL = os.environ.get('HUGGINGFACE_EMBEDDING_MODEL')
embedder=EmbeddingModel(model_name=HUGGINGFACE_EMBEDDING_MODEL)

# Initialize Document Processors
llamaindex_processor=LlamaIndexProcessor()
nltkprocessor=NLTKProcessor()
# The chunker shares the embedding model's tokenizer.
chunker=Chunker(embedder.tokenizer)
documentenricher=DocumentEnricher()
entity_extractor=EntityExtractor()

# Initialize Elasticsearch: one client for bulk indexing, one for querying,
# both built from the same cloud id and (username, password) pair.
ELASTIC_CLOUD_ID = os.environ.get('ELASTIC_CLOUD_ID')
ELASTIC_USERNAME = os.environ.get('ELASTIC_USERNAME')
ELASTIC_PASSWORD = os.environ.get('ELASTIC_PASSWORD')
ELASTIC_CLOUD_AUTH = (ELASTIC_USERNAME, ELASTIC_PASSWORD)
es_bulk_indexer = ESBulkIndexer(cloud_id=ELASTIC_CLOUD_ID, credentials=ELASTIC_CLOUD_AUTH)
es_query_maker = ESQueryMaker(cloud_id=ELASTIC_CLOUD_ID, credentials=ELASTIC_CLOUD_AUTH)

# Define Index Name
index_name=os.environ.get('ELASTIC_INDEX_NAME')

# Initialize LLMs
gpt4o = LLMProcessor(model='gpt-4o')

Using MPS


  _torch_pytree._register_pytree_node(
INFO:elastic_helpers:Connection created for cloud_id: Training:YXNpYS1zb3V0aGVhc3QxLmdjcC5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOTVmMWQ4MTZiNWEzNGM3NTljNjEwMWIyODU4ZGRmZGEkZmE0NzUyN2UwN2NkNGUwYThiOGFkMGViZDc2ZDE4NGY=
INFO:elastic_helpers:Connection created for cloud_id: Training:YXNpYS1zb3V0aGVhc3QxLmdjcC5lbGFzdGljLWNsb3VkLmNvbTo0NDMkOTVmMWQ4MTZiNWEzNGM3NTljNjEwMWIyODU4ZGRmZGEkZmE0NzUyN2UwN2NkNGUwYThiOGFkMGViZDc2ZDE4NGY=
INFO:llm:LLMProcessor initialized with model: gpt-4o


# Step Two: Process some documents

In [3]:

# Load the source documents from ./documents/ through the LlamaIndex processor,
# then convert each returned object with dict() so later steps work on plain mappings.
# Note that `documents` is rebound: after this cell it holds dicts, not the loader's objects.
documents=llamaindex_processor.load_documents('./documents/')
documents=[dict(doc_obj) for doc_obj in documents]

In [4]:
# Chunk the loaded documents with the sentence-wise tokenized chunker, passing chunk_size=512.
# NOTE(review): the tokenizer warning recorded for this cell mentions a 647-token
# sequence (> 512) — confirm the resulting chunks stay within the model's limit.
chunked_documents=chunker.sentence_wise_tokenized_chunk_documents(documents, chunk_size=512)

Token indices sequence length is longer than the specified maximum sequence length for this model (647 > 512). Running this sequence through the model will result in indexing errors


In [5]:
chunked_documents[11]['original_text']


'based primarily on a combination of a paid elastic - managed hosted service offering and paid and free proprietary self - managed software. [SEP] [CLS] our paid offerings for our platform are sold via subscription through resource - based pricing, and all customers and users have access to all solutions. [SEP] [CLS] in elastic cloud, our family of cloud - based offerings under which we offer our software as a hosted, managed service, we offer various subscription tiers tied to different features. [SEP] [CLS] for users who download our software, we make some of the features of our software available for free, allowing us to engage with a broad community of developers and practitioners and introduce them to the value of the elastic stack. [SEP] [CLS] we believe in the importance of an open software development model, and we develop the majority of our software in public repositories as open code under a proprietary license. [SEP] [CLS] unlike some companies, we do not build an enterpris

In [12]:
# embedder.get_embeddings_from_tokens([chunked_documents[0]['chunk']])

# Step Three: Enrich your documents

In [3]:
import pickle

# Load the document list from the local backup file; a later cell in this notebook
# writes `embedded_docs` back to this same path.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file — only
# load backups that were produced locally by this notebook.
with open('./backups/embedding_generated_questions_0.pkl', 'rb') as f:
    embedded_docs=pickle.load(f)



In [6]:

# Remove the 'embedding' entry from every document.
# pop(..., None) keeps this cell idempotent: re-running it, or running it on
# documents that never had the key, no longer raises KeyError.
for doc in embedded_docs:
    doc.pop('embedding', None)

In [4]:
# Each entry pairs an enrichment callable with a field name; only entity extraction
# is active here, the keyphrase and question generators are commented out.
# NOTE(review): presumably each processor's result is stored on the document under the
# paired name (a later cell reads 'entities') — confirm in DocumentEnricher.
# NOTE(review): later cells also read 'keyphrases' and 'potential_questions', which this
# cell does not produce; they appear to come from the loaded backup — verify.
processors=[
    # (nltkprocessor.textrank_phrases, "keyphrases"),
    # (gpt4o.generate_questions, "potential_questions"),
    (entity_extractor.extract_entities, "entities")
    ]
# The processors are applied to the text found in each document's 'original_text' field.
documentenricher.enrich_document(embedded_docs, text_col='original_text', processors=processors)

Enriching documents using processors: [(<bound method EntityExtractor.extract_entities of <entity_extractor.EntityExtractor object at 0x334331730>>, 'entities')]: 100%|██████████| 224/224 [00:05<00:00, 42.32it/s]


In [7]:
import pickle

# Persist the current state of `embedded_docs` to the backup file.
# This overwrites the same path the notebook loads from, so the previous backup is lost.
with open('./backups/embedding_generated_questions_0.pkl', 'wb') as f:
    pickle.dump(embedded_docs, f)

In [21]:
embedded_docs[25]['potential_questions']

'1. What are the primary functions that Elastic Agent provides in terms of cybersecurity?\n2. Describe how Logstash contributes to data management within an IT environment.\n3. List and explain any key features of Logstash mentioned in the document.\n4. How does Elastic Agent enhance environment-wide visibility in threat detection?\n5. What capabilities does Logstash offer for handling data beyond simple collection?\n6. In what ways does the document suggest that Elastic Agent stops malware and ransomware?\n7. Can you identify any relationships between the functionalities of Elastic Agent and Logstash in an integrated environment?\n8. What implications might the advanced threat detection capabilities of Elastic Agent have for organizational security policies?\n9. Compare and contrast the roles of Elastic Agent and Logstash based on their described functions.\n10. How might the centralized collection ability of Logstash support the threat detection capabilities of Elastic Agent?'

# Step Four: Weighted Embeddings

In [51]:
cols_to_embed = ['keyphrases', 'potential_questions', 'entities']

# Embed each text metadata field in turn and collect whatever each call returns
# (the recorded output shows names such as 'keyphrases_embedding').
embedding_cols = [
    embedder.embed_documents_text_wise(embedded_docs, text_field=text_col)
    for text_col in cols_to_embed
]

# The tokenized chunk goes through the token-wise embedder and is appended last.
embedding_col = embedder.embed_documents_token_wise(embedded_docs, token_field="chunk")
embedding_cols.append(embedding_col)

Embedding documents: 100%|██████████| 7/7 [00:02<00:00,  3.32it/s]
Embedding documents: 100%|██████████| 7/7 [00:03<00:00,  1.89it/s]
Embedding documents: 100%|██████████| 7/7 [00:02<00:00,  3.19it/s]
Embedding documents: 100%|██████████| 7/7 [00:04<00:00,  1.52it/s]


In [52]:
embedding_cols

['keyphrases_embedding',
 'potential_questions_embedding',
 'entities_embedding',
 'chunk_embedding']

In [50]:
# Embedding fields to blend; each entry is paired by position with a weight below.
embedding_cols = [
    'keyphrases_embedding',
    'potential_questions_embedding',
    'entities_embedding',
    'chunk_embedding',
]

# Relative weights, in the same order as embedding_cols. They sum to 0.95, not 1;
# combine_embeddings divides by their sum before use, so only the ratios matter.
combination_weights = [0.15, 0.25, 0.1, 0.45]

In [53]:

def combine_embeddings(objects, embedding_cols, combination_weights, primary_embedding='primary_embedding'):
    """Blend several per-document embeddings into one weighted-average vector, in place.

    For every document the vectors stored under ``embedding_cols`` are multiplied by
    their normalized weights and summed. The source columns are then removed and the
    result is stored as a plain list under ``primary_embedding``.

    Args:
        objects: Iterable of dict-like documents; each must contain every column
            named in ``embedding_cols``.
        embedding_cols: Names of the embedding fields to combine.
        combination_weights: One relative weight per entry of ``embedding_cols``.
            They are divided by their sum, so they need not add up to 1.
        primary_embedding: Key under which the combined vector is written.

    Returns:
        None. ``objects`` is modified in place.

    Raises:
        AssertionError: If the number of columns and weights differ.
        ValueError: If the weights sum to zero (normalization is undefined).
        KeyError: If a document lacks one of the embedding columns.
    """
    # tqdm is not imported at the top of the notebook, so resolve it here; the
    # progress bar is cosmetic, so fall back to plain iteration when it is missing.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = None

    # Ensure the number of weights matches the number of embedding columns
    assert len(embedding_cols) == len(combination_weights), "Number of embedding columns must match number of weights"

    # Normalize weights to sum to 1
    raw_weights = np.asarray(combination_weights, dtype=float)
    total_weight = raw_weights.sum()
    if total_weight == 0:
        raise ValueError("combination_weights must not sum to zero")
    weights = raw_weights / total_weight

    iterator = tqdm(objects, desc="Combining embeddings") if tqdm is not None else objects
    for obj in iterator:
        # Accumulate in float explicitly: zeros_like would inherit an integer dtype
        # from integer-valued inputs and the in-place add below would fail to cast.
        combined = np.zeros(np.shape(obj[embedding_cols[0]]), dtype=float)

        # Compute the weighted sum
        for col, weight in zip(embedding_cols, weights):
            combined += weight * np.asarray(obj[col], dtype=float)

        # Remove the source columns first, then store the result, so the combined
        # vector survives even if primary_embedding reuses one of the source names.
        for col in embedding_cols:
            obj.pop(col, None)
        obj[primary_embedding] = combined.tolist()

In [54]:
combine_embeddings(embedded_docs, embedding_cols, combination_weights)

Combining embeddings: 100%|██████████| 224/224 [00:00<00:00, 8975.03it/s]


In [49]:
embedded_docs[1]

{'id_': '24173c7a-ef22-4a07-abe2-ec7302352437',
 'chunk': [1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,
  1035,

# Step Five: Index to Elasticsearch

In [55]:

# Create the target index from BASIC_CONFIG only when it does not exist yet,
# so re-running this cell reuses the existing index.
index_exists = es_bulk_indexer.check_index_existence(index_name=index_name)
if not index_exists:
    logger.info(f"Creating new index: {index_name}")
    es_bulk_indexer.create_es_index(es_configuration=BASIC_CONFIG, index_name=index_name)

# Upload the enriched documents in batches of 32, passing 'id_' as the id column.
# NOTE(review): success_count presumably holds the number of indexed documents — it is
# not checked afterwards; confirm the return value and consider asserting on it.
success_count = es_bulk_indexer.bulk_upload_documents(
    index_name=index_name, 
    documents=embedded_docs, 
    id_col='id_',
    batch_size=32
)

INFO:elastic_transport.transport:HEAD https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1 [status:404 duration:0.155s]
INFO:__main__:Creating new index: test_index_1
INFO:elastic_transport.transport:HEAD https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1 [status:404 duration:0.009s]
INFO:elastic_helpers:Index test_index_1 does not exist.
INFO:elastic_transport.transport:PUT https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1 [status:200 duration:0.144s]
INFO:elastic_helpers:New index test_index_1 created!
INFO:elastic_transport.transport:PUT https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/_bulk [status:200 duration:0.398s]
INFO:elastic_helpers:Batch 1: Successfully indexed 32 documents to test_index_1
INFO:elastic_transport.transport:PUT https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/_

# Step Six: Make a Query


In [15]:
def get_context(index_name, match_query, text_query, fields, num_candidates=100, num_results=20, text_field="original_text", embedding_field="primary_embedding"):
    """Run a hybrid (text + vector) search and return the hits as context strings.

    ``text_query`` is embedded with the notebook's ``embedder`` and sent as the query
    vector, while ``match_query`` is sent as the text query. Every hit is rendered as
    ``"<field>:\\n\\n<value>"`` for each name in ``fields``, joined by blank lines.

    Args:
        index_name: Index to search.
        match_query: Text passed to the search as ``query_text``.
        text_query: Text that is embedded to obtain the query vector.
        fields: Names of the ``_source`` fields to include in each context string.
        num_candidates: Forwarded to the search as ``num_candidates``.
        num_results: Forwarded to the search as ``num_results``.
        text_field: Document field used for the text side of the search.
        embedding_field: Document field used for the vector side of the search.

    Returns:
        A tuple ``(context_docs, search_body)``: the rendered hits in the reverse of
        the order the search returned them, and the second value returned by
        ``hybrid_vector_search``.
    """
    query_embedding = embedder.get_embeddings_from_text(text_query)

    search_kwargs = dict(
        index_name=index_name,
        query_text=match_query,
        # First element of the first element of the embedder output.
        # NOTE(review): assumes a nested (batch, ...) result layout — confirm in EmbeddingModel.
        query_vector=query_embedding[0][0],
        text_field=text_field,
        vector_field=embedding_field,
        num_candidates=num_candidates,
        num_results=num_results,
    )
    results, search_body = es_query_maker.hybrid_vector_search(**search_kwargs)

    def _render(hit):
        # One "<field>:\n\n<value>" section per requested field.
        return '\n\n'.join(name + ":\n\n" + hit['_source'][name] for name in fields)

    ranked_docs = [_render(hit) for hit in results['hits']['hits']]
    # Hand the documents back in reversed hit order.
    return ranked_docs[::-1], search_body


In [16]:
def RAG(query_text):
    """Answer ``query_text`` with retrieval-augmented generation.

    Steps, in order: ask the LLM for a search query, ask it for a HyDE document,
    retrieve context with ``get_context`` (the generated query drives the text match,
    the HyDE document is what gets embedded), then ask the LLM to answer from that
    context.

    Args:
        query_text: The user's question.

    Returns:
        A tuple ``(answer, match_query, hyde_document, context, search_body)`` so the
        intermediate artefacts can be inspected alongside the answer.
    """
    es_match_query = gpt4o.generate_query(query_text)
    hypothetical_doc = gpt4o.generate_HyDE(query_text)

    # Only the chunk text itself is packed into the context.
    context_fields = ['original_text']
    retrieved_context, es_search_body = get_context(
        index_name, es_match_query, hypothetical_doc, context_fields
    )

    final_answer = gpt4o.basic_qa(query=query_text, context=retrieved_context)
    return final_answer, es_match_query, hypothetical_doc, retrieved_context, es_search_body

# Test 1

In [22]:
# Run the full RAG pipeline for this query. Only the answer is printed; the other
# returned values stay bound in the notebook for inspection.
query_text="who audits elastic"
answer, match_query, hyde_document, context, search_body = RAG(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 6.000000 seconds
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation compl

According to the provided context, Elastic's auditors are PricewaterhouseCoopers (PwC). The context states:

"[...] [CLS] report of independent registered public accounting firm to the board of directors and shareholders of elastic n. v. [SEP] [CLS] opinions on the financial statements and internal control over financial reporting we have audited the accompanying consolidated balance sheets of elastic n. v. [SEP] [CLS] and its subsidiaries ( the “ company ” ) as of april 30, 2023 and 2022, and the related consolidated statements of operations, of comprehensive loss, of shareholders' equity and of cash flows for each of the three years in the period ended april 30, 2023, including the related notes ( collectively referred to as the “ consolidated financial statements ” ). [SEP] [CLS] we also have audited the company's internal control over financial reporting as of april 30, 2023, based on criteria established in internal control - integrated framework ( 2013 ) issued by the committee o

# Test 2

In [35]:
# Run the full RAG pipeline for this query. Only the answer is printed; the other
# returned values stay bound in the notebook for inspection.
query_text="total revenue 2023"
answer, match_query, hyde_document, context, search_body = RAG(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1/_search [status:200 duration:0.037s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text 

According to the provided context, the total revenue for the year ended April 30, 2023, was $1,068,989,000 ($1.068 billion).

This information can be found in multiple excerpts from the context:
- "The following table summarizes the company ’ s total revenue by geographic area based on the location of customers ( in thousands ) : year ended april 30, 2023… total revenue $ 1, 068, 989..."
- "Our total revenue was $ 1. 1 billion for the year ended April 30, 2023."
- "Year ended April 30, 2023… total revenue $ 1,068,989..."

Thus, the total revenue for 2023 is accurately documented as $1,068,989,000.


# Test 3

In [32]:
# Run the full RAG pipeline for this two-part query; only the answer is printed.
# NOTE(review): the query contains a misspelling ("primaryily"); unclear whether it is a
# deliberate robustness check — left untouched because it is runtime input.
query_text="what product does growth primaryily depend on? How much"
answer, match_query, hyde_document, context, search_body = RAG(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1/_search [status:200 duration:0.111s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text 

Based on the provided context, the primary product growth depends significantly on is "Elastic Cloud" offerings. The context indicates:

1. "Our ability to grow our business will depend significantly on the expansion and adoption of our elastic cloud offerings."
2. "We believe our future success will depend significantly on the growth in the adoption of elastic cloud, our family of cloud-based offerings."
3. "For the years ended April 30, 2023, 2022, and 2021, elastic cloud contributed 40%, 35%, and 27% of our total revenue, respectively."

This shows that Elastic Cloud's contribution to total revenue has been increasing over the years, which underscores its importance to the company's growth strategy.

To summarize:

**Primary Product:** Elastic Cloud offerings.
**Contribution to Revenue:**
- 2023: 40%
- 2022: 35%
- 2021: 27%


# Test 4

In [34]:
# Run the full RAG pipeline for this query; only the answer is printed.
# NOTE(review): the query contains a misspelling ("cnage"); unclear whether it is a
# deliberate robustness check — left untouched because it is runtime input.
query_text="What must elastic do if a cnage of control occurs"
answer, match_query, hyde_document, context, search_body = RAG(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1/_search [status:200 duration:0.038s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text 

According to the provided context, if a change of control occurs, Elastic N.V. must take specific actions related to its senior notes:

1. **Repurchase of Senior Notes**: Holders of the senior notes can require Elastic to repurchase the senior notes at a repurchase price equal to 101% of the principal amount of the senior notes, plus accrued and unpaid interest up to, but excluding, the applicable repurchase date. Here is the relevant excerpt from the context:
   > "Holders of the senior notes can require us to repurchase the senior notes upon a change of control (as defined in the indenture governing the senior notes) at a repurchase price equal to 101% of the principal amount of the senior notes, plus accrued and unpaid interest to, but excluding, the applicable repurchase date."

2. **Financial Limitation**: Elastic's ability to repurchase the senior notes may be limited by law or the terms of other agreements related to its indebtedness. Additionally, the company may not have suffi

# Test 5

In [42]:
# Run the full RAG pipeline for this query. Only the answer is printed; `context` is
# displayed on its own in a following cell.
query_text="equity awarded to acquisition shareholders"
answer, match_query, hyde_document, context, search_body = RAG(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1/_search [status:200 duration:0.043s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text 

Based on the given context, specifically regarding equity awards in acquisitions, here's the relevant information:

According to the context:
- "When the company issues stock-based or cash awards to an acquired company’s shareholders, the company evaluates whether the awards are consideration or compensation for post-acquisition services."
- The evaluation considers "whether the vesting of the awards is contingent on the continued employment of the acquired company’s shareholders beyond the acquisition date."
- If continued employment is required for vesting, the awards are regarded as "compensation for post-acquisition services and recognized as expense over the requisite service period."
- Acquisition-related transaction costs are "accounted for as an operating expense in the period in which the costs are incurred."
- The issuance of equity awards as consideration or compensation post-acquisition is a complex process involving evaluating vesting conditions and appropriately categoriz

In [39]:
context

['original_text:\n\n471 issuance of ordinary shares upon release of restricted stock units 2, 064, 997 22 — ( 22 ) — — — stock - based compensation — — — 204, 039 — — 204, 039 net loss — — — — — ( 236, 161 ) ( 236, 161 ) other comprehensive loss — — — — ( 1, 885 ) — ( 1, 885 ) balances as of april 30, 2023 97, 366, 947 $ 1, 024 $ ( 369 ) $ 1, 471, 584 $ ( 20, 015 ) $ ( 1, 053, 327 ) $ 398, 897 the accompanying notes are an integral part of these consolidated financial statements. [SEP] [CLS] 72 [SEP]',
 'original_text:\n\n[CLS] year ended april 30, change 2023 2022 $ % ( in thousands ) provision for income taxes $ 19, 284 $ 6, 059 $ 13, 225 218 % the provision for income taxes increased $ 13. 2 million, or 218 %, for the year ended april 30, 2023 compared to the prior year. [SEP] [CLS] our effective tax rate was ( 8. 9 ) % and ( 3. 1 ) % of our net loss before taxes for the years ended april 30, 2023 and 2022, respectively. [SEP] [CLS] our effective tax rate is affected by recurring it