In [1]:
import time

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


from src.gemini_llm import GeminiLLM
from src.utils import wiki_intro
from src.utils import train_test_split

Loading environment variables...
Enviroment vars loaded: True


In [2]:
# Gemini-backed classifier shared by the categorization calls below.
# NOTE(review): temperature=0 is presumably meant to make answers as repeatable
# as possible — confirm against GeminiLLM.
model = GeminiLLM(model_name="gemini-2.0-flash-lite", temperature=0)

In [3]:
# Smoke test: classify a single company from its name alone.
model.categorize(company_name="Altria Group")

'Food drink & tobacco'

In [5]:
# Same company again, this time passing `wiki_intro` as the description provider.
# NOTE(review): presumably wiki_intro looks up a Wikipedia intro for the name —
# confirm in src.utils.
model.categorize_get_description(
    company_name="Altria Group",
    get_description=wiki_intro
)

'Food drink & tobacco'

In [6]:
# Nonsense company name: the recorded output for this call is 'Other'.
model.categorize(company_name="wewaealkedad")



'Other'

Because the Gemini free plan allows only a limited number of API requests, I only computed predictions for the test set. The same test set is used to evaluate the other models.

In [6]:
# Load the labelled companies; the COMPANY and CATEGORY columns are used below.
df = pd.read_csv("../resources/dataset.csv")

In [7]:
# `train_test_split` is the project helper imported from src.utils, not
# scikit-learn's function of the same name.
train_df, test_df = train_test_split(df)

In [8]:
# Display the test split (pandas elides the middle rows of a long frame).
test_df

Unnamed: 0,COMPANY,CATEGORY
568,Pitney Bowes,Business services & supplies
1603,Nanto Bank,Banking
1010,Agfa-Gevaert,Household & personal products
1466,US Commercial,Diversified financials
968,Computer Associates,Software & services
...,...,...
1433,Tiffany,Retailing
697,Murata Manufacturing,Business services & supplies
139,Cendant,Hotels restaurants & leisure
1212,Canadian Tire,Retailing


A workaround for the rate limits of the free-tier Gemini API: when a quota error is raised, wait and then retry the same request.

In [9]:
def safe_llm_call(company_name, model, get_description, max_retries=5, wait_time=60):
    """Categorize one company, pausing and retrying when the API quota is hit.

    Args:
        company_name: Name of the company to classify.
        model: Object exposing ``categorize(company_name=...)`` and
            ``categorize_get_description(company_name=..., get_description=...)``.
        get_description: Selects the description-based path. A callable is
            forwarded to the model as the description provider; any other
            truthy value falls back to ``wiki_intro`` (the previous behaviour
            for ``True``); a falsy value classifies from the name alone.
        max_retries: Maximum number of attempts before giving up.
        wait_time: Seconds to pause after a quota error before the next
            attempt. Defaults to 60, the previously hard-coded value.

    Returns:
        Whatever the model call returns, or the string ``"n/a"`` when a
        non-quota error occurs or every attempt hit the rate limit.
    """
    # Resolve the description provider once, outside the retry loop.
    if callable(get_description):
        describe = get_description
    elif get_description:
        describe = wiki_intro
    else:
        describe = None

    for attempt in range(max_retries):
        try:
            if describe is not None:
                return model.categorize_get_description(
                    company_name=company_name,
                    get_description=describe,
                )
            return model.categorize(company_name=company_name)
        except Exception as e:
            message = str(e)
            is_rate_limit = "ResourceExhausted" in message or "quota" in message.lower()
            if not is_rate_limit:
                # Deliberate best-effort: report the failure and keep the batch going.
                print(f"Error with {company_name}: {e}")
                return "n/a"
            # Only pause when another attempt will follow; sleeping after the
            # final attempt just delays the inevitable "n/a".
            if attempt + 1 < max_retries:
                print(f"Rate limit hit. Retrying in {wait_time:.1f}s (attempt {attempt+1}/{max_retries})...")
                time.sleep(wait_time)

    print(f"Max retries exceeded for {company_name}. Returning 'n/a'.")
    return "n/a"

In [10]:
# Smoke test of the retry wrapper on the description-based path.
safe_llm_call("Altria Group", model, get_description=True)

'Food drink & tobacco'

In [12]:
# Classify every test-set company from its name alone (no description lookup).
# Predictions are appended one at a time, in the order of test_df["COMPANY"].
result_no_description = []
for name in tqdm(test_df["COMPANY"]):
    prediction = safe_llm_call(name, model, get_description=False)
    result_no_description.append(prediction)

  1%|          | 4/500 [00:01<03:27,  2.39it/s]



  6%|▌         | 31/500 [00:14<03:10,  2.46it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 41
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 10%|█         | 52/500 [01:25<03:15,  2.29it/s]  



 12%|█▏        | 59/500 [01:28<03:03,  2.41it/s]



 12%|█▏        | 62/500 [01:29<03:03,  2.38it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 25
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 14%|█▍        | 69/500 [02:35<19:01,  2.65s/it]  



 22%|██▏       | 108/500 [02:52<02:53,  2.25it/s]



 24%|██▍       | 120/500 [02:58<03:01,  2.10it/s]



 25%|██▍       | 123/500 [02:59<02:43,  2.31it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 26%|██▌       | 129/500 [04:03<21:55,  3.55s/it]  



 29%|██▉       | 145/500 [04:11<02:43,  2.17it/s]



 30%|██▉       | 149/500 [04:12<02:37,  2.24it/s]



 31%|███       | 154/500 [04:15<02:23,  2.40it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 40
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 34%|███▍      | 171/500 [05:24<02:44,  2.00it/s]  



 37%|███▋      | 185/500 [05:30<02:15,  2.33it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 25
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 48%|████▊     | 240/500 [06:59<02:00,  2.16it/s]  Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 48%|████▊     | 242/500 [08:02<58:01, 13.49s/it]  



 54%|█████▍    | 271/500 [08:14<01:36,  2.37it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 40
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 54%|█████▍    | 272/500 [09:17<1:12:20, 19.04s/it]



 56%|█████▌    | 280/500 [09:21<05:36,  1.53s/it]  



 57%|█████▋    | 283/500 [09:22<02:59,  1.21it/s]



 60%|██████    | 302/500 [09:30<01:23,  2.37it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 25
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 63%|██████▎   | 315/500 [10:38<02:21,  1.31it/s]  



 66%|██████▌   | 328/500 [10:44<01:11,  2.41it/s]



 68%|██████▊   | 338/500 [10:48<01:06,  2.42it/s]



 70%|███████   | 350/500 [10:53<01:05,  2.29it/s]



 72%|███████▏  | 361/500 [10:58<01:01,  2.27it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 57
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 73%|███████▎  | 367/500 [12:02<07:50,  3.54s/it]



 76%|███████▌  | 379/500 [12:08<01:03,  1.89it/s]



 78%|███████▊  | 392/500 [12:14<00:51,  2.10it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 41
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 79%|███████▉  | 396/500 [13:18<11:54,  6.87s/it]



 83%|████████▎ | 415/500 [13:26<00:39,  2.15it/s]



 85%|████████▍ | 423/500 [13:30<00:32,  2.34it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 25
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 85%|████████▌ | 427/500 [14:34<08:22,  6.89s/it]



 96%|█████████▌| 480/500 [14:59<00:09,  2.19it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


100%|█████████▉| 498/500 [16:10<00:01,  1.90it/s]



100%|██████████| 500/500 [16:10<00:00,  1.94s/it]


In [13]:
# Sanity check: expect one prediction per test company (500 in the recorded run).
len(result_no_description)

500

In [15]:
# Attach the name-only predictions to the test frame. The list was built by
# iterating test_df["COMPANY"] in order, so it lines up row by row.
test_df['PRED_CATEGORY_NO_DESC'] = result_no_description

In [17]:
# Accuracy of the name-only predictions: fraction of rows whose predicted
# category equals the labelled one. Rows where safe_llm_call returned "n/a"
# can only count as misses.
is_correct = test_df['PRED_CATEGORY_NO_DESC'] == test_df['CATEGORY']
is_correct.mean()

0.776

In [None]:
# Checkpoint the predictions in case of a crash. Save `test_df` — the frame that
# carries PRED_CATEGORY_NO_DESC — rather than `df`, which has no prediction
# column and would produce a "with_predictions" file containing none.
test_df.to_csv("../resources/dataset_with_predictions.csv", index=False)

In [19]:
# Classify every test-set company again, this time on the description-based path.
# Predictions are appended one at a time, in the order of test_df["COMPANY"].
result_with_description = []
for name in tqdm(test_df["COMPANY"]):
    prediction = safe_llm_call(name, model, get_description=True)
    result_with_description.append(prediction)

  0%|          | 2/500 [00:01<06:19,  1.31it/s]

[Fallback] Searching for: Agfa-Gevaert


  1%|          | 3/500 [00:02<08:27,  1.02s/it]

[Fallback] Searching for: US Commercial


  1%|          | 5/500 [00:05<08:19,  1.01s/it]

[Fallback] Searching for: Bouygues Group


  1%|          | 6/500 [00:06<09:14,  1.12s/it]

[Fallback] Searching for: Corporation Mapfre


  2%|▏         | 12/500 [00:11<06:42,  1.21it/s]

[Fallback] Searching for: Yapi ve Kredi


  5%|▌         | 27/500 [00:24<06:14,  1.26it/s]

[Fallback] Searching for: Charter Commun


  6%|▌         | 28/500 [00:25<07:53,  1.00s/it]



  6%|▌         | 29/500 [00:26<07:04,  1.11it/s]

[Fallback] Searching for: Sanmina-SCI


  6%|▌         | 30/500 [00:27<08:18,  1.06s/it]

[Fallback] Searching for: Kawasho


  6%|▌         | 31/500 [00:29<09:38,  1.23s/it]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 30
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


  7%|▋         | 33/500 [01:33<1:49:24, 14.06s/it]

[Fallback] Searching for: Sacyr-Vallehermoso


  7%|▋         | 37/500 [01:36<30:41,  3.98s/it]  

[Fallback] Searching for: Higashi-Nippon Bank


  8%|▊         | 38/500 [01:37<24:22,  3.16s/it]

[Fallback] Searching for: Bank of Ryukyus


  9%|▊         | 43/500 [01:41<08:41,  1.14s/it]

[Fallback] Searching for: SEM-Samsung Electro


  9%|▉         | 45/500 [01:43<07:48,  1.03s/it]

[Fallback] Searching for: Singapore Technologies


 10%|▉         | 48/500 [01:46<06:40,  1.13it/s]

[Fallback] Searching for: Amadeus Global Travel


 10%|█         | 51/500 [01:49<07:03,  1.06it/s]

[Fallback] Searching for: Archstone-Smith


 10%|█         | 52/500 [01:51<07:59,  1.07s/it]

[Fallback] Searching for: China Petroleum & Chemical


 11%|█         | 56/500 [01:54<06:07,  1.21it/s]

[Fallback] Searching for: Hokuriku Electric Power


 12%|█▏        | 58/500 [01:56<06:34,  1.12it/s]

[Fallback] Searching for: Apollo-Education Group


 12%|█▏        | 59/500 [01:57<07:33,  1.03s/it]

[Fallback] Searching for: Phoenix Cos


 12%|█▏        | 61/500 [01:59<06:55,  1.06it/s]

[Fallback] Searching for: Marriott Intl


 12%|█▏        | 62/500 [02:00<07:44,  1.06s/it]

[Fallback] Searching for: Air Prods & Chems


 13%|█▎        | 64/500 [02:02<07:26,  1.02s/it]

[Fallback] Searching for: Banque Nat de Belgique


 13%|█▎        | 67/500 [02:05<06:27,  1.12it/s]

[Fallback] Searching for: Kiyo Bank


 14%|█▍        | 71/500 [02:08<05:33,  1.29it/s]



 15%|█▍        | 73/500 [02:10<06:19,  1.12it/s]

[Fallback] Searching for: Level 3 Commun


 15%|█▍        | 74/500 [02:12<07:06,  1.00s/it]



 17%|█▋        | 86/500 [02:20<05:03,  1.36it/s]



 18%|█▊        | 88/500 [02:22<05:32,  1.24it/s]

[Fallback] Searching for: ThyssenKrupp Group


 18%|█▊        | 91/500 [02:25<05:33,  1.23it/s]

[Fallback] Searching for: Pacific Century Regional


 18%|█▊        | 92/500 [02:26<06:25,  1.06it/s]

[Fallback] Searching for: Henderson Land Dev


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 32
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...
[Fallback] Searching for: Henderson Land Dev


 19%|█▉        | 94/500 [03:31<1:36:08, 14.21s/it]

[Fallback] Searching for: Valiant Holding


 19%|█▉        | 96/500 [03:33<50:09,  7.45s/it]  

[Fallback] Searching for: Mylan Labs


 20%|█▉        | 99/500 [03:36<21:05,  3.16s/it]

[Fallback] Searching for: Oil & Natural Gas


 21%|██        | 104/500 [03:40<07:51,  1.19s/it]

[Fallback] Searching for: Temple-Inland


 21%|██▏       | 107/500 [03:43<06:11,  1.06it/s]

[Fallback] Searching for: Equity Office Prop


 22%|██▏       | 111/500 [03:46<05:11,  1.25it/s]



 23%|██▎       | 114/500 [03:48<04:44,  1.36it/s]



 23%|██▎       | 116/500 [03:50<04:23,  1.46it/s]



 24%|██▍       | 119/500 [03:52<04:13,  1.50it/s]

[Fallback] Searching for: Westfield America Trust


 24%|██▍       | 120/500 [03:53<05:42,  1.11it/s]

[Fallback] Searching for: Toronto-Dominion Bank


 25%|██▍       | 123/500 [03:56<05:19,  1.18it/s]

[Fallback] Searching for: Louisiana-Pacific


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 2
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...
[Fallback] Searching for: Louisiana-Pacific


 26%|██▌       | 128/500 [05:03<32:57,  5.32s/it]  

[Fallback] Searching for: Apartment Investment


 26%|██▌       | 129/500 [05:05<25:32,  4.13s/it]



 26%|██▌       | 131/500 [05:06<14:38,  2.38s/it]

[Fallback] Searching for: Great Eastern Holdings


 27%|██▋       | 133/500 [05:08<10:19,  1.69s/it]

[Fallback] Searching for: Brambles Group


 27%|██▋       | 135/500 [05:10<08:25,  1.38s/it]

[Fallback] Searching for: Aegon Insurance Group


 27%|██▋       | 136/500 [05:12<08:51,  1.46s/it]

[Fallback] Searching for: Miyazaki Bank


 28%|██▊       | 139/500 [05:15<06:45,  1.12s/it]

[Fallback] Searching for: Korea Gas


 29%|██▉       | 145/500 [05:20<04:52,  1.22it/s]



 29%|██▉       | 147/500 [05:22<04:49,  1.22it/s]

[Fallback] Searching for: Allmerica Financial


 30%|██▉       | 148/500 [05:23<06:04,  1.04s/it]

[Fallback] Searching for: Stockland Australia


 31%|███       | 154/500 [05:28<04:32,  1.27it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 30
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 31%|███▏      | 157/500 [06:33<57:15, 10.02s/it]  

[Fallback] Searching for: Akzo Nobel Group


 32%|███▏      | 159/500 [06:35<30:34,  5.38s/it]

[Fallback] Searching for: Swiss Life Holding


 33%|███▎      | 164/500 [06:40<08:50,  1.58s/it]

[Fallback] Searching for: Marshall & Ilsley


 34%|███▍      | 169/500 [06:44<04:51,  1.14it/s]

[Fallback] Searching for: Bank of Nagoya


 34%|███▍      | 171/500 [06:46<05:35,  1.02s/it]



 35%|███▍      | 173/500 [06:48<04:55,  1.11it/s]

[Fallback] Searching for: San-In Godo Bank


 35%|███▌      | 175/500 [06:50<04:52,  1.11it/s]

[Fallback] Searching for: Impala Platinum Holdings


 36%|███▋      | 182/500 [06:55<03:44,  1.42it/s]

[Fallback] Searching for: CIC Group


 37%|███▋      | 185/500 [06:58<04:00,  1.31it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 1
}
].
 38%|███▊      | 188/500 [07:02<05:18,  1.02s/it]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds a

[Fallback] Searching for: Sigma-Aldrich


 40%|████      | 200/500 [07:13<04:00,  1.24it/s]

[Fallback] Searching for: Wal-Mart Stores


 40%|████      | 201/500 [07:14<04:57,  1.01it/s]

[Fallback] Searching for: Williams Cos


 41%|████      | 204/500 [07:17<04:09,  1.19it/s]

[Fallback] Searching for: Burlington Santa Fe


 41%|████      | 206/500 [07:19<04:24,  1.11it/s]

[Fallback] Searching for: HDFC-Housing Devel
No Wikipedia content found for: HDFC-Housing Devel


 42%|████▏     | 208/500 [07:21<04:08,  1.17it/s]

[Fallback] Searching for: Surgutneftegas Oil


 42%|████▏     | 209/500 [07:22<04:49,  1.01it/s]

[Fallback] Searching for: Shin-Etsu Chemical


 42%|████▏     | 211/500 [07:24<04:44,  1.02it/s]

[Fallback] Searching for: Malaysia Intl Shipping


 43%|████▎     | 215/500 [07:27<03:53,  1.22it/s]

[Fallback] Searching for: Wm Wrigley Jr


 43%|████▎     | 216/500 [07:29<04:33,  1.04it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 30
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 44%|████▎     | 218/500 [08:32<1:05:24, 13.92s/it]

[Fallback] Searching for: Sanofi-Synthelabo


 44%|████▍     | 221/500 [08:35<24:33,  5.28s/it]  



 44%|████▍     | 222/500 [08:36<18:07,  3.91s/it]

[Fallback] Searching for: First Citizens Bcshs


 45%|████▌     | 225/500 [08:38<08:36,  1.88s/it]

[Fallback] Searching for: Kimberly-Clark de Mexico


 47%|████▋     | 233/500 [08:45<03:34,  1.24it/s]



 48%|████▊     | 242/500 [08:51<03:15,  1.32it/s]



 49%|████▊     | 243/500 [08:52<03:10,  1.35it/s]

[Fallback] Searching for: China Development Finl


 49%|████▉     | 244/500 [08:53<03:55,  1.09it/s]

[Fallback] Searching for: Pinault-Printemps-Redoute


 49%|████▉     | 246/500 [08:55<03:56,  1.07it/s]

[Fallback] Searching for: Schering Group


 49%|████▉     | 247/500 [08:56<04:15,  1.01s/it]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 2
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 50%|█████     | 250/500 [10:01<41:32,  9.97s/it]  

[Fallback] Searching for: Hanshin Construction


 50%|█████     | 252/500 [10:03<22:06,  5.35s/it]

[Fallback] Searching for: Statoil Group


 51%|█████     | 254/500 [10:05<12:33,  3.06s/it]

[Fallback] Searching for: Assoc British Foods


 51%|█████     | 255/500 [10:06<10:30,  2.57s/it]



 51%|█████▏    | 257/500 [10:08<06:35,  1.63s/it]

[Fallback] Searching for: Thai Airways Intl


 52%|█████▏    | 261/500 [10:11<04:01,  1.01s/it]

[Fallback] Searching for: Kerr-McGee


 52%|█████▏    | 262/500 [10:13<04:30,  1.14s/it]

[Fallback] Searching for: Standard Chartered Group


 53%|█████▎    | 263/500 [10:14<04:57,  1.26s/it]

[Fallback] Searching for: Great A&P Tea


 53%|█████▎    | 264/500 [10:16<05:16,  1.34s/it]

[Fallback] Searching for: Wharf (Holdings) Hong


 54%|█████▍    | 269/500 [10:20<03:06,  1.24it/s]

[Fallback] Searching for: Chiba Kogyo Bank


 54%|█████▍    | 270/500 [10:22<04:25,  1.15s/it]

[Fallback] Searching for: KKPC-Korea Kumho


 54%|█████▍    | 272/500 [10:23<03:53,  1.02s/it]

[Fallback] Searching for: Associated Banc-Corp


 55%|█████▍    | 274/500 [10:25<03:41,  1.02it/s]

[Fallback] Searching for: Service Corp Intl


 56%|█████▌    | 280/500 [10:31<02:56,  1.25it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 28
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 56%|█████▌    | 281/500 [11:34<1:11:05, 19.48s/it]

[Fallback] Searching for: BPER-Emilia Romagna


 57%|█████▋    | 283/500 [11:36<36:22, 10.06s/it]  



 57%|█████▋    | 284/500 [11:36<25:59,  7.22s/it]

[Fallback] Searching for: Banca Naz del Lavoro
No Wikipedia content found for: Banca Naz del Lavoro


 57%|█████▋    | 285/500 [11:37<19:07,  5.34s/it]

[Fallback] Searching for: Taiwan Business Bank


 58%|█████▊    | 290/500 [11:42<05:37,  1.61s/it]



 58%|█████▊    | 291/500 [11:43<04:46,  1.37s/it]

[Fallback] Searching for: Novo-Nordisk


 58%|█████▊    | 292/500 [11:44<04:54,  1.42s/it]

[Fallback] Searching for: Natl Semiconductor


 59%|█████▉    | 295/500 [11:47<03:27,  1.01s/it]

[Fallback] Searching for: Central European Media


 60%|██████    | 302/500 [11:52<02:25,  1.36it/s]

[Fallback] Searching for: Shionogi & Co


 62%|██████▏   | 309/500 [11:58<02:19,  1.37it/s]

[Fallback] Searching for: Tupras-Turkiye Petrol


 62%|██████▏   | 311/500 [12:00<02:45,  1.14it/s]

[Fallback] Searching for: Bank of Saga


 63%|██████▎   | 316/500 [12:04<02:24,  1.27it/s]

[Fallback] Searching for: Verizon Commun


 63%|██████▎   | 317/500 [12:06<02:59,  1.02it/s]

[Fallback] Searching for: William Hill Org


 64%|██████▎   | 318/500 [12:07<03:32,  1.17s/it]



 64%|██████▍   | 319/500 [12:08<03:03,  1.01s/it]

[Fallback] Searching for: Deere & Co


 64%|██████▍   | 322/500 [12:11<02:43,  1.09it/s]

[Fallback] Searching for: Performance Food


 65%|██████▍   | 323/500 [12:12<02:59,  1.01s/it]

[Fallback] Searching for: Intl Flavors & Frags


 65%|██████▌   | 326/500 [12:15<02:43,  1.06it/s]

[Fallback] Searching for: Wendy`s International


 66%|██████▌   | 330/500 [12:18<02:20,  1.21it/s]

[Fallback] Searching for: KT&G


 67%|██████▋   | 335/500 [12:23<02:05,  1.32it/s]

[Fallback] Searching for: McCormick & Co


 67%|██████▋   | 337/500 [12:24<02:12,  1.23it/s]

[Fallback] Searching for: Nurnberger Beteiligungs


 68%|██████▊   | 341/500 [12:28<02:13,  1.19it/s]

[Fallback] Searching for: Travelers Property Cas


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 30
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...
[Fallback] Searching for: Travelers Property Cas


 70%|███████   | 350/500 [13:39<04:37,  1.85s/it]



 70%|███████   | 351/500 [13:39<03:40,  1.48s/it]

[Fallback] Searching for: Fuji Heavy Inds


 71%|███████   | 353/500 [13:41<02:59,  1.22s/it]

[Fallback] Searching for: Neyveli Lignite


 71%|███████▏  | 357/500 [13:44<01:56,  1.23it/s]



 72%|███████▏  | 359/500 [13:46<01:37,  1.45it/s]

[Fallback] Searching for: Petrobras-Petrsleo Brasil
No Wikipedia content found for: Petrobras-Petrsleo Brasil


 72%|███████▏  | 362/500 [13:48<01:38,  1.40it/s]



 73%|███████▎  | 365/500 [13:50<01:37,  1.39it/s]

[Fallback] Searching for: Hyakugo Bank


 73%|███████▎  | 367/500 [13:52<01:46,  1.25it/s]



 74%|███████▎  | 368/500 [13:53<01:45,  1.25it/s]

[Fallback] Searching for: Essilor International


 74%|███████▍  | 372/500 [13:56<01:33,  1.37it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 3
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 75%|███████▍  | 373/500 [14:59<41:05, 19.41s/it]

[Fallback] Searching for: Britannic Group


 75%|███████▍  | 374/500 [15:00<29:28, 14.04s/it]

[Fallback] Searching for: Brother Inds


 75%|███████▌  | 375/500 [15:02<21:23, 10.27s/it]



 76%|███████▌  | 378/500 [15:05<08:20,  4.10s/it]

[Fallback] Searching for: AMB Property


 76%|███████▌  | 379/500 [15:06<06:57,  3.45s/it]



 76%|███████▌  | 381/500 [15:08<04:00,  2.02s/it]

[Fallback] Searching for: iStar Financial


 77%|███████▋  | 386/500 [15:12<01:49,  1.04it/s]

[Fallback] Searching for: Grupo Imsa


 78%|███████▊  | 392/500 [15:17<01:25,  1.27it/s]

[Fallback] Searching for: Ishikawajima-Harima


 79%|███████▉  | 394/500 [15:19<01:36,  1.10it/s]

[Fallback] Searching for: Taiyo Life Insurance


 79%|███████▉  | 395/500 [15:20<01:48,  1.04s/it]

[Fallback] Searching for: CBD-Brasil Distribuieco
No Wikipedia content found for: CBD-Brasil Distribuieco


 79%|███████▉  | 396/500 [15:21<01:50,  1.06s/it]



 80%|████████  | 400/500 [15:24<01:18,  1.28it/s]

[Fallback] Searching for: Scientific-Atlanta


 80%|████████  | 402/500 [15:26<01:26,  1.13it/s]

[Fallback] Searching for: Yamanashi Chou Bank


 81%|████████  | 404/500 [15:28<01:29,  1.07it/s]

[Fallback] Searching for: Kumho Industrial


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 30
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...
[Fallback] Searching for: Kumho Industrial


 81%|████████  | 406/500 [16:33<22:14, 14.20s/it]

[Fallback] Searching for: Sumitomo Heavy Inds


 82%|████████▏ | 411/500 [16:37<04:26,  3.00s/it]

[Fallback] Searching for: FIBI Holding


 83%|████████▎ | 415/500 [16:41<01:52,  1.32s/it]



 83%|████████▎ | 417/500 [16:42<01:27,  1.05s/it]

[Fallback] Searching for: Electronic Data Sys


 85%|████████▌ | 426/500 [16:50<00:54,  1.36it/s]

[Fallback] Searching for: Custodia Holding


 86%|████████▋ | 432/500 [16:54<00:48,  1.40it/s]



 87%|████████▋ | 433/500 [16:55<00:47,  1.40it/s]

[Fallback] Searching for: Community Health Sys


 87%|████████▋ | 435/500 [16:57<00:54,  1.19it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 1
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 87%|████████▋ | 437/500 [18:01<14:34, 13.88s/it]

[Fallback] Searching for: Downey Financial


 88%|████████▊ | 438/500 [18:02<10:30, 10.17s/it]



 88%|████████▊ | 440/500 [18:04<05:19,  5.33s/it]

[Fallback] Searching for: Industrivarden


 89%|████████▉ | 445/500 [18:08<01:23,  1.52s/it]

[Fallback] Searching for: Hillenbrand Inds


 89%|████████▉ | 446/500 [18:09<01:19,  1.47s/it]



 90%|█████████ | 450/500 [18:12<00:46,  1.08it/s]

[Fallback] Searching for: Impac Mortgage Holding


 92%|█████████▏| 459/500 [18:20<00:33,  1.24it/s]



 92%|█████████▏| 462/500 [18:22<00:28,  1.33it/s]

[Fallback] Searching for: Axis Capital Holdings


 93%|█████████▎| 465/500 [18:25<00:27,  1.25it/s]

[Fallback] Searching for: Hon Hai Precision Ind


 93%|█████████▎| 466/500 [18:26<00:32,  1.06it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 32
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


 93%|█████████▎| 467/500 [19:29<10:45, 19.55s/it]

[Fallback] Searching for: Nextel Commun


 94%|█████████▎| 468/500 [19:31<07:32, 14.15s/it]

[Fallback] Searching for: Sumisho Lease


 95%|█████████▍| 473/500 [19:35<01:21,  3.04s/it]

[Fallback] Searching for: Dentsply Intl


 95%|█████████▌| 475/500 [19:37<00:49,  1.99s/it]

[Fallback] Searching for: Shenzhen Development Bk


 95%|█████████▌| 477/500 [19:39<00:33,  1.45s/it]



 96%|█████████▌| 479/500 [19:40<00:22,  1.05s/it]



 96%|█████████▌| 481/500 [19:41<00:16,  1.16it/s]

[Fallback] Searching for: Mitsui Sumitomo Ins


 97%|█████████▋| 484/500 [19:44<00:13,  1.20it/s]

[Fallback] Searching for: Dah Sing Financial


 98%|█████████▊| 490/500 [19:49<00:07,  1.40it/s]

[Fallback] Searching for: IFIL


 98%|█████████▊| 491/500 [19:51<00:10,  1.12s/it]



100%|█████████▉| 499/500 [19:56<00:00,  1.31it/s]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash-lite"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 30
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 2
}
].


Rate limit hit. Retrying in 60.0s (attempt 1/5)...


100%|██████████| 500/500 [21:00<00:00,  2.52s/it]


In [21]:
# Store the with-description predictions next to the labels, then show the
# fraction of rows where prediction and label match exactly (plain accuracy).
test_df['PRED_CATEGORY_WITH_DESC'] = result_with_description
test_df['PRED_CATEGORY_WITH_DESC'].eq(test_df['CATEGORY']).mean()

0.726

In [None]:
# Point metrics plus bootstrap standard errors (for confidence intervals)
from src.eval import calculate_metrics, calculate_ste_metrics_with_bootstrap
from sklearn.preprocessing import LabelEncoder

y_true = test_df['CATEGORY'].values

# The encoder has to know every label that occurs anywhere: predictions may
# contain classes (such as 'Other') that never appear in the ground truth.
all_classes = np.unique(
    np.concatenate(
        [
            y_true,
            test_df['PRED_CATEGORY_NO_DESC'].values,
            test_df['PRED_CATEGORY_WITH_DESC'].values,
        ]
    )
)
le = LabelEncoder().fit(all_classes)

# Map the ground truth and both prediction variants onto the shared integer codes
y_true_encoded, y_pred_no_desc, y_pred_with_desc = (
    le.transform(test_df[column].values)
    for column in ('CATEGORY', 'PRED_CATEGORY_NO_DESC', 'PRED_CATEGORY_WITH_DESC')
)

# Point estimates first (no description, then with description) ...
metrics_no_desc, metrics_with_desc = (
    calculate_metrics(y_true_encoded, y_pred)
    for y_pred in (y_pred_no_desc, y_pred_with_desc)
)

# ... then the bootstrap standard errors, in the same variant order
ste_no_desc, ste_with_desc = (
    calculate_ste_metrics_with_bootstrap(y_true_encoded, y_pred)
    for y_pred in (y_pred_no_desc, y_pred_with_desc)
)

# Cell output: (metrics no desc, metrics with desc, std. errors no desc, std. errors with desc)
(metrics_no_desc, metrics_with_desc, ste_no_desc, ste_with_desc)


({'f1': 0.7877742767754623,
  'precision': 0.7555683842009833,
  'recall': 0.7164840052454159,
  'accuracy': 0.776},
 {'f1': 0.744093026198812,
  'precision': 0.7506080085704225,
  'recall': 0.6540583380457584,
  'accuracy': 0.726},
 {'f1': 0.018394217175705593,
  'precision': 0.02224683289447747,
  'recall': 0.023030516303994213,
  'accuracy': 0.018341162885706034},
 {'f1': 0.020024022685492627,
  'precision': 0.028331083999983624,
  'recall': 0.025402954664524163,
  'accuracy': 0.019669155955454733})

In [51]:
# Report every metric as "value ± bootstrap standard error", pairing the
# no-description and with-description variants so they can be compared directly.
for metric_name, no_desc_value in metrics_no_desc.items():
    print(f"{metric_name}: {no_desc_value:.4f} ± {ste_no_desc[metric_name]:.4f} (no desc)")
    print(f"{metric_name}: {metrics_with_desc[metric_name]:.4f} ± {ste_with_desc[metric_name]:.4f} (with desc)")
    print()


f1: 0.7878 ± 0.0184 (no desc)
f1: 0.7441 ± 0.0200 (with desc)

precision: 0.7556 ± 0.0222 (no desc)
precision: 0.7506 ± 0.0283 (with desc)

recall: 0.7165 ± 0.0230 (no desc)
recall: 0.6541 ± 0.0254 (with desc)

accuracy: 0.7760 ± 0.0183 (no desc)
accuracy: 0.7260 ± 0.0197 (with desc)



In [25]:
# Number of predictions equal to the literal string 'n/a', reported as
# (with description, without description).
tuple(
    np.sum(test_df[column] == 'n/a')
    for column in ('PRED_CATEGORY_WITH_DESC', 'PRED_CATEGORY_NO_DESC')
)

(0, 0)

See how many of the predictions are the same for the two variants (with and without a description).

In [26]:
# Share of rows on which the two prediction variants agree with each other
test_df['PRED_CATEGORY_NO_DESC'].eq(test_df['PRED_CATEGORY_WITH_DESC']).mean()

0.808

In [28]:
# Rows where the variants disagree and the with-description prediction is the
# one that matches the label ('n/a' no-description predictions are excluded).
variants_disagree = test_df['PRED_CATEGORY_NO_DESC'] != test_df['PRED_CATEGORY_WITH_DESC']
no_desc_answered = test_df['PRED_CATEGORY_NO_DESC'] != 'n/a'
with_desc_correct = test_df['PRED_CATEGORY_WITH_DESC'] == test_df['CATEGORY']

mask = variants_disagree & no_desc_answered & with_desc_correct
test_df[mask]

Unnamed: 0,COMPANY,CATEGORY,PRED_CATEGORY_NO_DESC,PRED_CATEGORY_WITH_DESC
754,MTR,Transportation,Hotels restaurants & leisure,Transportation
1034,Nippon Mining,Oil & gas operations,Materials,Oil & gas operations
1133,Apollo-Education Group,Business services & supplies,Other,Business services & supplies
1964,Valiant Holding,Banking,Insurance,Banking
1268,Avnet,Technology hardware & equipment,Trading companies,Technology hardware & equipment
642,MAN Group,Capital goods,Trading companies,Capital goods
1158,China Development Finl,Banking,Diversified financials,Banking
670,Capitalia,Banking,Other,Banking
1293,PMI Group,Insurance,Other,Insurance
1969,Performance Food,Food markets,Food drink & tobacco,Food markets


In [29]:
# Rows where the variants disagree and the no-description prediction is the
# one that matches the label ('n/a' no-description predictions are excluded).
variants_disagree = test_df['PRED_CATEGORY_NO_DESC'] != test_df['PRED_CATEGORY_WITH_DESC']
no_desc_answered = test_df['PRED_CATEGORY_NO_DESC'] != 'n/a'
no_desc_correct = test_df['PRED_CATEGORY_NO_DESC'] == test_df['CATEGORY']

mask = variants_disagree & no_desc_answered & no_desc_correct
test_df[mask]

Unnamed: 0,COMPANY,CATEGORY,PRED_CATEGORY_NO_DESC,PRED_CATEGORY_WITH_DESC
568,Pitney Bowes,Business services & supplies,Business services & supplies,Technology hardware & equipment
218,Bouygues Group,Construction,Construction,Conglomerates
69,Comcast,Media,Media,Telecommunications services
1519,Kawasho,Trading companies,Trading companies,Media
1074,Iberia,Transportation,Transportation,Other
1462,SEM-Samsung Electro,Technology hardware & equipment,Technology hardware & equipment,Semiconductors
970,Daikin Industries,Capital goods,Capital goods,Technology hardware & equipment
935,Kobe Steel,Materials,Materials,Conglomerates
1314,Level 3 Commun,Telecommunications services,Telecommunications services,Other
348,EMC,Technology hardware & equipment,Technology hardware & equipment,Other


In [32]:
# Print the text wiki_intro returns for this company, to compare it with the
# dataset label ('Construction') and the with-description prediction ('Conglomerates').
print(wiki_intro("Bouygues Group"))

[Fallback] Searching for: Bouygues Group
Bouygues S.A. (French pronunciation: [bwiɡ]) is a French engineering group headquartered in the 8th arrondissement of Paris, France. Bouygues is listed on the Euronext Paris exchange and is a blue chip in the CAC 40 stock market index. The company was founded in 1952 by Francis Bouygues and has been led by his son Martin Bouygues since 1989. Martin's older brother, Olivier Bouygues, is a board member.
The group specialises in construction (Colas Group and Bouygues Construction), real estate development (Bouygues Immobilier), media (TF1 Group), and telecommunications (Bouygues Telecom).


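How much can we trust the labels? In some cases they are ambiguous. The Wikipedia description of `Bouygues Group` lists construction, real estate development, media, and telecommunications businesses, so it reads like a conglomerate even though the dataset label is `Construction`.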

In [31]:
# Print the text wiki_intro returns for this company, to compare it with the
# dataset label ('Media') and the with-description prediction ('Telecommunications services').
print(wiki_intro("Comcast"))

Comcast Corporation, formerly known as Comcast Holdings, is an American multinational mass media corporation that works in telecommunications and entertainment. It is headquartered at the Comcast Center in Philadelphia. Comcast is the third-largest broadcasting and cable television company worldwide by revenue (behind China Mobile and Verizon). It is the third-largest pay-TV company, the second-largest cable TV company by subscribers, and the largest home Internet service provider in the United States. In 2023, the company was ranked 51st in the Forbes Global 2000.
Comcast is additionally the nation's third-largest home telephone service provider. It provides services to U.S. residential and commercial customers in 40 states and the District of Columbia. As the owner of NBCUniversal since 2013, Comcast is also a high-volume producer of films for theatrical exhibition and television programming, and a theme parks operator. It is the fourth-largest telecommunications company by worldwide

Same as the case above: the description explicitly calls Comcast a telecommunications company, while the dataset label is `Media`.

In [None]:
def plot_confusion_matrix(y_true, y_pred, categories, normalize=True):
    """Draw a confusion matrix of ``y_pred`` against ``y_true`` on a new figure.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    categories : list
        Label values passed to ``confusion_matrix(labels=...)``; they fix the
        row/column order and are used as the tick labels.
    normalize : bool, default True
        If True, every row is divided by its row total and scaled to percent,
        and cells are printed with one decimal. Rows without any true sample
        stay all-zero. If False, the raw integer counts are shown.

    Returns
    -------
    None
        The figure is created with ``plt.subplots`` and left open, so the
        caller can follow up with ``plt.savefig(...)``.
    """
    cm = confusion_matrix(y_true, y_pred, labels=categories)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only rows that contain samples. A category listed in
        # `categories` but absent from y_true would otherwise produce 0/0 and
        # a RuntimeWarning; such rows are kept at 0.
        cm = np.divide(
            cm,
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        ) * 100  # now in [0, 100]
        values_format = ".1f"
    else:
        # Raw counts are integers; do not print them with a decimal place.
        values_format = "d"

    fig, ax = plt.subplots(figsize=(15, 12))

    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=categories)
    disp.plot(cmap=plt.cm.Blues, ax=ax, values_format=values_format)

    # Style the matrix axes explicitly instead of relying on pyplot's
    # "current axes".
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    plt.setp(ax.get_yticklabels(), rotation=0)
    ax.grid(False)  # seaborn's whitegrid style would draw lines over the cells
    ax.set_title("Normalized Confusion Matrix (%)" if normalize else "Confusion Matrix", fontsize=16)
    fig.tight_layout()
    
# Category list read from categories.csv; it fixes the axis order of the matrix
categories = (
    pd.read_csv("../resources/categories.csv")['CATEGORY']
    .unique()
    .tolist()
)

# Row-normalised matrix for the no-description predictions, saved as SVG
plot_confusion_matrix(
    y_true=test_df['CATEGORY'],
    y_pred=test_df['PRED_CATEGORY_NO_DESC'],
    categories=categories,
    normalize=True,
)
plt.savefig("../resources/confusion_matrix_no_desc.svg", dpi=300, bbox_inches='tight')

In [39]:
# Row-normalised confusion matrix in percent for the no-description predictions
# (rows: true category, columns: predicted category).
cm = confusion_matrix(test_df['CATEGORY'], test_df['PRED_CATEGORY_NO_DESC'], labels=categories)
cm = np.nan_to_num(cm.astype('float') / cm.sum(axis=1, keepdims=True)) * 100  # now in [0,100]

# Every off-diagonal cell with a non-zero share is a confusion:
# (true category, predicted category, percent of the true category's rows)
confused_categories = [
    (true_category, predicted_category, cm[row, col])
    for row, true_category in enumerate(categories)
    for col, predicted_category in enumerate(categories)
    if row != col and cm[row, col] > 0
]

In [40]:
# Order the confusions by percentage, largest first, and show the top ten
confused_categories.sort(key=lambda confusion: confusion[2], reverse=True)
confused_categories[:10]

[('Business services & supplies', 'Technology hardware & equipment', 40.0),
 ('Trading companies', 'Conglomerates', 40.0),
 ('Household & personal products', 'Consumer durables', 33.33333333333333),
 ('Food markets', 'Retailing', 28.57142857142857),
 ('Conglomerates', 'Capital goods', 25.0),
 ('Conglomerates', 'Aerospace & defense', 25.0),
 ('Technology hardware & equipment', 'Telecommunications services', 20.0),
 ('Trading companies', 'Retailing', 20.0),
 ('Construction', 'Materials', 17.647058823529413),
 ('Software & services', 'Business services & supplies', 16.666666666666664)]

In [None]:
# Persist the test rows together with both prediction columns.
# index=False: the DataFrame index (the row positions shown when test_df is
# displayed) is not written to the file.
test_df.to_csv("../resources/dataset_with_predictions.csv", index=False)