In [3]:
import pandas as pd

# Parquet shard paths of the allenai/sciq dataset, keyed by split name.
splits = {
    'train': 'data/train-00000-of-00001.parquet',
    'validation': 'data/validation-00000-of-00001.parquet',
    'test': 'data/test-00000-of-00001.parquet',
}
# Only the 'train' shard is read in this notebook.
df = pd.read_parquet(f"hf://datasets/allenai/sciq/{splits['train']}")
df

Unnamed: 0,question,distractor3,distractor1,distractor2,correct_answer,support
0,What type of organism is commonly used in prep...,viruses,protozoa,gymnosperms,mesophilic organisms,"Mesophiles grow best in moderate temperature, ..."
1,What phenomenon makes global winds blow northe...,tropical effect,muon effect,centrifugal effect,coriolis effect,Without Coriolis Effect the global winds would...
2,Changes from a less-ordered state to a more-or...,endothermic,unbalanced,reactive,exothermic,Summary Changes of state are examples of phase...
3,What is the least dangerous radioactive decay?,zeta decay,beta decay,gamma decay,alpha decay,All radioactive decay is dangerous to living t...
4,Kilauea in hawaii is the world’s most continuo...,magma,greenhouse gases,carbon and smog,smoke and ash,Example 3.5 Calculating Projectile Motion: Hot...
...,...,...,...,...,...,...
11674,The enzyme pepsin plays an important role in t...,lipids,protons,proteins,peptides,Protein A large part of protein digestion take...
11675,What remains a constant of radioactive substan...,acidity,temperature,volatility,rate of decay,The rate of decay of a radioactive substance i...
11676,"Terrestrial ecosystems, also known for their d...",substrates,bisomes,monomes,biomes,"Terrestrial ecosystems, also known for their d..."
11677,High explosives create shock waves that exceed...,turbulence,light speed,ion speed,supersonic,The modern day formulation of gun powder is ca...


In [16]:
# Inspect the record with index label 0 to see the fields of a single row.
df.loc[0]

question          What type of organism is commonly used in prep...
distractor3                                                 viruses
distractor1                                                protozoa
distractor2                                             gymnosperms
correct_answer                                 mesophilic organisms
support           Mesophiles grow best in moderate temperature, ...
Name: 0, dtype: object

In [9]:
import torch
import os

from sentence_transformers import CrossEncoder
from src.chain_initialisation import init_chain
from src.embedding_management import init_embeddings, add_embeddings_from_files
from src.model_initialisation import init_model
from src.pipeline_initialisation import init_pipeline
from src.retriever_initialisation import init_retriever
from src.dynamic_doc_retrieval import download_documents, initialise_keyword_model, generate_query_from_question

In [8]:
# Build every component of the question-answering stack once, up front.
cuda_available = torch.cuda.is_available()
print(f"Initializing model... CUDA available: {cuda_available}")
# init_model returns the model together with its name; both are handed to the pipeline.
model, model_name = init_model(cuda_available)
pipeline = init_pipeline(model=model, model_name=model_name)
vectorstore = init_embeddings(cuda_available)
retriever = init_retriever(vectorstore)
# NOTE(review): `reranker` is created here but not referenced by any other
# cell visible in this notebook — confirm it is still needed.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
chain = init_chain(pipeline=pipeline, retriever=retriever)
# keyword_model is passed to generate_query_from_question inside ask_question.
keyword_model = initialise_keyword_model()
print("Start-up complete.")

Initializing model... CUDA available: False


Device set to use cpu
  embedding = HuggingFaceEmbeddings(


Loading FAISS from file - skipping embedding build.
Vectorstore loaded successfully.
Retriever initialized with k=2 and similarity threshold=0.8


config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.66k [00:00<?, ?B/s]

  llm = HuggingFacePipeline(pipeline=pipeline)


LLM model chain created.
Start-up complete.


In [12]:
# Directory layout, expressed relative to the current working directory.
BASE_DIR = os.curdir
DATA_DIR = os.path.join(BASE_DIR, "data")
# Folder handed to download_documents by ask_question.
DYNAMIC_DATA_DIR = os.path.join(BASE_DIR, "data", "dynamic")
print(f"dynamic data dir: {DYNAMIC_DATA_DIR}")

def _int_from_env(name):
    """Return environment variable ``name`` parsed as an int.

    Raises:
        RuntimeError: if the variable is not set.
        ValueError: if the value cannot be parsed as an integer.
    """
    raw = os.getenv(name)
    if raw is None:
        # int(None) would raise an opaque TypeError; name the missing setting instead.
        raise RuntimeError(
            f"Environment variable '{name}' is not set; it is required when online=True."
        )
    try:
        return int(raw)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable '{name}' must be an integer, got {raw!r}."
        ) from exc


def ask_question(question, online=True):
    """Answer ``question`` with the retrieval chain, optionally fetching documents first.

    Args:
        question: The question text sent to the chain.
        online: When true, a search query is generated from the question,
            ``download_documents`` is called with DYNAMIC_DATA_DIR and the
            limits read from the ``num_docs`` and ``max_tries`` environment
            variables, and the downloaded files are added to the vector
            store before the chain is invoked.

    Returns:
        A tuple ``(answer, source_documents)``: the stripped text following
        the last "### Answer:" marker in the chain's answer, and the
        ``source_documents`` entry of the chain response.

    Raises:
        RuntimeError: if ``online`` is true and ``num_docs`` or ``max_tries``
            is not set in the environment.
        ValueError: if one of those variables is not an integer.
    """
    if online:
        print("Obtaining data from scholar...")
        # Read the settings before generating the query so a configuration
        # problem is reported before any retrieval work starts.
        num_docs = _int_from_env("num_docs")
        max_tries = _int_from_env("max_tries")
        query = generate_query_from_question(keyword_model, question)
        downloaded_files = download_documents(query, DYNAMIC_DATA_DIR, num_docs, max_tries)
        add_embeddings_from_files(vectorstore, downloaded_files)

    # Each question is asked with an empty chat history.
    response = chain.invoke({
        "question": question,
        "chat_history": []
    })
    # Keep only the text after the last "### Answer:" marker of the generated output.
    answer = response["answer"].split("### Answer:")[-1].strip()
    source_documents = response["source_documents"]
    return answer, source_documents

dynamic data dir: ./data/dynamic


In [13]:
# Smoke test: ask the first dataset question with online=False, which skips
# the download branch of ask_question.
question = df.loc[0, "question"]
answer, source_docs = ask_question(question=question, online=False)
answer

  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.19999999999999996


"The following two animals-- fish (from which milk may also come) and other fishes--- include an invertebrate named Megaptera klassensis although its genus does not exist at present there.(Tasmanian Sea Turtles.) These species live around 70 percent of the ocean basins covering about 65 percent water(Suez Formation Water Source). Aquamarine waters typically contain less than five feet between three degrees south latitude=0 deg C..ocean surface temperature< 30 °C > below sea level range on both ends of the main boundary line during estuaries up to 4° s(-1 ) above 100 yz...[p]atoll ice thickness < 0.5 mm−2; maximum energy needed per unit volume ~ 20 Mg((m*r+8)/4 ); most bodies form into four'submersible' submaploidic marine mounds 2 km² apart including cetaceans plus halos.[i]The vast majority rely upon saltwater derived material after several thousand years [and more significantly provide dietary nutrients]. In some instances their diets consist primarily comprised sugar based meals con

In [15]:
from sentence_transformers import SentenceTransformer, util
from fuzzywuzzy import fuzz

# Initialize embedding model
# NOTE(review): this rebinds the global `model` that the start-up cell assigned
# from init_model(); evaluate_answer reads this global for model.encode, so
# re-running the start-up cell afterwards would change what it sees.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [23]:
def evaluate_answer(row):
    """Ask the chatbot one dataset question and score its reply.

    Args:
        row: A record providing the keys 'question', 'correct_answer',
            'support', 'distractor1', 'distractor2' and 'distractor3'.

    Returns:
        A dict holding the question, the lower-cased chatbot reply, a flag
        for whether the lower-cased correct answer occurs in the reply
        (stored as "exact_match"), the fuzzy partial-ratio score, the
        rounded cosine similarities of the reply to the correct answer and
        to the support passage, a flag for whether the correct answer is
        more similar to the reply than every distractor, and the source
        documents returned by ask_question.
    """
    question = row['question']
    reference = row['correct_answer']
    passage = row['support']
    wrong_options = [row[key] for key in ('distractor1', 'distractor2', 'distractor3')]

    # Query the chatbot with ask_question's default settings; compare case-insensitively.
    reply, source_docs = ask_question(question)
    reply = reply.lower()
    reference_lower = reference.lower()

    def embed(text):
        """Encode ``text`` to a tensor with the global embedding model."""
        return model.encode(text, convert_to_tensor=True)

    def similarity_to_reply(embedding):
        """Cosine similarity between ``embedding`` and the reply embedding."""
        return util.cos_sim(embedding, reply_embedding).item()

    # Lexical checks: substring containment and fuzzy partial ratio.
    contains_reference = int(reference_lower in reply)
    partial_ratio = fuzz.partial_ratio(reference_lower, reply)

    # Semantic checks against the correct answer, the support passage and each distractor.
    reference_embedding = embed(reference)
    reply_embedding = embed(reply)
    sim_correct = similarity_to_reply(reference_embedding)
    sim_support = similarity_to_reply(embed(passage))
    distractor_sims = [similarity_to_reply(embed(option)) for option in wrong_options]

    # 1 when the reply is closer to the correct answer than to the closest distractor.
    beats_all_distractors = int(sim_correct > max(distractor_sims))

    return {
        "question": question,
        "chatbot_response": reply,
        "exact_match": contains_reference,
        "fuzzy_score": partial_ratio,
        "sim_to_correct": round(sim_correct, 3),
        "sim_to_support": round(sim_support, 3),
        "correct_beats_distractors": beats_all_distractors,
        "source_docs": source_docs
    }


In [24]:
# Preview the evaluation subset; .loc slices by label and includes the end
# label, so this shows the rows labelled 0 through 3.
df.loc[:3]

Unnamed: 0,question,distractor3,distractor1,distractor2,correct_answer,support
0,What type of organism is commonly used in prep...,viruses,protozoa,gymnosperms,mesophilic organisms,"Mesophiles grow best in moderate temperature, ..."
1,What phenomenon makes global winds blow northe...,tropical effect,muon effect,centrifugal effect,coriolis effect,Without Coriolis Effect the global winds would...
2,Changes from a less-ordered state to a more-or...,endothermic,unbalanced,reactive,exothermic,Summary Changes of state are examples of phase...
3,What is the least dangerous radioactive decay?,zeta decay,beta decay,gamma decay,alpha decay,All radioactive decay is dangerous to living t...


In [25]:
# Score every row of the preview subset, then tabulate the per-question metrics.
results = list(map(evaluate_answer, (record for _, record in df.loc[:3].iterrows())))
results_df = pd.DataFrame(results)
results_df

Obtaining data from scholar...
Querying Google Scholar for: yogurt foods cheese organism preparation

Result 1: Potential effects of probiotics in cheese and yogurt production: A review
Publication URL: https://analyticalsciencejournals.onlinelibrary.wiley.com/doi/abs/10.1002/elsc.201100122
Trying to download PDF from: https://www.academia.edu/download/107457162/elsc.20110012220231117-1-j22l0l.pdf




PDF saved as ./data/dynamic/Potential_effects_of_probiotics_in_cheese_and_yogurt_production_A_review.pdf

Result 2: Inhibitory effects of Microgard™ on yogurt and cottage cheese spoilage organisms
Publication URL: https://www.sciencedirect.com/science/article/pii/S0022030290787449
Trying to download PDF from: https://www.sciencedirect.com/science/article/pii/S0022030290787449/pdf?md5=271e01ddf92d493dc06ad431140c96c9&pid=1-s2.0-S0022030290787449-main.pdf


[1m[INFO][0m | [32m2025/05/28 12:59:25[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 12:59:25[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=www.sciencedirect.com/science/article/pii/S0022030290787449], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


The eprint URL is not a direct link to a PDF.
Downloading Inhibitory effects of Microgard™ on yogurt and cottage cheese spoilage organisms from Sci Hub: https://www.sciencedirect.com/science/article/pii/S0022030290787449


[1m[INFO][0m | [32m2025/05/28 12:59:25[0m | [1m-> Response: status_code=200, content_length=8618[0m
[1m[INFO][0m | [32m2025/05/28 12:59:25[0m | [1m* Extracted information: {'url': 'https://moscow.sci-hub.se/2361/426d37caede6625a46e53ae624bf227e/salih1990.pdf', 'title': 'Inhibitory Effects of Microgard™ on Yogurt and Cottage Cheese Spoilage Organisms. Journal of Dairy Science, 73(4), 887–893'}[0m




[1m[INFO][0m | [32m2025/05/28 12:59:26[0m | [1m↓ Successfully download the url to: ./data/dynamic/Inhibitory_effects_of_Microgard_on_yogurt_and_cottage_cheese_spoilage_organisms.pdf[0m


Successfully downloaded to ./data/dynamic/Inhibitory_effects_of_Microgard_on_yogurt_and_cottage_cheese_spoilage_organisms.pdf

Result 3: Yogurt as probiotic carrier food
Publication URL: https://www.sciencedirect.com/science/article/pii/S095869460100036X
Trying to download PDF from: https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=e69eda186ccfed1592ad1ba165c2a319a74b80aa


[1m[INFO][0m | [32m2025/05/28 12:59:27[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 12:59:27[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=www.sciencedirect.com/science/article/pii/S095869460100036X], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


The eprint URL is not a direct link to a PDF.
Downloading Yogurt as probiotic carrier food from Sci Hub: https://www.sciencedirect.com/science/article/pii/S095869460100036X


[1m[INFO][0m | [32m2025/05/28 12:59:27[0m | [1m-> Response: status_code=200, content_length=8560[0m
[1m[INFO][0m | [32m2025/05/28 12:59:27[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.se/journal-article/3331c481f54b11019bb46e3bde81efb5/lourens-hattingh2001.pdf', 'title': 'Yogurt as probiotic carrier food. International Dairy Journal, 11(1-2), 1–17'}[0m




[1m[INFO][0m | [32m2025/05/28 12:59:28[0m | [1m↓ Successfully download the url to: ./data/dynamic/Yogurt_as_probiotic_carrier_food.pdf[0m


Successfully downloaded to ./data/dynamic/Yogurt_as_probiotic_carrier_food.pdf
Error Failed to open file './data/dynamic/Potential_effects_of_probiotics_in_cheese_and_yogurt_production_A_review.pdf'.


  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.19999999999999996


Vectorstore updated with 229 document splits.
Obtaining data from scholar...
Querying Google Scholar for: winds hemisphere northern northeast southeast

Result 1: A climatology of high‐wind events for the eastern United States
Publication URL: http://www.wkuweather.com/publications/Gilliland%20et%20al.%20%5B2019%5D%20A%20climatology%20of%20high-wind%20events%20for%20the%20eastern%20United%20States.pdf
Trying to download PDF from: http://www.wkuweather.com/publications/Gilliland%20et%20al.%20%5B2019%5D%20A%20climatology%20of%20high-wind%20events%20for%20the%20eastern%20United%20States.pdf
PDF saved as ./data/dynamic/A_climatology_of_highwind_events_for_the_eastern_United_States.pdf

Result 2: Surface winds and development of thunderstorms along southwest–northeast oriented mountain chains
Publication URL: https://journals.ametsoc.org/view/journals/wefo/14/5/1520-0434_1999_014_0758_swadot_2_0_co_2.xml
Trying to download PDF from: https://journals.ametsoc.org/view/journals/wefo/14/5/1520-

[1m[INFO][0m | [32m2025/05/28 13:00:10[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 13:00:10[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=journals.ametsoc.org/view/journals/wefo/14/5/1520-0434_1999_014_0758_swadot_2_0_co_2.xml], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


The eprint URL is not a direct link to a PDF.
Downloading Surface winds and development of thunderstorms along southwest–northeast oriented mountain chains from Sci Hub: https://journals.ametsoc.org/view/journals/wefo/14/5/1520-0434_1999_014_0758_swadot_2_0_co_2.xml


[1m[INFO][0m | [32m2025/05/28 13:00:10[0m | [1m-> Response: status_code=200, content_length=6278[0m
[1m[INFO][0m | [32m2025/05/28 13:00:10[0m | [1mChoose scihub url [1]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2025/05/28 13:00:10[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=journals.ametsoc.org/view/journals/wefo/14/5/1520-0434_1999_014_0758_swadot_2_0_co_2.xml], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:11[0m | [1m-> Response: status_code=403, content_length=898[0m
[1m[INFO][0m | [32m2025/05/28 13:00:11[0m | [1mChoose scihub url [2]: https://sci-hub.in[0m
[1m[INFO][0m | [32m2025/05/28 13:00:11[0m | [1m<- Request: scihub_url=https://sci-hub.in, source=DoiSource[type=doi, id=journals.ametsoc.org/view/journals/wefo/14/5/1520-0434_1999_014_0758_swadot_2_0_co_2.xml], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:12[0m | [1m-> Response: 

Failed to download Surface winds and development of thunderstorms along southwest–northeast oriented mountain chains via Sci-Hub.

Result 3: Northern Hemisphere airstream regions
Publication URL: https://journals.ametsoc.org/view/journals/mwre/109/2/1520-0493_1981_109_0255_nhar_2_0_co_2.xml
Trying to download PDF from: https://journals.ametsoc.org/view/journals/mwre/109/2/1520-0493_1981_109_0255_nhar_2_0_co_2.pdf
The eprint URL is not a direct link to a PDF.
Downloading Northern Hemisphere airstream regions from Sci Hub: https://journals.ametsoc.org/view/journals/mwre/109/2/1520-0493_1981_109_0255_nhar_2_0_co_2.xml


[1m[INFO][0m | [32m2025/05/28 13:00:12[0m | [1m-> Response: status_code=200, content_length=6277[0m
[1m[INFO][0m | [32m2025/05/28 13:00:13[0m | [1mChoose scihub url [1]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2025/05/28 13:00:13[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=journals.ametsoc.org/view/journals/mwre/109/2/1520-0493_1981_109_0255_nhar_2_0_co_2.xml], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:13[0m | [1m-> Response: status_code=403, content_length=898[0m
[1m[INFO][0m | [32m2025/05/28 13:00:13[0m | [1mChoose scihub url [2]: https://sci-hub.in[0m
[1m[INFO][0m | [32m2025/05/28 13:00:13[0m | [1m<- Request: scihub_url=https://sci-hub.in, source=DoiSource[type=doi, id=journals.ametsoc.org/view/journals/mwre/109/2/1520-0493_1981_109_0255_nhar_2_0_co_2.xml], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:13[0m | [1m-> Response: st

Failed to download Northern Hemisphere airstream regions via Sci-Hub.

Result 4: Effects of Northern Hemisphere Annular Mode on terrestrial near-surface wind speed over eastern China from 1979 to 2017
Publication URL: https://www.sciencedirect.com/science/article/pii/S1674927822001125
Trying to download PDF from: https://www.sciencedirect.com/science/article/pii/S1674927822001125


[1m[INFO][0m | [32m2025/05/28 13:00:14[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 13:00:14[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=www.sciencedirect.com/science/article/pii/S1674927822001125], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


The eprint URL is not a direct link to a PDF.
Downloading Effects of Northern Hemisphere Annular Mode on terrestrial near-surface wind speed over eastern China from 1979 to 2017 from Sci Hub: https://www.sciencedirect.com/science/article/pii/S1674927822001125


[1m[INFO][0m | [32m2025/05/28 13:00:15[0m | [1m-> Response: status_code=200, content_length=6210[0m
[1m[INFO][0m | [32m2025/05/28 13:00:15[0m | [1mChoose scihub url [1]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2025/05/28 13:00:15[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=www.sciencedirect.com/science/article/pii/S1674927822001125], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:16[0m | [1m-> Response: status_code=403, content_length=898[0m
[1m[INFO][0m | [32m2025/05/28 13:00:16[0m | [1mChoose scihub url [2]: https://sci-hub.in[0m
[1m[INFO][0m | [32m2025/05/28 13:00:16[0m | [1m<- Request: scihub_url=https://sci-hub.in, source=DoiSource[type=doi, id=www.sciencedirect.com/science/article/pii/S1674927822001125], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:17[0m | [1m-> Response: status_code=403, content_length=3460[0m
[1m[INFO][0m | 

Failed to download Effects of Northern Hemisphere Annular Mode on terrestrial near-surface wind speed over eastern China from 1979 to 2017 via Sci-Hub.

Result 5: Explosive cyclone development in the Southern Hemisphere and a comparison with Northern Hemisphere events
Publication URL: https://journals.ametsoc.org/view/journals/mwre/130/9/1520-0493_2002_130_2188_ecdits_2.0.co_2.xml
Trying to download PDF from: https://journals.ametsoc.org/view/journals/mwre/130/9/1520-0493_2002_130_2188_ecdits_2.0.co_2.pdf
The eprint URL is not a direct link to a PDF.
Downloading Explosive cyclone development in the Southern Hemisphere and a comparison with Northern Hemisphere events from Sci Hub: https://journals.ametsoc.org/view/journals/mwre/130/9/1520-0493_2002_130_2188_ecdits_2.0.co_2.xml


[1m[INFO][0m | [32m2025/05/28 13:00:18[0m | [1m-> Response: status_code=200, content_length=6279[0m
[1m[INFO][0m | [32m2025/05/28 13:00:18[0m | [1mChoose scihub url [1]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2025/05/28 13:00:18[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=journals.ametsoc.org/view/journals/mwre/130/9/1520-0493_2002_130_2188_ecdits_2.0.co_2.xml], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:18[0m | [1m-> Response: status_code=403, content_length=898[0m
[1m[INFO][0m | [32m2025/05/28 13:00:18[0m | [1mChoose scihub url [2]: https://sci-hub.in[0m
[1m[INFO][0m | [32m2025/05/28 13:00:18[0m | [1m<- Request: scihub_url=https://sci-hub.in, source=DoiSource[type=doi, id=journals.ametsoc.org/view/journals/mwre/130/9/1520-0493_2002_130_2188_ecdits_2.0.co_2.xml], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:18[0m | [1m-> Response

Failed to download Explosive cyclone development in the Southern Hemisphere and a comparison with Northern Hemisphere events via Sci-Hub.

Result 6: Winds and fronts over Southeast Asia
Publication URL: https://www.jstor.org/stable/211475
Downloading Winds and fronts over Southeast Asia from Sci Hub: https://www.jstor.org/stable/211475


[1m[INFO][0m | [32m2025/05/28 13:00:19[0m | [1m-> Response: status_code=200, content_length=8456[0m
[1m[INFO][0m | [32m2025/05/28 13:00:19[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.se/journal-article/fe3a9f7a00c47d44ccce252b75e292a9/dobby1945.pdf', 'title': 'Winds and Fronts over Southeast Asia. Geographical Review, 35(2), 204'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:21[0m | [1m↓ Successfully download the url to: ./data/dynamic/Winds_and_fronts_over_Southeast_Asia.pdf[0m


Successfully downloaded to ./data/dynamic/Winds_and_fronts_over_Southeast_Asia.pdf

Result 7: Wind directions predicted from global circulation models and wind directions determined from eolian sandstones of the western United States—A comparison
Publication URL: https://www.sciencedirect.com/science/article/pii/0037073888900565
Trying to download PDF from: https://www.academia.edu/download/81263686/parrish_20and_20peterson_201988.pdf




PDF saved as ./data/dynamic/Wind_directions_predicted_from_global_circulation_models_and_wind_directions_determined_from_eolian_sandstones_of_the_western_United_StatesA_comparison.pdf
Error Failed to open file './data/dynamic/Wind_directions_predicted_from_global_circulation_models_and_wind_directions_determined_from_eolian_sandstones_of_the_western_United_StatesA_comparison.pdf'.


  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.19999999999999996


Vectorstore updated with 235 document splits.
Obtaining data from scholar...
Querying Google Scholar for: liquid solid ordered changes state


[1m[INFO][0m | [32m2025/05/28 13:00:53[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 13:00:53[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=www.tandfonline.com/doi/pdf/10.1080/14786448108627066], proxies={'http': 'socks5://127.0.0.1:7890'}[0m



Result 1: V. Change of state: Solid-liquid
Publication URL: https://www.tandfonline.com/doi/pdf/10.1080/14786448108627066
Downloading V. Change of state: Solid-liquid from Sci Hub: https://www.tandfonline.com/doi/pdf/10.1080/14786448108627066


[1m[INFO][0m | [32m2025/05/28 13:00:54[0m | [1m-> Response: status_code=200, content_length=8583[0m
[1m[INFO][0m | [32m2025/05/28 13:00:54[0m | [1m* Extracted information: {'url': 'https://moscow.sci-hub.se/2147/31d0b856dbcc85eba1a30404752fbf8b/poynting1881.pdf', 'title': 'V. Change of state  Solid-liquid. The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science, 12(72), 32–48'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:55[0m | [1m↓ Successfully download the url to: ./data/dynamic/V._Change_of_state_Solid-liquid.pdf[0m


Successfully downloaded to ./data/dynamic/V._Change_of_state_Solid-liquid.pdf

Result 2: Characteristic ordering in liquid phase-change materials
Publication URL: https://www.academia.edu/download/91373956/adma.20070001620220922-1-1rr5bwt.pdf
Trying to download PDF from: https://www.academia.edu/download/91373956/adma.20070001620220922-1-1rr5bwt.pdf


[1m[INFO][0m | [32m2025/05/28 13:00:56[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 13:00:56[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=www.sciencedirect.com/science/article/pii/S0040603110003497], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


PDF saved as ./data/dynamic/Characteristic_ordering_in_liquid_phase-change_materials.pdf

Result 3: The influence of the liquid-to-solid transitions on the changes of macromolecules from disorder to order
Publication URL: https://www.sciencedirect.com/science/article/pii/S0040603110003497
Downloading The influence of the liquid-to-solid transitions on the changes of macromolecules from disorder to order from Sci Hub: https://www.sciencedirect.com/science/article/pii/S0040603110003497


[1m[INFO][0m | [32m2025/05/28 13:00:57[0m | [1m-> Response: status_code=200, content_length=8604[0m
[1m[INFO][0m | [32m2025/05/28 13:00:57[0m | [1m* Extracted information: {'url': 'https://2024.sci-hub.se/389/020e6f93a91336f842b611c63a9cec61/wunderlich2011.pdf', 'title': 'The influence of the liquid-to-solid transitions on the changes of macromolecules from disorder to order. Thermochimica Acta, 522(1-2), 2–13'}[0m
[1m[INFO][0m | [32m2025/05/28 13:00:57[0m | [1m↓ Successfully download the url to: ./data/dynamic/The_influence_of_the_liquid-to-solid_transitions_on_the_changes_of_macromolecules_from_disorder_to_order.pdf[0m


Successfully downloaded to ./data/dynamic/The_influence_of_the_liquid-to-solid_transitions_on_the_changes_of_macromolecules_from_disorder_to_order.pdf
Error Failed to open file './data/dynamic/Characteristic_ordering_in_liquid_phase-change_materials.pdf'.


  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.19999999999999996


Vectorstore updated with 251 document splits.
Obtaining data from scholar...
Querying Google Scholar for: radioactive decay dangerous

Result 1: Modeling radioactive decay
Publication URL: https://www.sciencedirect.com/science/article/pii/S1877042811006252
Trying to download PDF from: https://www.sciencedirect.com/science/article/pii/S1877042811006252/pdf?md5=e8dd83318b570489fb7368bdebd1d06d&pid=1-s2.0-S1877042811006252-main.pdf&_valck=1


[1m[INFO][0m | [32m2025/05/28 13:01:28[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 13:01:28[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=www.sciencedirect.com/science/article/pii/S1877042811006252], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


The eprint URL is not a direct link to a PDF.
Downloading Modeling radioactive decay from Sci Hub: https://www.sciencedirect.com/science/article/pii/S1877042811006252


[1m[INFO][0m | [32m2025/05/28 13:01:29[0m | [1m-> Response: status_code=200, content_length=8556[0m
[1m[INFO][0m | [32m2025/05/28 13:01:29[0m | [1m* Extracted information: {'url': 'https://2024.sci-hub.se/1359/514074171b059f71bea6403bf431320f/10.1016@j.sbspro.2011.04.079.pdf', 'title': 'Modeling radioactive decay. Procedia - Social and Behavioral Sciences, 15, 2196–2200'}[0m
[1m[INFO][0m | [32m2025/05/28 13:01:29[0m | [1m↓ Successfully download the url to: ./data/dynamic/Modeling_radioactive_decay.pdf[0m
[1m[INFO][0m | [32m2025/05/28 13:01:29[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 13:01:29[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=link.springer.com/chapter/10.1007/978-3-642-20186-8_11], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


Successfully downloaded to ./data/dynamic/Modeling_radioactive_decay.pdf

Result 2: Modes of radioactive decay
Publication URL: https://link.springer.com/chapter/10.1007/978-3-642-20186-8_11
Downloading Modes of radioactive decay from Sci Hub: https://link.springer.com/chapter/10.1007/978-3-642-20186-8_11


[1m[INFO][0m | [32m2025/05/28 13:01:30[0m | [1m-> Response: status_code=200, content_length=8549[0m
[1m[INFO][0m | [32m2025/05/28 13:01:30[0m | [1m* Extracted information: {'url': 'https://moscow.sci-hub.se/2276/8dd9db168267d8e1dd7378d5d6a9281b/10.1007@978-3-642-20186-811.pdf', 'title': 'Modes of Radioactive Decay. Compendium to Radiation Physics for Medical Physicists, 693–786'}[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1m↓ Successfully download the url to: ./data/dynamic/Modes_of_radioactive_decay.pdf[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1mChoose scihub url [0]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=DoiSource[type=doi, id=link.springer.com/chapter/10.1007/978-3-031-71942-4_5], proxies={'http': 'socks5://127.0.0.1:7890'}[0m


Successfully downloaded to ./data/dynamic/Modes_of_radioactive_decay.pdf

Result 3: Radioactive Decay: Radioactivity, Kinetics of Decay, and Examples
Publication URL: https://link.springer.com/chapter/10.1007/978-3-031-71942-4_5
Downloading Radioactive Decay: Radioactivity, Kinetics of Decay, and Examples from Sci Hub: https://link.springer.com/chapter/10.1007/978-3-031-71942-4_5


[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1m-> Response: status_code=200, content_length=6208[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1mChoose scihub url [1]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=link.springer.com/chapter/10.1007/978-3-031-71942-4_5], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1m-> Response: status_code=403, content_length=898[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1mChoose scihub url [2]: https://sci-hub.in[0m
[1m[INFO][0m | [32m2025/05/28 13:01:31[0m | [1m<- Request: scihub_url=https://sci-hub.in, source=DoiSource[type=doi, id=link.springer.com/chapter/10.1007/978-3-031-71942-4_5], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2025/05/28 13:01:32[0m | [1m-> Response: status_code=403, content_length=3460[0m
[1m[INFO][0m | [32m2025/05

Failed to download Radioactive Decay: Radioactivity, Kinetics of Decay, and Examples via Sci-Hub.

Result 4: Ionizing Radiation Hazards: Dangerous Goods IV
Publication URL: https://jsystemsafety.com/index.php/jss/article/view/16
Trying to download PDF from: https://jsystemsafety.com/index.php/jss/article/download/16/14




PDF saved as ./data/dynamic/Ionizing_Radiation_Hazards_Dangerous_Goods_IV.pdf


  self.vectorstore.similarity_search_with_relevance_scores(
No relevant docs were retrieved using the relevance score threshold 0.19999999999999996


Vectorstore updated with 630 document splits.


This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (1024). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


IndexError: index out of range in self