In [40]:
import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import pandas as pd
from transformers import AutoTokenizer
import os
from io import StringIO
import logging

os.environ["TOKENIZERS_PARALLELISM"] = "false"

model_path = '/Users/hissain/git/github/models/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Define maximum token length per chunk
max_token_length = 512

def init_driver():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    service = Service()
    return webdriver.Chrome(service=service, options=options)

def get_text_content(element):
    return ' '.join(element.stripped_strings)

def chunk_text(text, max_token_length):
    tokens = tokenizer.encode(text, add_special_tokens=False)
    chunks = []
    start = 0
    while start < len(tokens):
        end = min(start + max_token_length, len(tokens))
        chunk = tokenizer.decode(tokens[start:end])
        chunks.append(chunk)
        start = end
    return chunks

def merge_small_chunks(chunks, max_token_length):
    """Merge small chunks into larger ones where possible."""
    merged_chunks = []
    temp_chunk = ""
    for chunk in chunks:
        if len(tokenizer.encode(temp_chunk + " " + chunk)) <= max_token_length:
            temp_chunk += " " + chunk
        else:
            merged_chunks.append(temp_chunk.strip())
            temp_chunk = chunk
    if temp_chunk:
        merged_chunks.append(temp_chunk.strip())
    return merged_chunks

def chunk_table(df, max_token_length, header_info):
    table_chunks = []
    current_chunk = header_info + ' ||| '  # Distinct marker between header and rows
    
    for _, row in df.iterrows():
        row_text = ' | '.join([str(cell) for cell in row if pd.notna(cell)])
        combined_text = current_chunk + row_text + ' || '

        if len(tokenizer.encode(combined_text)) <= max_token_length:
            # If adding this row keeps within the max token limit, add it to the current chunk
            current_chunk += row_text + ' || '
        else:
            # If it exceeds the limit, store the chunk and start a new one
            table_chunks.append(current_chunk.strip())
            current_chunk = header_info + ' ||| ' + row_text + ' || '

    # Append any remaining chunk
    if current_chunk:
        table_chunks.append(current_chunk.strip())

    return table_chunks


def scrape_and_chunk_page(url):
    driver = init_driver()
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    chunks = []
    current_url = url
    last_header = ""

    elements = soup.find_all(['h1', 'h2', 'h3', 'h4', 'p', 'table'])
    for element in elements:
        if element.name in ['h1', 'h2', 'h3', 'h4']:
            header_text = get_text_content(element)
            last_header = header_text  # Store this as context for following elements
            header_chunks = chunk_text(header_text, max_token_length)
            chunks.extend([(chunk, current_url) for chunk in header_chunks])
            
        elif element.name == 'p':
            paragraph_text = get_text_content(element)
            paragraph_chunks = chunk_text(paragraph_text, max_token_length)
            chunks.extend([(chunk, current_url) for chunk in paragraph_chunks])
            
        elif element.name == 'table':
            table_html = StringIO(str(element))
            df = pd.read_html(table_html)[0]
            
            # Drop empty rows and columns
            df.dropna(axis=0, how='all', inplace=True)
            df.dropna(axis=1, how='all', inplace=True)
            
            # Ensure column headers are strings
            df.columns = [str(col) for col in df.columns]
            header_info = last_header + ' | ' + ' | '.join(df.columns) if not df.columns.empty else last_header
            
            # Chunk the table content
            table_chunks = chunk_table(df, max_token_length, header_info)
            chunks.extend([(chunk, current_url) for chunk in table_chunks])

    # Merge small chunks where possible
    text_chunks = [chunk[0] for chunk in chunks]
    final_chunks = merge_small_chunks(text_chunks, max_token_length)
    
    # Re-associate URLs after merging
    return [(chunk, current_url) for chunk in final_chunks]

# Usage example:
url = "https://en.wikipedia.org/wiki/List_of_wars_by_death_toll"
scraped_chunks = scrape_and_chunk_page(url)

print(f"Total Chunks: {len(scraped_chunks)}")

for chunk, url in scraped_chunks[:5]:
    print(f"Chunk: {chunk}\nSource URL: {url}\n")


Token indices sequence length is longer than the specified maximum sequence length for this model (527 > 512). Running this sequence through the model will result in indexing errors


Total Chunks: 13
Chunk: contents list of wars by death toll List of wars by death toll | 0 ||| Part of a series on || War (outline) || showHistory || showMilitary || showBattlespace || showWeapons || showTactics || showOperational || showStrategy || showGrand strategy || showAdministrative || showOrganization || showPersonnel || showLogistics || showScience || showLaw || showTheory || showNon-warfare || showCulture || showRelated || hideLists Battles Military occupations Military terms Operations Sieges War crimes Wars Weapons Writers || vte || this list of wars by death toll includes all deaths that are either directly or indirectly caused by war. these numbers include the deaths of military personnel which are the direct results of a battle or other military wartime actions, as well as wartime / war - related deaths of civilians which are often results of war - induced epidemics, famines, genocide, etc. due to incomplete records, the destruction of evidence, differing methods of coun

In [41]:
import numpy as np
from qdrant_client import QdrantClient, models
from tqdm.notebook import tqdm
from sentence_transformers import SentenceTransformer
from IPython.display import display, clear_output, Markdown
import requests
import json
import asyncio

session = requests.Session()
session.headers.update({"Connection": "keep-alive", "Content-Type": "application/json"})

qdrant_url = "http://localhost:6333"
collection_name = "wiki_collection"
ollama_url_inf = "http://localhost:11434/api/show"
ollama_url_emb = "http://localhost:11434/api/embeddings"
ollama_url_gen = "http://localhost:11434/api/generate"
ollama_model_name = "llama3.2:latest"

client = QdrantClient(url=qdrant_url)
embedding_model = SentenceTransformer(model_path)

def get_embedding(text):
    return embedding_model.encode(text)


def create_collection_if_not_exists(dimension):
    #if collection_name in client.get_collections():
    client.delete_collection(collection_name=collection_name)
    #else:
    #    print(f"{collection_name} not in {client.get_collections()}")
    
    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=dimension, distance=models.Distance.COSINE),
    )
    
def upsert_points_with_metadata(embeddings, chunks):
    points = [
        models.PointStruct(
            id=i,
            vector=embedding.tolist(),
            payload={"text": chunk, "url": url}
        ) for i, (embedding, (chunk, url)) in enumerate(zip(embeddings, chunks))
    ]
    client.upsert(collection_name=collection_name, points=points)

def store_in_qdrant_with_metadata(chunks):
    dimension = 384  # Dimension for 'all-MiniLM-L6-v2'
    create_collection_if_not_exists(dimension)
    chunk_texts = [chunk for chunk, _ in chunks]
    embeddings = embedding_model.encode(chunk_texts, batch_size=32, show_progress_bar=True)
    upsert_points_with_metadata(embeddings, chunks)

def search_points_with_metadata(query_text, k=3):
    query_embedding = get_embedding(query_text)
    search_result = client.search(
        collection_name=collection_name,
        query_vector=query_embedding.tolist(),
        limit=k,
        with_payload=True
    )
    return [{"text": hit.payload["text"], "url": hit.payload["url"]} for hit in search_result]

def ask(query, k=3, p=False):
    retrieved_docs = search_points_with_metadata(query, k)
    
    combined_docs = "\n\n".join([f"Source: {doc['url']}\n{doc['text']}" for doc in retrieved_docs])
    inst = "Instruction: If you do not find the answer in the CONTEXT, just say you don't know."
    rag_prompt = f"{inst}\n\n<CONTEXT>\n{combined_docs}\n</CONTEXT>\n\nQuery: {query}\n"
    if p:
        print(rag_prompt)
        
    payload = {"model": ollama_model_name, "prompt": rag_prompt, "stream": True}
    headers = {"Content-Type": "application/json"}

    response_text = ""
    if p:
        response_text = rag_prompt
    buffer = ""

    response = session.post(ollama_url_gen, headers=headers, data=json.dumps(payload), stream=True)

    if response.status_code == 200:
        for chunk in response.iter_content(chunk_size=None):
            try:
                data = json.loads(chunk.decode('utf-8'))
                content = data.get("response", "")
                buffer += content

                if len(buffer) > 10:
                    response_text += buffer
                    clear_output(wait=True)
                    display(Markdown(response_text))
                    buffer = ""
                    
            except json.JSONDecodeError:
                continue

        response_text += buffer
        clear_output(wait=True)
        display(Markdown(response_text))
    else:
        print("Request failed:", response.status_code, response.text)

    return response_text

try:
    store_in_qdrant_with_metadata(scraped_chunks)
    print(f'Stored {len(scraped_chunks)} chunks')
except Exception as e:
    print(f"Error storing in Qdrant: {e}")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Stored 13 chunks


In [48]:
_ = ask("Bangladesh Liberation War data?", p=False)

The death range for the Bangladesh Liberation War is 0.3-3 million, with a date of 1971, and multiple combatants including India and Provisional Government of Bangladesh vs. Pakistan. The location is the Indian subcontinent.

In [49]:
_ = ask("When was Federal War happened?")

I don't know when the Federal War occurred from the given CONTEXT.

In [50]:
_ = ask("How many died in Congo Crisis?")

I don't know. The Congo Crisis is not mentioned in the provided CONTEXT as a separate event, but it appears to be related to the Ituri conflict and other conflicts listed under "External links". Therefore, I couldn't find any information on the death toll of the Congo Crisis itself in the provided context.

In [45]:
_ = ask("Where did Second Congo War happend?")

Second Congo War happened in the Democratic Republic of the Congo.

In [44]:
_ = ask("What types of killings are excluded in the list?")

According to the CONTEXT, the list excludes mass killings and atrocities not explicitly classified as genocides, such as:

* Human sacrifices
* Ethnic cleansing operations
* Acts of state terrorism or political repression during peacetime.

In [42]:
_ = ask("Show the table data for Arab-Israeli conflict and Lebanese Civil War.", k=2, p=True)

Instruction: If you do not find the answer in the CONTEXT, just say you don't know.

<CONTEXT>
Source: https://en.wikipedia.org/wiki/List_of_wars_by_death_toll
List | War | Death range | Date | Combatants | Location ||| Algerian Civil War | 0.15 million[206] | 1992–2002 | Multiple sides | North Africa || Arab-Israeli conflict | 0.15 million[207][208][209][210] | 1948[g]–present | Israel vs. Arab League, Iran, Hezbollah, Hamas, and the Houthi movement | Levant || Lebanese Civil War | 0.12–0.15 million[212][213][214] | 1975–1990 | Multiple sides | Levant || Greek Civil War | 0.08–0.15 million[215][216] | 1946–1949 | Kingdom of Greece vs. Provisional Democratic Government | Balkans and Peloponnese Peninsula || Yugoslav Wars | 0.13–0.14 million[217][218] | 1991–2001 | Separatist forces and NATO vs. Socialist Federal Republic of Yugoslavia, later Federal Republic of Yugoslavia | Balkans || Irish Nine Year's War | 0.13 million[219] | 1593–1603 | Kingdom of England vs. Irish rebels | Ireland || Chaco War | 0.08–0.13 million[220][221][222] | 1932–1935 | Paraguay vs. Bolivia | Paraguay and Bolivia || Federal War | 0.1 million[223] | 1859–1863 | Federalists vs. Conservatives | Venezuela || Congo Crisis | 0.1 million[224] | 1960–1965 | Republic of the Congo, later Democratic Republic of the Congo, and allies vs. Free Republic of the Congo, South Kasai, Katanga, Kwilu rebels, Simba rebels, and allies | Republic of the Congo || Wars of Alexander the Great | 0.1 million[225][226][227] | 336 BCE–323 BCE | Macedonian Empire vs. Achaemenid Empire among others | Greece, Balkans, Anatolia, Persia, Middle East, Central Asia, and the Indian subcontinent || charts and graphs see also notes references further reading external links External links ||| External links ||| External links ||| External links |||

Source: https://en.wikipedia.org/wiki/List_of_wars_by_death_toll
List | War | Death range | Date | Combatants | Location ||| Goguryeo–Sui War | 0.3 million[153][154] | 598–614 | Sui Dynasty vs. Goguryeo | Manchuria and Korean Peninsula || Carlist Wars | 0.3 million[155] | 1833–1876 | Carlists vs. Liberals and Republicans | Iberian Peninsula || Iraqi conflict | 0.27–0.3 million[156] | 2003–2017 | Multiple sides | Levant || Gulf War | 0.17–0.3 million[157][158] | 1990–1991[e] | Kuwait and the United States-led coalition vs. Iraq | Kuwait and Iraq || Roman conquest of Britain | 0.13–0.29 million[159][160][161] | 43–84 | Roman Empire vs. Celtic Britons | Great Britain || Russo-Ukrainian War | 0.12–0.28 million[162][163][164][165][166] | 2014–present | Russia vs. Ukraine | Eastern Europe and the Black Sea || Philippine–American War | 0.21–0.27 million[167] | 1899–1913 | United States vs. Philippine Republic, later Tagalog Republic, Sultanate of Sulu and Sultanate of Maguindanao | Philippines || Burmese–Siamese wars | 0.26 million[168][169] | 1547–1855 | Thailand vs. Myanmar | Southeast Asia || Kalinga War | 0.25 million[170] | 262 BCE–261 BCE | Maurya Empire vs. Kalinga | Indian subcontinent || First Congo War | 0.25 million[171] | 1996–1997 | Zaire vs. AFDL | Democratic Republic of the Congo || Cristero War | 0.25 million[172] | 1926–1929 | Mexico vs. Cristeros | Mexico || Greek War of Independence | 0.24 million[173][174] | 1821–1829 | Ottoman Empire vs. Separatist First Hellenic Republic | Balkans and Peloponnese Peninsula || Myanmar conflict | 0.23 million[175][176] | 1948–present | National Unity Government of Myanmar vs. State Administration Council | Myanmar || American Revolution | 0.18–0.23 million[177][178] | 1775–1783 | American Patriots vs. Great Britain | North America ||
</CONTEXT>

Query: Show the table data for Arab-Israeli conflict and Lebanese Civil War.
Here is the table data for the requested wars:

1. Arab-Israeli conflict
   | War | Death range | Date | Combatants | Location |
   | Arab-Israeli conflict | 0.15 million[207][208][209][210] | 1948[g]–present | Israel vs. Arab League, Iran, Hezbollah, Hamas, and the Houthi movement | Levant |

2. Lebanese Civil War
   | War | Death range | Date | Combatants | Location |
   | Lebanese Civil War | 0.12–0.15 million[212][213][214] | 1975–1990 | Multiple sides | Levant |

In [43]:
_ = ask("this 'list excludes mass killings and atrocities' of what types?", k=5, p=True)

Instruction: If you do not find the answer in the CONTEXT, just say you don't know.

<CONTEXT>
Source: https://en.wikipedia.org/wiki/List_of_wars_by_death_toll
List | War | Death range | Date | Combatants | Location ||| Goguryeo–Sui War | 0.3 million[153][154] | 598–614 | Sui Dynasty vs. Goguryeo | Manchuria and Korean Peninsula || Carlist Wars | 0.3 million[155] | 1833–1876 | Carlists vs. Liberals and Republicans | Iberian Peninsula || Iraqi conflict | 0.27–0.3 million[156] | 2003–2017 | Multiple sides | Levant || Gulf War | 0.17–0.3 million[157][158] | 1990–1991[e] | Kuwait and the United States-led coalition vs. Iraq | Kuwait and Iraq || Roman conquest of Britain | 0.13–0.29 million[159][160][161] | 43–84 | Roman Empire vs. Celtic Britons | Great Britain || Russo-Ukrainian War | 0.12–0.28 million[162][163][164][165][166] | 2014–present | Russia vs. Ukraine | Eastern Europe and the Black Sea || Philippine–American War | 0.21–0.27 million[167] | 1899–1913 | United States vs. Philippine Republic, later Tagalog Republic, Sultanate of Sulu and Sultanate of Maguindanao | Philippines || Burmese–Siamese wars | 0.26 million[168][169] | 1547–1855 | Thailand vs. Myanmar | Southeast Asia || Kalinga War | 0.25 million[170] | 262 BCE–261 BCE | Maurya Empire vs. Kalinga | Indian subcontinent || First Congo War | 0.25 million[171] | 1996–1997 | Zaire vs. AFDL | Democratic Republic of the Congo || Cristero War | 0.25 million[172] | 1926–1929 | Mexico vs. Cristeros | Mexico || Greek War of Independence | 0.24 million[173][174] | 1821–1829 | Ottoman Empire vs. Separatist First Hellenic Republic | Balkans and Peloponnese Peninsula || Myanmar conflict | 0.23 million[175][176] | 1948–present | National Unity Government of Myanmar vs. State Administration Council | Myanmar || American Revolution | 0.18–0.23 million[177][178] | 1775–1783 | American Patriots vs. Great Britain | North America ||

Source: https://en.wikipedia.org/wiki/List_of_wars_by_death_toll
contents list of wars by death toll List of wars by death toll | 0 ||| Part of a series on || War (outline) || showHistory || showMilitary || showBattlespace || showWeapons || showTactics || showOperational || showStrategy || showGrand strategy || showAdministrative || showOrganization || showPersonnel || showLogistics || showScience || showLaw || showTheory || showNon-warfare || showCulture || showRelated || hideLists Battles Military occupations Military terms Operations Sieges War crimes Wars Weapons Writers || vte || this list of wars by death toll includes all deaths that are either directly or indirectly caused by war. these numbers include the deaths of military personnel which are the direct results of a battle or other military wartime actions, as well as wartime / war - related deaths of civilians which are often results of war - induced epidemics, famines, genocide, etc. due to incomplete records, the destruction of evidence, differing methods of counting, and various other reasons, death tolls of wars have often been quite uncertain, and heavily debated. while the definition of war isn't entirely clear - cut, there is a general understanding of what it is. merriam - webster defines war as " a state of usually open and declared armed hostile conflict between states or nations ", [ 1 ] oxford english dictionary defines war as " hostile contention by means of armed forces, carried on between nations, states, or rulers, or between parties in the same nation or state ; the employment of armed forces against a foreign power, or against an opposing party in the state ", [ 2 ] and encyclopædia britannica defines war as " a conflict between political groups involving hostilities of considerable duration and magnitude ". [ 3 ] this list excludes mass killings and atrocities not explicitly classified as genocides, as well as genocides occurring outside of wartime, human sacrifices, ethnic cleansing operations, and acts of state terrorism or political repression during peacetime. [ a ] list

Source: https://en.wikipedia.org/wiki/List_of_wars_by_death_toll
List | War | Death range | Date | Combatants | Location ||| Roman-Germanic wars | 0.54 million[108][109] | 113 BCE–774 | Roman Republic, later Roman Empire and Byzantine Empire vs. Germanic tribes | Germania || First Punic War | 0.4–0.54 million[110][111] | 264 BCE–241 BCE | Roman Republic vs. Ancient Carthage | Southern Europe, the Mediterranean Sea, and North Africa || Iran-Iraq War | 0.45–0.5 million[112][113] | 1980–1988 | Islamic Republic of Iran vs. Iraqi Republic | Iran and Iraq || Paraguayan War | 0.15–0.5 million[114] | 1864–1870 | Empire of Brazil, Argentina, and Uruguay vs. Paraguay | South America || Uganda-Tanzania War and Ugandan Bush War | 0.1–0.5 million[115][116][117][118] | 1978–1986 | Uganda vs. Tanzania, National Liberation Front, and National Resistance Movement | Uganda and Tanzania || Papua conflict | 0.1–0.5 million[119][120] | 1962–present | Indonesia vs. Free Papua Movement | New Guinea || Eighty Years' War | 0.1–0.5 million[121] | 1566–1648 | Spanish Empire vs. Separatist Dutch Republic | Low Countries || Spanish Civil War | 0.35–0.47 million[122][123][124] | 1936–1939[d] | Nationalists vs. Republicans | Iberian Peninsula || Colombian conflict | 0.45 million[128] | 1964–present | Colombia vs. Colombian and Mexican drug cartels and paramilitaries vs. FARC | Colombia || Polish–Ottoman Wars | 0.43 million[129][130] | 1485–1699 | Poland-Lithuania, Holy League, and allies vs. Ottoman Empire and allies | Central Europe and Balkans || Roman–Greek wars | 0.42 million[131] | 280 BCE–30 BCE | Roman Republic vs. Greek states, later Greek rebels and Ptolemaic Kingdom | Peloponnese Peninsula, Balkans, Anatolia, Egypt and Italy || Maratha invasions of Bengal | 0.4 million[132][133] | 1741–1751 | Maratha Confederacy vs. Bengal Nawab | Indian subcontinent ||

Source: https://en.wikipedia.org/wiki/List_of_wars_by_death_toll
List | War | Death range | Date | Combatants | Location ||| Algerian Civil War | 0.15 million[206] | 1992–2002 | Multiple sides | North Africa || Arab-Israeli conflict | 0.15 million[207][208][209][210] | 1948[g]–present | Israel vs. Arab League, Iran, Hezbollah, Hamas, and the Houthi movement | Levant || Lebanese Civil War | 0.12–0.15 million[212][213][214] | 1975–1990 | Multiple sides | Levant || Greek Civil War | 0.08–0.15 million[215][216] | 1946–1949 | Kingdom of Greece vs. Provisional Democratic Government | Balkans and Peloponnese Peninsula || Yugoslav Wars | 0.13–0.14 million[217][218] | 1991–2001 | Separatist forces and NATO vs. Socialist Federal Republic of Yugoslavia, later Federal Republic of Yugoslavia | Balkans || Irish Nine Year's War | 0.13 million[219] | 1593–1603 | Kingdom of England vs. Irish rebels | Ireland || Chaco War | 0.08–0.13 million[220][221][222] | 1932–1935 | Paraguay vs. Bolivia | Paraguay and Bolivia || Federal War | 0.1 million[223] | 1859–1863 | Federalists vs. Conservatives | Venezuela || Congo Crisis | 0.1 million[224] | 1960–1965 | Republic of the Congo, later Democratic Republic of the Congo, and allies vs. Free Republic of the Congo, South Kasai, Katanga, Kwilu rebels, Simba rebels, and allies | Republic of the Congo || Wars of Alexander the Great | 0.1 million[225][226][227] | 336 BCE–323 BCE | Macedonian Empire vs. Achaemenid Empire among others | Greece, Balkans, Anatolia, Persia, Middle East, Central Asia, and the Indian subcontinent || charts and graphs see also notes references further reading external links External links ||| External links ||| External links ||| External links |||

Source: https://en.wikipedia.org/wiki/List_of_wars_by_death_toll
List | War | Death range | Date | Combatants | Location ||| Chechen conflict | 0.08–0.23 million[179][180][181][182][183][184] | 1994–2009 | Russia vs. Separatist Chechen Republic of Ichkeria | Caucasus || Indian Invasion of Hyderabad | 0.2 million[185][186] | 1948 | India vs. Hyderabad | Indian subcontinent || La Violencia | 0.2 million[187] | 1948–1958 | Colombian Conservative Party vs. Colombian Liberal Party | Colombia || Greco-Persian Wars | 0.2 million[188] | 499 BCE–449 BCE | Greek city-states vs. Achaemenid Empire | Southeast Europe, West Asia, and Northeast Africa || Guatemalan Civil War | 0.14–0.2 million[189][190] | 1960–1996 | Government of Guatemala vs. Guatemalan National Revolutionary Unity | Central America || North Yemen civil war | 0.1–0.2 million[191][192] | 1962–1970 | Kingdom of Yemen vs. Yemen Arab Republic | Yemen || Italo-Senussi Wars | 0.07-0.2 million[193][194][195] | 1911–1934 | Kingdom of Italy vs. Senusiyya, Ottoman Empire | North Africa || Portuguese Colonial War | 0.14–0.18 million[196][197][198] | 1961–1974 | Estado Novo vs. MPLA, PAIGC, FNLA, among others | Angola, Guinea, and Mozambique || Thousand Days' War | 0.1–0.18 million[199] | 1899–1902 | Colombian Conservative Party vs. Colombian Liberal Party | Colombia || Sri Lankan Civil War | 0.08–0.17 million[200][201] | 1983[f]–2009 | Sri Lankan government vs. Separatist Liberation Tigers of Tamil Eelam | Sri Lanka || Russo-Japanese War | 0.12–0.16 million[203] | 1904–1905 | Empire of Japan vs. Russian Empire | East Asia || Sudanese civil war (2023–present) | 0.15 million[204][205] | 2023–present | Sudan and allies vs. Rapid Support Forces and allies | Sudan ||
</CONTEXT>

Query: this 'list excludes mass killings and atrocities' of what types?
According to the provided context, the list excludes "mass killings and atrocities" such as genocide, ethnic cleansing, and other forms of large-scale violence or brutality that are not typically considered part of a broader conflict.