In [26]:
import warnings
warnings.filterwarnings("ignore")

In [12]:
# Load credentials from the process environment, after reading a local .env file.
import os
from dotenv import load_dotenv

load_dotenv()

# HF_TOKEN is not validated here: the visible cells never pass it to anything.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_API_ENDPOINT = os.getenv("ASTRA_DB_API_ENDPOINT")

# Fail fast with a readable message: os.getenv() returns None for unset variables,
# which would otherwise only surface later inside the database client.
_required = {
    "ASTRA_DB_APPLICATION_TOKEN": ASTRA_DB_APPLICATION_TOKEN,
    "ASTRA_DB_API_ENDPOINT": ASTRA_DB_API_ENDPOINT,
}
_missing = [name for name, value in _required.items() if not value]
if _missing:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing)}"
    )

## Load documents

In [28]:
# Document loader: read every .docx file in `file_path` into `all_files`.
import os
from langchain_community.document_loaders import Docx2txtLoader

file_path = "d:/NextCountry/db/"

# Sort the listing: os.listdir() returns entries in arbitrary order, and a stable
# order keeps the resulting document (and later split) order reproducible.
# Names starting with "~$" are the lock files Word leaves next to an open
# document; they end in ".docx" but are not loadable documents, so skip them.
docx_files = sorted(
    file
    for file in os.listdir(file_path)
    if file.endswith(".docx") and not file.startswith("~$")
)

all_files = []  # Documents collected from all files

for docx in docx_files:
    loader = Docx2txtLoader(file_path=os.path.join(file_path, docx))
    # lazy_load() yields the documents of this file one by one
    all_files.extend(loader.lazy_load())

# Report how many documents were collected in total
print(f"✅ Total documents loaded: {len(all_files)}")

✅ Total documents loaded: 1


In [29]:
# Split the loaded documents into overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings kept together so they are easy to tune in one place
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 300,
    "separators": ["\n"],
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

splits = text_splitter.split_documents(all_files)

# Display the number of chunks produced
len(splits)

38

## Embedding Engine
---
HF credentials needed

In [31]:
from langchain_huggingface import HuggingFaceEmbeddings

# NOTE: the model currently runs locally; it should be moved to API inference.
EMBEDDING_MODEL_NAME = "BAAI/bge-m3"

hf_embedding_model = HuggingFaceEmbeddings(
    show_progress=True,
    model_name=EMBEDDING_MODEL_NAME,
)

print("embedding model loaded")

embedding model loaded


## Vector Database Connection

In [30]:
from astrapy import DataAPIClient

# Build the client from the application token, then open the database
# that lives behind the configured API endpoint.
client = DataAPIClient(ASTRA_DB_APPLICATION_TOKEN)
db = client.get_database_by_api_endpoint(ASTRA_DB_API_ENDPOINT)

# Show the database name and its collections as a quick connectivity check
database_name = db.info().name
collection_names = db.list_collection_names()
print(f"Connected Database: {database_name}\nCollections found: {collection_names}")

Connected Database: next_country
Collections found: ['uk_visa_details']


In [None]:
# Create the vector store on top of the "uk_visa_details" collection
from langchain_astradb import AstraDBVectorStore

vector_store = AstraDBVectorStore(
    token=ASTRA_DB_APPLICATION_TOKEN,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    namespace="default_keyspace",
    collection_name="uk_visa_details",  # this collection gets created automatically
    embedding=hf_embedding_model,
    # autodetect_collection=True,   # set to True while using it
)

# Display the name of the collection the store is bound to
vector_store.collection_name

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches: 100%|██████████| 1/1 [00:00<00:00,  9.32it/s]


'uk_visa_details'

### Pushing data to collection

In [19]:
# Push the splits to the collection in fixed-size batches.
BATCH_SIZE = 10  # number of splits sent per add_documents() call
batch_starts = range(0, len(splits), BATCH_SIZE)
failed_batches = []  # (batch number, exception) for every batch that was not stored

for batch_number, start in enumerate(batch_starts):
    batch = splits[start:start + BATCH_SIZE]
    try:
        # Add this batch to the vector store
        vector_store.add_documents(documents=batch)
        print(f"Chunk {batch_number} added successfully")
    except Exception as e:
        # Best effort: keep uploading the remaining batches, but remember the
        # failure so a partial upload does not go unnoticed.
        failed_batches.append((batch_number, e))
        print(f"Error adding chunk {batch_number}: {e}")

# Summarize failures after the loop; `failed_batches` stays available for a retry.
if failed_batches:
    failed_numbers = [number for number, _ in failed_batches]
    print(
        f"WARNING: {len(failed_batches)} of {len(batch_starts)} batches failed "
        f"(batch numbers: {failed_numbers}); the collection is incomplete."
    )
    

Batches: 100%|██████████| 1/1 [00:07<00:00,  7.99s/it]


Chunk 0 added successfully


Batches: 100%|██████████| 1/1 [00:08<00:00,  8.92s/it]


Chunk 1 added successfully


Batches: 100%|██████████| 1/1 [00:08<00:00,  8.46s/it]


Chunk 2 added successfully


Batches: 100%|██████████| 1/1 [00:05<00:00,  5.78s/it]


Chunk 3 added successfully


In [20]:
# Report the name of the collection the vector store is bound to
target_collection = vector_store.collection_name
print(f"Embeddings loaded to: {target_collection}")

Embeddings loaded to: uk_visa_details


## Check Embeddings

In [22]:
# The data is already loaded, so only re-create the store object here;
# this cell uploads no documents.
existing_store_options = dict(
    collection_name="uk_visa_details",
    embedding=hf_embedding_model,
    token=ASTRA_DB_APPLICATION_TOKEN,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    autodetect_collection=True,
)
vector_store = AstraDBVectorStore(**existing_store_options)

In [25]:
# Sanity check for similarity search: fetch the single best match for a sample query
results = vector_store.similarity_search_with_score(
    query="Documents for UK Visa",
    k=1,
)
for res, score in results:
    # ".3f" prints three decimal places; a bare "3f" only sets a minimum field
    # width of 3 and keeps the default six decimals.
    print(f" --->[Similarity score={score:.3f}]\n{res.page_content}\n[{res.metadata}]\n")

Batches: 100%|██████████| 1/1 [00:00<00:00, 13.58it/s]


 --->[Similarity score=14.789062]
8. Evidence and Documentation

A. Mandatory Documents

Valid passport or travel document

Proof of financial means (bank statements, payslips)

Details of accommodation (hotel booking, invitation letter)

Travel itinerary (if available)

Evidence of employment or study in home country

Previous travel history (visas, entry/exit stamps)

Letter of invitation/sponsorship (if applicable)

Parental consent for children

B. Other Supporting Documents

Proof of ties to home country (property deeds, family documents)

Medical treatment evidence (if applicable)

Conference/event invitations (for business visitors)

9. Period and Conditions of Stay

Standard grant: Up to 6 months per visit.

Long-term multiple entry visas: Valid for 2, 5, or 10 years, but maximum 6 months per entry.

No extensions except for certain medical or academic cases.

Must leave the UK at or before visa expiry.

10. Common Reasons for Refusal

Doubts about genuine intention to visit or