In [1]:
import cv2
# Sanity check: print the version string of the OpenCV build that `import cv2` resolved to.
print(cv2.__version__)

4.11.0


In [2]:
import torch
# Report whether PyTorch can see a CUDA device, then the device it would pick.
print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.device("cuda"))
else:
    print("Device:", torch.device("cpu"))

CUDA Available: True
Device: cuda


In [3]:
import torch
# GPU visibility (True when a CUDA device is usable)
print(torch.cuda.is_available())
# Number of CUDA devices PyTorch can see
print(torch.cuda.device_count())
# Name of device 0, or a fallback message when no GPU is usable
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU found")

True
1
NVIDIA GeForce GTX 1650 Ti


In [6]:
import torch

# Print a short memory report for CUDA device 0 (byte counts are divided by 1e9,
# i.e. decimal gigabytes), or a notice when no GPU is usable.
if not torch.cuda.is_available():
    print("No GPU detected")
else:
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"Total VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    # memory_allocated / memory_reserved are read at the moment this cell runs,
    # so they reflect whatever the kernel has already put on the GPU.
    print(f"Allocated VRAM: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")
    print(f"Cached VRAM: {torch.cuda.memory_reserved(0) / 1e9:.2f} GB")

GPU Name: NVIDIA GeForce GTX 1650 Ti
Total VRAM: 4.29 GB
Allocated VRAM: 7.55 GB
Cached VRAM: 7.56 GB


In [4]:
!pip install keybert sentence-transformers

Collecting keybert
  Downloading keybert-0.9.0-py3-none-any.whl.metadata (15 kB)
Downloading keybert-0.9.0-py3-none-any.whl (41 kB)
Installing collected packages: keybert
Successfully installed keybert-0.9.0


In [5]:
import json
import os

# torch is imported here explicitly so this cell does not depend on an earlier
# diagnostic cell having been run first.
import torch
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer

# Input directory holding the source JSON files and output directory for the
# keyword-annotated copies (both resolved relative to the working directory).
DATA_PATH = "../data/json"
NEW_DATA_PATH = "../data/new_json"

# Auto-detect device (prefer CUDA if available)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the MiniLM sentence encoder on the chosen device via the constructor's
# `device` argument, then hand it to KeyBERT as its embedding backend.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=device)
kw_model = KeyBERT(model)  # Initialize KeyBERT with MiniLM

# Ensure output directory exists
os.makedirs(NEW_DATA_PATH, exist_ok=True)

def extract_keywords(query, solution, top_n=5):
    """Return the top keywords/phrases KeyBERT finds for a query-solution pair.

    Args:
        query: Query text; formatted into the combined document first.
        solution: Solution text; appended after the query, separated by a space.
        top_n: Maximum number of keywords requested from the model (default 5).

    Returns:
        A list holding the first element of every item returned by
        ``kw_model.extract_keywords`` (the keyword/phrase, without its score).
    """
    combined_text = f"{query} {solution}"

    # Ask the module-level KeyBERT model for one- and two-word phrases,
    # ignoring English stop words.
    scored_phrases = kw_model.extract_keywords(
        combined_text,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=top_n,
    )

    # Keep only the phrase part of each result item.
    phrases = []
    for scored in scored_phrases:
        phrases.append(scored[0])
    return phrases

# Annotate every JSON file in DATA_PATH with KeyBERT keywords and write the
# result, under the same filename, into NEW_DATA_PATH.
for filename in sorted(os.listdir(DATA_PATH)):  # sorted -> deterministic processing order
    if not filename.endswith(".json"):
        continue

    file_path = os.path.join(DATA_PATH, filename)
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)  # Expecting a list of dictionaries

    # Files whose top level is not a list are copied through unchanged.
    if isinstance(data, list):
        for entry in data:
            # Skip items that are not dicts (strings, numbers, nested lists, ...)
            # instead of crashing on `.get`; they are written back untouched.
            if not isinstance(entry, dict):
                continue

            query = entry.get("Query", "")
            solution = entry.get("Solution", "")

            # Keywords are only added when both fields are present and non-empty.
            if query and solution:
                entry["keywords"] = extract_keywords(query, solution)

    # Save updated JSON to new path
    new_file_path = os.path.join(NEW_DATA_PATH, filename)
    with open(new_file_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False writes non-ASCII characters as real UTF-8 text
        # rather than \uXXXX escapes, matching the UTF-8 encoding of the file.
        json.dump(data, f, indent=4, ensure_ascii=False)

# Report the directory that was actually written to.
print(f"✅ Keyword extraction completed. Updated files are saved in {NEW_DATA_PATH}/")

Using device: cuda
✅ Keyword extraction completed. Updated files are saved in /data/new_json/
