In [1]:
pip install sentence-transformers pandas scikit-learn

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 

In [2]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import json

In [3]:
def _load_json(path):
    """Return the parsed contents of the UTF-8 encoded JSON file at ``path``."""
    with open(path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)


# ISO 27002 clause data; iterated later with `.items()`.
iso_data = _load_json("iso27002_extracted.json")

# SOP records; indexed by position later, each exposing a "text" field.
sop_data = _load_json("cleaned_sopsv2.json")

In [10]:
# Name of the sentence-embedding checkpoint used for every encoding in this
# notebook; its files are downloaded when they are not already cached.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

model = SentenceTransformer(EMBEDDING_MODEL_NAME)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [11]:
# Position within `sop_data` of the SOP that is compared against the clauses.
SOP_INDEX = 4

selected_sop = sop_data[SOP_INDEX]
sop_text = selected_sop["text"]

# Accumulator for the per-clause result rows.
results = list()


In [12]:
# Score every ISO 27002 clause against the selected SOP text and collect one
# result row per clause.
#
# `results` is rebuilt from scratch in this cell so that re-running it does not
# append a second copy of every row to a list created by an earlier cell.
#
# NOTE(review): the printed clause names contain stray spaces ("Learning fr om
# ..."), so the extracted clause text may be similarly garbled — verify the
# extraction, since that would degrade the embeddings.
# NOTE(review): the encoder presumably truncates inputs beyond its maximum
# sequence length — confirm long SOP/guidance text is fully represented.

RELEVANCE_THRESHOLD = 0.5  # scores strictly above this are labelled "Relevant"

results = []
clause_rows = []   # (clause_id, clause_name) for each clause that has text
clause_texts = []  # text to embed, aligned index-for-index with clause_rows

for clause_id, content in iso_data.items():
    # `or ""` also covers keys that are present but null in the JSON, which
    # would otherwise fail on `.strip()`.
    clause_name = (content.get("Clause Name") or "").strip()
    control = (content.get("Control") or "").strip()
    purpose = (content.get("Purpose") or "").strip()
    guidance = (content.get("Guidance") or "").strip()

    # Nothing to embed for this clause.
    if not (control or purpose or guidance):
        continue

    clause_rows.append((clause_id, clause_name))
    # Join only the non-empty parts so missing sections leave no double spaces.
    clause_texts.append(
        " ".join(part for part in (control, purpose, guidance) if part)
    )

if clause_texts:
    # The SOP text does not change between clauses: encode it once, and encode
    # all clause texts in a single batched call instead of one call per clause.
    sop_embedding = model.encode([sop_text])
    clause_embeddings = model.encode(clause_texts)

    # Result has one row per clause and a single column for the SOP embedding.
    scores = cosine_similarity(clause_embeddings, sop_embedding)[:, 0]

    for (clause_id, clause_name), score in zip(clause_rows, scores):
        score = float(score)  # plain Python float rather than a NumPy scalar
        results.append({
            "Clause ID": clause_id,
            "Clause Name": clause_name,
            "Similarity Score": round(score, 3),
            # The label is decided on the unrounded score.
            "Remarks": "Relevant" if score > RELEVANCE_THRESHOLD else "Not Relevant"
        })

In [13]:
# Rank the clauses from most to least similar to the SOP.
#
# The columns are named explicitly so that an empty `results` list still yields
# a frame with a "Similarity Score" column; without them the sort raises
# KeyError. A stable sort keeps the original clause order among equal (rounded)
# scores, so the ranking is deterministic.
RESULT_COLUMNS = ["Clause ID", "Clause Name", "Similarity Score", "Remarks"]

df = (
    pd.DataFrame(results, columns=RESULT_COLUMNS)
    .sort_values(by="Similarity Score", ascending=False, kind="stable")
    .reset_index(drop=True)
)

In [14]:
# Persist the full ranking; the index is omitted because it only mirrors the
# row order.
df.to_csv("ISO_to_SOP_Similarity_Report_4_1.csv", index=False)

# Preview the 20 highest-scoring clauses. A bare last expression uses the
# notebook's rich table display instead of the plain-text dump from print().
df.head(20)

   Clause ID                                        Clause Name  \
0       5.27      Learning fr om information security incidents   
1       5.26        R esponse to information security incidents   
2       5.24  Information secur ity incident management plan...   
3       5.33                             Pr otection of records   
4       5.35        Independent r eview of information security   
5       5.36  C ompliance with policies, rules and standards...   
6       5.21  Managing informa tion security in the ICT supp...   
7       5.22  Monit oring, review and change management of s...   
8       5.37                   Document ed operating procedures   
9       5.34                     Pri vacy and protection of PII   
10       6.5  R esponsibilities after termination or change ...   
11      5.23    Information secur ity for use of cloud services   
12       6.4                              Disciplinary pr ocess   
13      5.31  Leg al, statutory, regulatory and contractual ..