In [None]:
# If you haven't installed Transformers or openpyxl, uncomment and run:
# !pip install transformers openpyxl

import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


In [9]:
# Load the scenario workbook; point the path at your own .xlsx if it lives elsewhere.
df = pd.read_excel(io="scenarios.xlsx", engine="openpyxl")

# Preview the first five rows, then list the column labels.
display(df.head())
print(df.columns)


Unnamed: 0,Scenario ID,User,Assistant - Extended,Assistant - Short,Assistant - Details,Assistant - Risk ID,Assistant - Risk description,Assistant - Vulnerability ID,Assistant - Vulnerability description,Assistant - Risk occurrence type
0,S1,The processing center is located in the basem...,The described scenario presents at least one ...,Yes,The system is vulnerable to flooding,M3,Flooding,V27,Inadequate flood protection,Real
1,S1,The processing center is located in the basem...,The described scenario presents at least one ...,Yes,The system is vulnerable to explosions or col...,M13,Destruction of the CED area,V38,System not protected from explosions,Real
2,S2,Confidential documents are stored in an archi...,The scenario is not sufficiently characterize...,More,Details on the physical characteristics of th...,M15,Physical theft of media,V32,Inadequate access control to the CED area,Potential
3,S2,Confidential documents are stored in an archi...,The scenario is not sufficiently characterize...,More,Details on the physical characteristics of th...,M25,Sabotage,V32,Inadequate access control to the CED area,Potential
4,S3,The connection between the data center and the...,The scenario is not sufficiently characterize...,More,Details on data line protection are missing,M18,Interception and espionage,V1,Communication channels not adequately protected,Potential


Index([' Scenario ID', ' User', ' Assistant - Extended', ' Assistant - Short',
       ' Assistant - Details', ' Assistant - Risk ID',
       ' Assistant - Risk description', ' Assistant - Vulnerability ID',
       ' Assistant - Vulnerability description',
       ' Assistant - Risk occurrence type'],
      dtype='object')


In [10]:
# Print each column label via its repr so stray whitespace becomes visible.
for col in df.columns:
    print(f"{col!r}")


' Scenario ID'
' User'
' Assistant - Extended'
' Assistant - Short'
' Assistant - Details'
' Assistant - Risk ID'
' Assistant - Risk description'
' Assistant - Vulnerability ID'
' Assistant - Vulnerability description'
' Assistant - Risk occurrence type'


In [11]:
# Print each column label wrapped in single quotes to expose padding.
for col in df.columns:
    print("'{}'".format(col))


' Scenario ID'
' User'
' Assistant - Extended'
' Assistant - Short'
' Assistant - Details'
' Assistant - Risk ID'
' Assistant - Risk description'
' Assistant - Vulnerability ID'
' Assistant - Vulnerability description'
' Assistant - Risk occurrence type'


In [12]:
# Strip leading/trailing whitespace from every column label. The raw headers
# print as ' Scenario ID', ' User', ... (note the leading space), while later
# cells refer to the columns by their unpadded names.
df.columns = df.columns.str.strip()


In [None]:
# Re-print the column labels via repr to confirm the whitespace was stripped.
# (Fixes the stray leading "p" that turned `for` into a SyntaxError.)
for col in df.columns:
    print(repr(col))

In [None]:
# Keep only rows that have both a risk description and a vulnerability
# description, then renumber the index from 0 so it is contiguous again.
df = (
    df
    .dropna(subset=["Assistant - Risk description", "Assistant - Vulnerability description"])
    .reset_index(drop=True)
)

# Report how many rows survived and preview the first five.
print("DataFrame after dropping missing rows:", len(df))
display(df.head())


DataFrame after dropping missing rows: 1199


Unnamed: 0,Scenario ID,User,Assistant - Extended,Assistant - Short,Assistant - Details,Assistant - Risk ID,Assistant - Risk description,Assistant - Vulnerability ID,Assistant - Vulnerability description,Assistant - Risk occurrence type
0,S1,The processing center is located in the basem...,The described scenario presents at least one ...,Yes,The system is vulnerable to flooding,M3,Flooding,V27,Inadequate flood protection,Real
1,S1,The processing center is located in the basem...,The described scenario presents at least one ...,Yes,The system is vulnerable to explosions or col...,M13,Destruction of the CED area,V38,System not protected from explosions,Real
2,S2,Confidential documents are stored in an archi...,The scenario is not sufficiently characterize...,More,Details on the physical characteristics of th...,M15,Physical theft of media,V32,Inadequate access control to the CED area,Potential
3,S2,Confidential documents are stored in an archi...,The scenario is not sufficiently characterize...,More,Details on the physical characteristics of th...,M25,Sabotage,V32,Inadequate access control to the CED area,Potential
4,S3,The connection between the data center and the...,The scenario is not sufficiently characterize...,More,Details on data line protection are missing,M18,Interception and espionage,V1,Communication channels not adequately protected,Potential


: 

In [None]:
# Load the tokenizer and model weights for AIDC-AI/Marco-o1 from Hugging Face.
# If the download is refused, you may need to log in and/or accept the license
# on the model's Hugging Face page; alternatively, set model_name to the path
# of a local checkpoint.

model_name = "AIDC-AI/Marco-o1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the weights to the device selected at the top of the notebook.
# generate() fails with a device-mismatch error when the input tensors are on
# the GPU while the model is still on the CPU.
model = model.to(device)



In [None]:
def analyze_scenario(text):
    """
    Generate a sampled model output for ``text`` and return it decoded.

    The text (e.g. a vulnerability description) is tokenized with the
    notebook-level ``tokenizer``, passed to the notebook-level ``model`` via
    ``generate`` with sampling enabled, and the first returned sequence is
    decoded with special tokens skipped.

    Parameters
    ----------
    text : str
        Prompt to send to the model.

    Returns
    -------
    str
        Decoded text of the first generated sequence.
        NOTE(review): for decoder-only models the sequence returned by
        ``generate`` usually starts with the prompt tokens, so the result
        presumably contains the prompt followed by the continuation -- confirm.
    """
    # Send the inputs to whatever device the model actually lives on, so the
    # call works whether or not the model has been moved to the GPU.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True
    ).to(model.device)

    # max_new_tokens bounds only the generated continuation. The previous
    # max_length=100 also counted the prompt tokens, so long prompts left
    # little or no room for generation.
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True
    )

    # Decode the first (and only) sequence of tokens
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Run the model once per row on "Assistant - Vulnerability description" and
# store the generated text in a new "Reasoning" column. (The prompt could also
# be combined with "Assistant - Risk description" if desired.)
df["Reasoning"] = df["Assistant - Vulnerability description"].map(analyze_scenario)

# Preview the first ten inputs next to their generated output.
df.loc[:, ["Assistant - Vulnerability description", "Reasoning"]].head(10)

In [None]:
# Write the augmented frame to a new workbook, leaving out the index column.
df.to_excel(excel_writer="scenarios_with_reasoning.xlsx", index=False)
# CSV alternative: df.to_csv("scenarios_with_reasoning.csv", index=False, encoding="utf-8")
print("Saved to scenarios_with_reasoning.xlsx")