In [106]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer

import numpy as np
import torch
import json

import sandra
from sandra.torch import ReasonerModule

# Load the dataset

In [145]:
dataset = [json.loads(l) for l in open("dataset.jsonl").readlines()]
data = {}
data["text"] = [sample["text"] for sample in dataset]
data["role"] = [sample["roles"] for sample in dataset]
data["label"] = [sample["label"] for sample in dataset]

targets = set(data["label"])

In [146]:
print(len(data["text"]), "samples")
print("targets: ", targets)

150 samples
targets:  {'Bathroom', 'DiningRoom', 'Bedroom', 'Kitchen', 'LivingRoom'}


## Feature based

In [147]:
X = data["text"]

# turn target into numerical values
le = LabelEncoder()
y = le.fit_transform(data["label"])

### N-grams

In [156]:
vectorizer = CountVectorizer() # only take 3 grams
X_vec = vectorizer.fit_transform(X)

In [157]:
# split into train and test
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.3, random_state=42)

Classify

In [158]:
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

print(classification_report(y_test, pred, zero_division=0, target_names=targets))

              precision    recall  f1-score   support

    Bathroom       0.88      0.88      0.88         8
  DiningRoom       1.00      0.89      0.94         9
     Bedroom       1.00      1.00      1.00         9
     Kitchen       1.00      1.00      1.00        10
  LivingRoom       0.80      0.89      0.84         9

    accuracy                           0.93        45
   macro avg       0.93      0.93      0.93        45
weighted avg       0.94      0.93      0.93        45



In [159]:
clf = SVC(random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

print(classification_report(y_test, pred, zero_division=0, target_names=targets))

              precision    recall  f1-score   support

    Bathroom       1.00      1.00      1.00         8
  DiningRoom       1.00      1.00      1.00         9
     Bedroom       1.00      1.00      1.00         9
     Kitchen       1.00      1.00      1.00        10
  LivingRoom       1.00      1.00      1.00         9

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



### Deductive approach - sandra

In [160]:
ontology = sandra.Ontology.from_graph("sandra101.ttl")

print("Descriptions: ", len(ontology.descriptions))
print("Role: ", len(ontology.roles))

Descriptions:  5
Role:  15


Build the situations

In [161]:
# build the role name -> role dictionary
rolename2role = { r.name.split("#")[1]: r for r in ontology.roles}

# map each role to a situation
situations = [
    sandra.Situation([rolename2role[r] for r in sample])
    for sample in data["role"]
]

create reasoner

In [162]:
reasoner = ReasonerModule(ontology)

In [163]:
sample = situations[0]
print("text: ", data["text"][0])

text:  The spacious bathroom features a modern shower system, under-sink cabinets for hygiene products, and decorative candles on the shelves.


In [164]:
# compute the encoding
encoded_sample = reasoner.encode(sample)
encoded_sample

tensor([0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.])

infer

In [165]:
inference = reasoner(encoded_sample)[0]
inference

tensor([1.0000, 0.0000, 0.3333, 0.0000, 0.3333])

In [166]:
for idx, p in enumerate(inference):
    print(ontology.descriptions[idx].name, " - Probability: ", p.item())

    for role in ontology.descriptions[idx].components:
        print("\t", "+" if role in sample.individuals else "-", role.name)

https://w3id.org/sandra/sandra101#Bathroom  - Probability:  1.0
	 + https://w3id.org/sandra/sandra101#HygieneProducts
	 + https://w3id.org/sandra/sandra101#HygieneEquipment
https://w3id.org/sandra/sandra101#Bedroom  - Probability:  0.0
	 - https://w3id.org/sandra/sandra101#PersonalItems
	 - https://w3id.org/sandra/sandra101#ClothingStorage
	 - https://w3id.org/sandra/sandra101#SleepingFurniture
https://w3id.org/sandra/sandra101#DiningRoom  - Probability:  0.3333333432674408
	 + https://w3id.org/sandra/sandra101#DecorativeItems
	 - https://w3id.org/sandra/sandra101#DiningFurniture
	 - https://w3id.org/sandra/sandra101#ServingItems
https://w3id.org/sandra/sandra101#Kitchen  - Probability:  0.0
	 - https://w3id.org/sandra/sandra101#FoodStorage
	 - https://w3id.org/sandra/sandra101#CookingUtensil
	 - https://w3id.org/sandra/sandra101#CookingAppliance
https://w3id.org/sandra/sandra101#LivingRoom  - Probability:  0.3333333432674408
	 + https://w3id.org/sandra/sandra101#DecorativeItems
	 - ht

infer all

In [167]:
encoded_situation = torch.stack([reasoner.encode(s) for s in situations])
encoded_situation.shape

torch.Size([150, 15])

Classify on the situation fv

In [172]:
X_train, X_test, y_train, y_test = train_test_split(encoded_situation, y, test_size=0.3, random_state=42)

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

print(classification_report(y_test, pred, zero_division=0, target_names=targets))

              precision    recall  f1-score   support

    Bathroom       1.00      1.00      1.00         8
  DiningRoom       0.90      1.00      0.95         9
     Bedroom       1.00      0.89      0.94         9
     Kitchen       0.89      0.80      0.84        10
  LivingRoom       0.90      1.00      0.95         9

    accuracy                           0.93        45
   macro avg       0.94      0.94      0.94        45
weighted avg       0.94      0.93      0.93        45



what on only the inferred descriptions

In [171]:
inferred_descriptions = reasoner(encoded_situation)
inferred_descriptions.shape

torch.Size([150, 5])

classify on inference

In [173]:
X_train, X_test, y_train, y_test = train_test_split(inferred_descriptions, y, test_size=0.3, random_state=42)

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

print(classification_report(y_test, pred, zero_division=0, target_names=targets))

              precision    recall  f1-score   support

    Bathroom       1.00      1.00      1.00         8
  DiningRoom       1.00      1.00      1.00         9
     Bedroom       1.00      1.00      1.00         9
     Kitchen       1.00      1.00      1.00        10
  LivingRoom       1.00      1.00      1.00         9

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



classify on situation + inferece

In [179]:
X = torch.cat([encoded_situation, inferred_descriptions], dim=1)
X[:2]

tensor([[0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3333,
         0.0000, 0.3333],
        [1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.6667, 0.0000,
         0.0000, 0.3333]])

In [180]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

print(classification_report(y_test, pred, zero_division=0, target_names=targets))

              precision    recall  f1-score   support

    Bathroom       1.00      1.00      1.00         8
  DiningRoom       1.00      1.00      1.00         9
     Bedroom       1.00      1.00      1.00         9
     Kitchen       1.00      1.00      1.00        10
  LivingRoom       1.00      1.00      1.00         9

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



### Combine distributiona and Deductive

In [183]:
X = torch.cat([torch.tensor(X_vec.toarray()), inferred_descriptions], dim=1)

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)

print(classification_report(y_test, pred, zero_division=0, target_names=targets))

              precision    recall  f1-score   support

    Bathroom       1.00      1.00      1.00         8
  DiningRoom       1.00      1.00      1.00         9
     Bedroom       1.00      1.00      1.00         9
     Kitchen       1.00      1.00      1.00        10
  LivingRoom       1.00      1.00      1.00         9

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

