In [77]:
import pandas as pd
import re
from collections import defaultdict

# load titles 
df = pd.read_csv("robot_privacy.csv")
titles = df.iloc[:, 0].tolist()

In [78]:
# keywords organized by theme (privacy, robot, sensor)
PRIVACY_TYPES = {
    # "Location privacy": ["location privacy", "trajectory", "receiver location", "source location", "sink location"],
    # "Visual privacy": ["camera", "vision", "visual", "depth", "thermal", "lidar", "cartooning"],
    # "Acoustic privacy": ["acoustic", "microphone", "speech", "audio"],
    # "Multimodal data privacy": ["multi-modal", "fusion", "multi-sensor", "heterogeneous"],
    # "Physiological / biometric privacy": ["ecg", "gait", "health", "wearable", "biometric"],
    # "General data privacy": ["data aggregation", "encryption", "homomorphic", "query", "anonymization"]

    # 1) Informational Privacy — control over information (invasion, collection, processing, dissemination)
    "Informational Privacy": [
        # Invasion / leakage / disclosure
        "privacy invasion", "information intrusion", "data breach", "data theft", "information theft",
        "data leakage", "information disclosure", "data disclosure", "data release",
        # Collection / monitoring
        "data collection", "information gathering", "data acquisition", "information acquisition",
        "sensor data collection", "monitoring", "surveillance", "tracking", "telemetry",
        # Processing / analysis & PETs
        "data processing", "information processing", "data analysis", "data analytics",
        "profiling", "data mining", "feature extraction", "signal processing", "in-sensor processing",
        "aggregation", "data aggregation", "secure aggregation", "privacy-preserving aggregation",
        # Query / database privacy
        "range query", "top-k query", "k-nn query", "set-membership", "exact query evaluation",
        "verifiable query", "privacy-preserving query", "query privacy",
        # Cryptography & foundations
        "encryption", "decryption", "homomorphic", "differential privacy", "local differential privacy",
        "pufferfish privacy", "multi-party computation", "secure multi-party computation", "smpc", "mpc",
        "federated learning", "federated averaging", "federated reinforcement learning",
        "zero-knowledge", "blind signature", "ring signature", "attribute-based", "access control",
        "authentication", "key agreement", "certificate-less", "privacy-preserving authentication",
        # De-identification / anonymization
        "anonymization", "anonymisation", "de-identification", "pseudonymization", "thumbnail-preserving",
        "obfuscation", "perturbation", "synthetic data", "data synthesization",
        # Location-privacy (treated as informational in WSN/IoT)
        "location privacy", "source-location privacy", "sink-location privacy", "receiver-location privacy",
        "trajectory privacy", "temporal privacy",
        # Routing / traffic-shaping techniques for privacy
        "phantom routing", "fake packet", "dummy traffic", "onion routing", "mix-ring", "mixing ring",
        "proxy node routing", "random walk routing", "credit routing", "jellyfish routing",
        # Data governance / storage
        "privacy-preserving storage", "privacy preserving storage", "data retention", "information sharing",
        "privacy-preserving data sharing", "data publishing", "privacy-preserving publishing",
        # Ledgers / blockchain
        "blockchain", "distributed ledger", "smart contract", "verifiable credential", "decentralized identifier",
    ],

    # 2) Physical Privacy — bodily/space boundaries, intrusion/obtrusion/contamination, modesty
    "Physical Privacy": [
        # Space & boundaries
        "personal space", "physical space", "proximity", "distance keeping", "boundary control",
        "physical boundaries", "perimeter", "territoriality", "territory",
        # Intrusion / obtrusion
        "physical intrusion", "unauthorized entry", "trespass", "obtrusion", "physical obstruction",
        "physical interference", "encroachment", "occlusion", "blocking",
        # Contamination / hygiene contexts
        "contamination", "biohazard", "contact-less", "contactless",
        # Modesty / exposure contexts
        "modesty", "nudity", "exposure", "body concealment", "decency",
        # HRI presence & proxemics
        "physical presence", "embodiment", "co-located", "co-present", "proxemics",
        # Places where physical privacy is salient
        "private room", "private space", "bathroom", "changing room",
    ],

    # 3) Social Privacy — control over social interactions (association, crowding/isolation, public gaze, solitude, intimacy, anonymity, reserve)
    "Social Privacy": [
        # Association / group privacy
        "association privacy", "group privacy", "relationship privacy", "association tracking",
        # Crowding / isolation
        "crowding", "overcrowding", "isolation", "social isolation", "loneliness",
        # Public gaze / bystanders
        "public gaze", "public viewing", "being watched", "public attention",
        "bystander", "onlooker", "passer-by", "public space", "public setting", "public environment",
        # Solitude / intimacy
        "solitude", "being alone", "retreat", "intimacy", "intimate interaction", "private interaction",
        # Anonymity / reserve
        "anonymity", "anonymous", "de-anonymization", "re-identification", "de-identification",
        "reserve", "self-restraint", "withholding information", "non-disclosure",
        # Contexts common in HRI
        "telepresence", "teleoperated robot", "teleoperation",
        "self-disclosure", "disclosure intention", "crowd monitoring", "crowd estimation",
        "bystander privacy", "onlooker privacy",
    ],

    # 4) Psychological Privacy — freedom of values/thoughts (interrogation, psychological distance)
    "Psychological Privacy": [
        "interrogation", "questioning", "probing", "psychological pressure",
        "psychological distance", "emotional distance", "mental space", "cognitive separation",
        # Common HRI phrasing
        "persuasion pressure", "undue influence", "manipulative questioning",
    ],
}

ROBOT_TYPES = {
    # 1) Robots designed for human interaction, companionship, or social engagement
    "Social robot": [
        "social robot", "companion robot", "assistive robot", "humanoid", "humanoid robot",
        "interactive robot", "entertainment robot", "educational robot", "tutoring robot",
        "therapy robot", "rehabilitation robot", "communication robot", "elderly care robot",
        "pet robot", "robot pet", "childcare robot", "personal robot"
    ],

    # 2) Robots providing services in commercial, industrial, or public contexts
    "Service robot": [
        "service robot", "delivery robot", "last-mile delivery robot", "food delivery robot",
        "hospital delivery robot", "logistics robot", "warehouse robot", "inventory robot",
        "cleaning robot", "vacuum robot", "robot vacuum", "floor cleaning robot",
        "industrial robot", "factory robot", "assembly robot", "manufacturing robot",
        "inspection robot", "maintenance robot", "repair robot", "security robot",
        "surveillance robot", "patrol robot", "airport robot", "hospitality robot"
    ],

    # 3) Robots enabling remote presence and interaction
    "Telepresence robot": [
        "telepresence robot", "remote presence robot", "video conferencing robot",
        "teleoperated robot", "teleoperation robot", "remote-controlled robot",
        "avatar robot", "robot avatar"
    ],

    # 4) Autonomous ground, aerial, or maritime vehicles
    "Autonomous vehicle": [
        "autonomous vehicle", "self-driving car", "driverless car", "robotaxi",
        "automated vehicle", "intelligent vehicle", "autonomous bus", "autonomous truck",
        "unmanned vehicle", "unmanned ground vehicle", "ugv",
        "unmanned aerial vehicle", "uav", "drone", "quadcopter", "hexacopter", "octocopter",
        "fixed-wing drone", "delivery drone", "inspection drone", "mapping drone",
        "agricultural drone", "surveillance drone", "unmanned surface vehicle", "usv",
        "autonomous ship", "autonomous boat", "unmanned underwater vehicle", "uuv",
        "autonomous submarine", "marine drone"
    ],

    # 5) Robots for medical, elder, or patient care
    "Care robot": [
        "care robot", "healthcare robot", "medical robot", "patient care robot",
        "nursing robot", "elder care robot", "geriatrics robot", "rehabilitation robot",
        "therapy robot", "hospital service robot", "assistive healthcare robot"
    ],

    # 6) Multiple robots working together
    "Multi-robot systems": [
        "multi-robot", "multi-robot system", "multi-robot systems",
        "swarm robot", "robot swarm", "robotic swarm", "collective robotics",
        "cooperative robot", "cooperative robots", "collaborative robot", "collaborative robots",
        "cobot", "cobots", "distributed robotic system", "heterogeneous robot team",
        "homogeneous robot team", "multi-agent robotic system"
    ],

    # 7) Robots for surgery and other medical procedures
    "Surgical robot": [
        "surgical robot", "robot-assisted surgery", "robotic surgery", "medical robot",
        "minimally invasive surgery robot", "da vinci robot", "orthopedic surgery robot",
        "neurosurgery robot", "laparoscopic robot", "endoscopic robot"
    ],

    # 8) Agricultural / Field robots for farming, forestry, and environmental monitoring
    "Agricultural / Field robot": [
        "agricultural robot", "farming robot", "farm robot", "harvest robot", "crop monitoring robot",
        "weeding robot", "forestry robot", "field robot", "agbot", "vineyard robot",
        "precision agriculture robot", "agriculture drone", "livestock monitoring robot"
    ],

    # 9) Construction / Infrastructure robots
    "Construction / Infrastructure robot": [
        "construction robot", "infrastructure robot", "bridge inspection robot", "building inspection robot",
        "pipe inspection robot", "sewer inspection robot", "road paving robot", "concrete printing robot",
        "demolition robot", "maintenance robot", "repair robot", "civil engineering robot"
    ],

    # 10) Exploration / Search & Rescue robots
    "Exploration / Search & Rescue robot": [
        "search and rescue robot", "sar robot", "disaster response robot",
        "hazardous environment robot", "exploration robot", "explorer robot",
        "space robot", "planetary rover", "mars rover", "lunar rover",
        "volcano exploration robot", "mine rescue robot", "nuclear inspection robot"
    ],

    # 11) Military / Defense robots
    "Military / Defense robot": [
        "military robot", "defense robot", "combat robot", "armed robot",
        "autonomous combat vehicle", "unmanned combat vehicle", "ucv",
        "unmanned ground combat vehicle", "ugcv", "explosive ordnance disposal robot",
        "bomb disposal robot", "eod robot", "surveillance robot", "reconnaissance robot",
        "tactical robot"
    ],

    # 12) Underwater / Maritime robots
    "Underwater / Maritime robot": [
        "autonomous underwater vehicle", "auv", "remotely operated vehicle", "rov",
        "underwater inspection robot", "submersible robot", "marine robot",
        "underwater drone", "autonomous submarine", "ocean exploration robot",
        "underwater maintenance robot", "hull cleaning robot"
    ]
}

SENSOR_TYPES = {
    # 1) Camera / vision-based sensors
    "Camera / vision": [
        "camera", "vision", "rgb camera", "rgb", "rgb-d", "rgbd",
        "depth camera", "depth sensor", "3d camera", "stereo camera", "stereo vision",
        "lidar", "liDAR", "light detection and ranging", "tof camera", "time-of-flight camera",
        "thermal camera", "thermal imaging", "thermal vision", "flir", "infrared camera",
        "multispectral camera", "multispectral imaging", "hyperspectral camera", "hyperspectral imaging",
        "fisheye camera", "wide-angle camera", "panoramic camera", "omnidirectional camera",
        "event camera", "dynamic vision sensor", "line-scan camera", "structured light sensor"
    ],

    # 2) Microphone / acoustic-based sensors
    "Microphone / acoustic": [
        "microphone", "acoustic sensor", "acoustic array", "microphone array",
        "audio sensor", "speech sensor", "voice sensor", "voice recorder",
        "audio recorder", "sound sensor", "acoustic camera", "binaural microphone",
        "ultrasonic microphone", "directional microphone", "parabolic microphone",
        "hydrophone", "underwater microphone", "sonar", "echosounder", "passive sonar",
        "active sonar", "acoustic localization", "acoustic ranging"
    ],

    # 3) Infrared / thermal arrays
    "Infrared / thermal array": [
        "infrared sensor", "ir sensor", "thermal array", "thermal sensor", "thermopile array",
        "pyroelectric sensor", "pir sensor", "passive infrared sensor", "ir camera",
        "ir thermometer", "thermal imaging array", "thermal detection array"
    ],

    # 4) Pressure / tactile sensing
    "Pressure / tactile": [
        "pressure sensor", "force sensor", "force-sensitive resistor", "fsr",
        "tactile sensor", "touch sensor", "capacitive touch sensor",
        "piezoelectric sensor", "load cell", "strain gauge", "haptic sensor",
        "skin sensor", "electronic skin", "e-skin", "robotic skin", "force-torque sensor"
    ],

    # 5) IMU / motion sensing
    "IMU / motion": [
        "imu", "inertial measurement unit", "accelerometer", "gyroscope", "magnetometer",
        "inertial sensor", "motion sensor", "movement sensor", "orientation sensor",
        "step counter", "pedometer", "inertial navigation system", "ins"
    ],

    # 6) RF / wireless-based sensing
    "RF / wireless": [
        "wifi", "wi-fi", "wi fi", "802.11", "rf", "radio frequency", "rfid", "nfc",
        "bluetooth", "ble", "zigbee", "zwave", "lpwan", "lorawan", "sigfox",
        "cellular", "lte", "4g", "5g", "6g",
        "uwb", "ultra wideband", "millimeter wave", "mmwave", "sub-6ghz",
        "backscatter", "ambient backscatter", "rf tomography", "radio tomographic imaging",
        "rf sensing", "rf localization"
    ],

    # 7) Multimodal / sensor fusion
    "Multimodal / fusion": [
        "sensor fusion", "multi-sensor", "multisensor", "multi-modal", "multimodal",
        "cross-modal", "crossmodal", "data fusion", "sensor network", "heterogeneous sensor",
        "distributed sensing", "cooperative sensing", "integrated sensing"
    ],

    # 8) Other specialized sensing
    "Other specialized": [
        "biosensor", "biometric sensor", "biopotential sensor", "ecg", "electrocardiogram",
        "eeg", "electroencephalogram", "emg", "electromyogram", "pulse oximeter",
        "gait sensor", "respiration sensor", "heart rate sensor", "ppg", "photoplethysmography",
        "radar", "millimeter wave radar", "mmwave radar", "fmcw radar", "doppler radar",
        "ultrasonic sensor", "sonar sensor", "proximity sensor", "distance sensor",
        "environmental sensor", "gas sensor", "chemical sensor", "co2 sensor",
        "light sensor", "lux sensor", "ambient light sensor", "photodiode", "phototransistor",
        "magnetic sensor", "hall effect sensor", "compass sensor", "geophone", "seismic sensor"
    ]
}

MITIGATION_STRATEGIES = {
    # 1) Privacy by Design — embedding privacy & security protections in the design phase
    "Privacy by Design": [
        "privacy by design", "secure by design", "security by design", "built-in privacy",
        "embedded privacy", "privacy-aware design", "privacy-preserving design",
        "privacy-preserving architecture", "secure architecture", "privacy-preserving system",
        "privacy-preserving framework", "secure framework", "privacy-preserving protocol",
        "secure protocol", "lightweight privacy protocol", "privacy-enhancing technology",
        "pet", "privacy-preserving AI", "privacy-preserving machine learning",
        "federated learning", "federated reinforcement learning", "differential privacy",
        "local differential privacy", "homomorphic encryption", "secure multi-party computation",
        "multi-party computation", "mpc", "secure hardware", "trusted execution environment",
        "secure enclave", "sgx", "arm trustzone", "hardware security module", "hsm",
        "privacy-aware sensing", "privacy-preserving sensing", "privacy-aware robot",
        "privacy-preserving robot", "privacy-aware autonomous vehicle",
        "privacy-preserving navigation", "access control design", "data minimization",
        "privacy risk assessment", "threat modeling", "privacy impact assessment"
    ],

    # 2) Control — user ability to control data, access, robot capabilities, and areas
    "Control": [
        # Data control
        "data deletion", "right to be forgotten", "data retention", "data storage location",
        "data sovereignty", "data localization", "amend incorrect data", "data correction",
        "restrict data collection", "prohibit data collection", "prohibit video collection",
        "prohibit audio collection", "restrict sensor data", "disable logging",
        # Access control
        "access limitation", "restrict access", "stakeholder-only access", "role-based access control",
        "attribute-based access control", "hardware access key", "password protection",
        "two-factor authentication", "access card", "key fob", "secure login",
        # Robot control
        "disable robot function", "disable sensor", "turn camera away", "switch off camera",
        "mute microphone", "snooze sensor", "pause robot", "privacy mode",
        "disable navigation", "disable mapping", "turn off lidar", "disable gps",
        # Area control
        "no-go zone", "geofencing", "virtual boundary", "spatial restriction",
        "restrict robot movement", "limit movement", "spatial privacy"
    ],

    # 3) Transparency — making data, sensors, and intentions understandable to users
    "Transparency": [
        # Data transparency
        "data transparency", "data disclosure", "data use disclosure", "purpose disclosure",
        "data collection purpose", "data sharing purpose", "data usage explanation",
        "datafication process", "data processing details", "data provenance", "data traceability",
        # Informed consent
        "informed consent", "iterative consent", "rolling consent", "granular consent",
        "layered consent", "just-in-time consent", "user comprehension", "plain language notice",
        "consent dialog", "consent form", "privacy prompt",
        # Sensor transparency
        "sensor transparency", "sensor capability disclosure", "display sensors",
        "visible sensors", "sensor labeling", "sensor indicator light",
        "show active sensors", "sensor usage explanation", "sensor awareness",
        "sensor education", "accessible for older adults", "elder-friendly interface"
    ],

    # 4) Regulation — government and industry rules for privacy protection
    "Regulation": [
        "privacy regulation", "privacy law", "privacy legislation", "privacy compliance",
        "data protection law", "data protection act", "gdpr", "general data protection regulation",
        "ccpa", "california consumer privacy act", "hipaa", "health insurance portability and accountability act",
        "ferpa", "coppa", "children's online privacy protection act", "nist privacy framework",
        "iso/iec 27701", "data protection officer", "policy enforcement", "privacy enforcement",
        "regulatory compliance", "regulatory requirement"
    ],

    # 5) Contextual Integrity — adapting privacy norms to context and relationships
    "Contextual Integrity": [
        "contextual integrity", "privacy norms", "social norms", "norms of information flow",
        "context-aware privacy", "context-sensitive privacy", "context-aware access control",
        "purpose limitation", "appropriate use", "role-based disclosure",
        "only share with authorized person", "authorized recipient", "privacy expectation",
        "user expectation", "situational privacy", "boundary regulation", "norm compliance",
        "norm violation", "norm adaptation", "intent-aware privacy", "recipient intention"
    ]
}

In [79]:
# filtering functions

# Assign categories
def categorize(title, theme):
    t = title.lower()
    matched = []
    for category, keywords in theme.items():
        for kw in keywords:
            if kw in t:
                matched.append(category)
    return matched

# Count per category
def category_counts(list):
    counter = defaultdict(int)
    for categories in list:
        for c in categories:
            counter[c] += 1
    return dict(counter)

In [80]:
# sorting into categories

privacy_list = []
robot_list = []
sensor_list = []
mitigations_list = []

for t in titles:
    privacy_list.append(categorize(t, PRIVACY_TYPES))
    robot_list.append(categorize(t, ROBOT_TYPES))
    sensor_list.append(categorize(t, SENSOR_TYPES))
    mitigations_list.append(categorize(t,MITIGATION_STRATEGIES))

df["privacy type"] = privacy_list
df["robot type"] = robot_list
df["sensor type"] = sensor_list
df["mitigation strategy"] = mitigations_list

In [81]:
# counting number in each category

privacy_counts = category_counts(df["privacy type"])
robot_counts = category_counts(df["robot type"])
sensor_counts = category_counts(df["sensor type"])
mitigation_counts = category_counts(df["mitigation strategy"])

In [82]:
print(f"Total papers: {len(df)}")
print("\nPrivacy type counts:", privacy_counts)
print("\nRobot type counts:", robot_counts)
print("\nSensor type counts:", sensor_counts)
print("\nMitigation strategies counts:", mitigation_counts)

Total papers: 164

Privacy type counts: {'Informational Privacy': 20, 'Social Privacy': 11, 'Physical Privacy': 1}

Robot type counts: {'Telepresence robot': 8, 'Social robot': 41, 'Care robot': 2, 'Service robot': 9, 'Multi-robot systems': 11, 'Underwater / Maritime robot': 2, 'Autonomous vehicle': 1, 'Surgical robot': 3}

Sensor type counts: {'RF / wireless': 12, 'Camera / vision': 9, 'Multimodal / fusion': 2, 'IMU / motion': 4}

Mitigation strategies counts: {'Privacy by Design': 13, 'Control': 2, 'Contextual Integrity': 1}


In [83]:
# df.to_csv("categorized_autonomous_car_privacy.csv",index=False)