In [28]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
EMBEDDER = "BAAI/bge-base-en-v1.5"
FAISS_PATH = "vector_db/"
SEARCH_KWARGS = {'k' : 3}

In [12]:
mac_dataset = [
    ("List all installed Homebrew packages", "brew list"),
    ("Show detailed information about the macOS version", "sw_vers"),
    ("Display information about disk space usage", "df -h"),
    ("Show top processes by CPU usage", "top -o cpu"),
    ("Check if port 443 on example.com is open using telnet", "echo | telnet example.com 443"),
    ("Check open ports on a host using nc", "nc -zv example.com 443"),
    ("List all installed applications", "ls /Applications"),
    ("Display information about system hardware", "system_profiler"),
    ("Show connected USB devices", "system_profiler SPUSBDataType"),
    ("Check the macOS firewall status", "sudo /usr/libexec/ApplicationFirewall/socketfilterfw --getglobalstate"),
    ("List all running processes", "ps aux"),
    ("Show information about network interfaces", "ifconfig"),
    ("Display detailed information about a file", "stat filename"),
    ("List available Wi-Fi networks", "/System/Library/PrivateFrameworks/Apple80211.framework/Versions/A/Resources/airport -s"),
    ("Check the status of macOS Gatekeeper", "spctl --status"),
    ("List all user accounts", "dscl . -list /Users"),
    ("Show the current date and time", "date"),
    ("List all connected Bluetooth devices", "system_profiler SPBluetoothDataType"),
    ("Check DNS resolution for a domain", "nslookup example.com"),
    ("Display detailed information about a specific process", "ps -p <process_id> -o pid,ppid,command,%cpu,%mem"),
    ("List all active network connections", "netstat -an"),
    ("Display information about the macOS kernel", "uname -a"),
    ("Check system uptime", "uptime"),
    ("List all installed Python packages", "pip list"),
    ("Check available updates for installed software", "softwareupdate -l"),
    ("Show information about macOS disk partitions", "diskutil list"),
    ("Display information about the macOS audio devices", "system_profiler SPAudioDataType"),
    ("Check for available updates for macOS", "softwareupdate -l"),
    ("List all available shells on the system", "cat /etc/shells"),
    ("Show macOS system log", "log show"),
    ("Check kernel extensions", "kextstat"),
    ("Display information about the macOS file system", "diskutil info /"),
    ("List all connected FireWire devices", "system_profiler SPSerialATADataType"),
    ("Check for available updates for Ruby gems", "gem outdated"),
    ("Display detailed information about the macOS memory", "vm_stat"),
    ("List all installed Ruby gems", "gem list"),
    ("Show information about macOS system preferences", "sudo system_profiler SPConfigurationProfileDataType"),
    ("Check available disk space on a specific directory", "du -h /path/to/directory"),
    ("List all user groups", "dscl . -list /Groups"),
    ("Display macOS environment variables", "printenv"),
    ("Check macOS system integrity with SIP status", "csrutil status"),
    ("Show detailed information about system memory", "sysctl hw"),
    ("List all user accounts with User ID", "dscl . -list /Users UniqueID"),
    ("Display information about the macOS battery", "ioreg -rn AppleSmartBattery"),
    ("Check disk usage for a specific directory", "du -h /path/to/directory"),
    ("List all macOS system services", "launchctl list"),
    ("Display macOS system-wide energy impact", "sudo powermetrics --samplers smc"),
    ("List all connected Thunderbolt devices with detailed information", "system_profiler SPTechnologyDataType"),
    ("Display information about macOS system fonts", "fc-list : family"),
    ("Check macOS Bluetooth status", "system_profiler SPBluetoothDataType"),
    ("Show detailed information about macOS system audio", "system_profiler SPAudioDataType"),
    ("List all macOS user accounts with User ID", "dscl . -list /Users UniqueID"),
    ("Check macOS system firewall settings", "sudo defaults read /Library/Preferences/com.apple.alf globalstate"),
    ("Show information about macOS network interfaces using ifconfig", "ifconfig"),
    ("List all installed macOS Quick Look plugins", "qlmanage -m plugins"),
    ("Check macOS Time Machine backup status", "tmutil status"),
    ("Display information about macOS system hardware sensors", "ioreg -n IODisplayConnect"),
    ("List all available macOS screen resolutions", "system_profiler SPDisplaysDataType | grep Resolution"),
    ("Check macOS AirPlay status", "system_profiler SPDisplaysDataType"),
    ("Show information about macOS system input devices", "system_profiler SPUSBDataType"),
    ("List all macOS user accounts with home directory paths", "dscl . -readall /Users NFSHomeDirectory"),
    ("Display macOS kernel panic logs", "log show --predicate 'eventMessage contains \"kernel panic\"' --info"),
    ("Check macOS system's NVRAM (Non-Volatile Random-Access Memory) settings", "nvram -p"),
    ("Show detailed information about macOS system thermal conditions", "sudo powermetrics --samplers smc | grep -i temperature"),
    ("List all available macOS input methods", "defaults read /Library/Preferences/com.apple.HIToolbox.plist AppleInputSourceHistory | grep Name"),
    ("Check macOS FileVault encryption status for a specific disk", "fdesetup status -device /dev/diskX"),
    ("Show macOS system-wide crash reports", "ls -lR /Library/Logs/DiagnosticReports/"),
    ("List all available Ruby versions (if using rbenv)", "rbenv versions"),
    ("Show information about macOS firewall settings", "sudo defaults read /Library/Preferences/com.apple.alf globalstate"),
    ("List all installed Java Runtime Environments (JREs)", "/usr/libexec/java_home -V"),
    ("Check macOS user environment variables", "launchctl getenv"),
    ("Show information about macOS XPC services", "launchctl list | grep XPC"),
    ("List all connected USB devices with detailed information", "system_profiler SPUSBDataType"),
    ("Check macOS system resource usage in real-time", "top -o cpu"),
    ("Display information about macOS system preferences profiles", "sudo profiles -P"),
    ("Show detailed information about macOS I/O Kit registry", "ioreg -l"),
    ("Check macOS FileVault encryption status", "fdesetup status"),
    ("List all installed macOS Quick Look generators", "qlmanage -m generators"),
    ("Display detailed information about macOS Bluetooth devices", "system_profiler SPBluetoothDataType"),
    ("Check macOS AirDrop status", "defaults read /Library/Preferences/com.apple.sharing.advertise"),
    ("Show information about macOS battery cycles", "ioreg -r -c 'AppleSmartBattery' | grep -i cycle"),
    ("List all installed macOS kernel extensions", "kextstat -l"),
    ("Check macOS disk arbitration status", "diskutil arbstatus"),
    ("Show information about macOS SIP (System Integrity Protection)", "csrutil status"),
    ("List all macOS system services", "systemsetup -listallservices"),
    ("Display macOS system-wide energy usage", "sudo powermetrics --samplers smc"),
    ("Check macOS system display sleep settings", "pmset -g | grep displaysleep"),
    ("Show detailed information about macOS system sensors", "ioreg -n IODisplayConnect"),
    ("Check macOS system-wide VPN settings", "networksetup -listallnetworkservices"),
    ("Display macOS system thermal conditions", "sudo powermetrics --samplers smc | grep -i temperature"),
    ("List all macOS system startup items", "sudo launchctl list"),
    ("Show information about macOS system boot volume", "bless --info /"),
    ("Check macOS system's Bluetooth PAN (Personal Area Network) status", "networksetup -getBluetoothPAN"),
    ("Display macOS system-wide power settings", "sudo pmset -g"),
    ("List all installed macOS kernel modules", "kextstat -l"),
    ("Show information about macOS system user and group IDs", "dscl . -list /Users UniqueID"),
    ("Check macOS system-wide firewall logging status", "sudo /usr/libexec/ApplicationFirewall/socketfilterfw --getloggingmode"),
    ("Display macOS system-wide energy usage information", "sudo powermetrics --samplers smc"),
    ("List all macOS system keyboard layouts", "defaults read /Library/Preferences/com.apple.HIToolbox.plist AppleEnabledInputSources"),
    ("Show detailed information about macOS system memory statistics", "vm_stat"),
    ("Check macOS system's iCloud status", "defaults read MobileMeAccounts"),
    ("List all macOS system software RAID configurations", "diskutil appleraid list"),
    ("Display macOS system-wide energy efficiency settings", "sudo pmset -g assertions"),
    ("Check macOS system's System Integrity Protection (SIP) status", "csrutil status"),
    ("Show information about macOS system-wide security and privacy settings", "sudo spctl --status"),
    ("List all installed macOS user agents and daemons", "launchctl list"),
    ("Display macOS system-wide proxy auto-discovery (PAC) URL", "networksetup -getautoproxyurl"),
    ("Check macOS system's disk arbitration status", "diskutil arbstatus"),
    ("Show detailed information about macOS system-wide hardware sensors", "sudo powermetrics --samplers smc | grep -i sensor"),
    ("Check macOS system-wide proxy settings", "networksetup -getwebproxy Ethernet"),
    ("Show macOS system information in a concise format", "system_profiler -detailLevel mini"),
    ("List all macOS system services", "systemsetup -listallservices"),
    ("Display macOS system-wide energy usage", "sudo powermetrics --samplers smc"),
    ("Check macOS system display sleep settings", "pmset -g | grep displaysleep"),
    ("Show detailed information about macOS system sensors", "ioreg -n IODisplayConnect"),
    ("List all installed macOS command-line tools", "xcode-select -p"),
    ("Check macOS system-wide VPN settings", "networksetup -listallnetworkservices"),
    ("Display macOS system thermal conditions", "sudo powermetrics --samplers smc | grep -i temperature"),
    ("List all macOS system startup items", "sudo launchctl list"),
    ("Show information about macOS system boot volume", "bless --info /"),
    ("Check macOS system's Bluetooth PAN (Personal Area Network) status", "networksetup -getBluetoothPAN"),
    ("Display macOS system-wide power settings", "sudo pmset -g"),
    ("List all installed macOS kernel modules", "kextstat -l"),
    ("Show information about macOS system user and group IDs", "dscl . -list /Users UniqueID"),
    ("Check macOS system-wide firewall logging status", "sudo /usr/libexec/ApplicationFirewall/socketfilterfw --getloggingmode"),
    ("Display macOS system-wide energy usage information", "sudo powermetrics --samplers smc"),
    ("List all macOS system keyboard layouts", "defaults read /Library/Preferences/com.apple.HIToolbox.plist AppleEnabledInputSources"),
    ("Show detailed information about macOS system memory statistics", "vm_stat"),
    ("Check macOS system's iCloud status", "defaults read MobileMeAccounts"),
    ("List all macOS system software RAID configurations", "diskutil appleraid list"),
    ("Display macOS system-wide energy efficiency settings", "sudo pmset -g assertions"),
    ("Check macOS system's System Integrity Protection (SIP) status", "csrutil status"),
    ("Show information about macOS system-wide security and privacy settings", "sudo spctl --status"),
    ("List all installed macOS user agents and daemons", "launchctl list"),
    ("Display macOS system-wide proxy auto-discovery (PAC) URL", "networksetup -getautoproxyurl"),
    ("Check macOS system's disk arbitration status", "diskutil arbstatus"),
    ("Show detailed information about macOS system-wide hardware sensors", "sudo powermetrics --samplers smc | grep -i sensor"),
    ("Check macOS system-wide proxy settings", "networksetup -getwebproxy Ethernet"),
    ("Show macOS system information in a concise format", "system_profiler -detailLevel mini"),
    ("List all macOS system services", "systemsetup -listallservices"),
    ("Display macOS system-wide energy usage", "sudo powermetrics --samplers smc"),
    ("Check macOS system display sleep settings", "pmset -g | grep displaysleep"),
    ("Show detailed information about macOS system sensors", "ioreg -n IODisplayConnect"),
    ("List all installed macOS command-line tools", "xcode-select -p"),
    ("Check macOS system-wide VPN settings", "networksetup -listallnetworkservices"),
    ("Display macOS system thermal conditions", "sudo powermetrics --samplers smc | grep -i temperature"),
    ("List all macOS system startup items", "sudo launchctl list"),
    ("Show information about macOS system boot volume", "bless --info /"),
    ("Check macOS system's Bluetooth PAN (Personal Area Network) status", "networksetup -getBluetoothPAN")]

In [48]:
from langchain_core.documents import Document

def preprocess_dataset(dataset):
    new_list = []
    for i in dataset:
        query = i[0]
        command = i[1]
        doc = Document(page_content=query, metadata={"command": command})
        new_list.append(doc)
    return new_list


embeddings = HuggingFaceEmbeddings(model_name=EMBEDDER,
                                       model_kwargs={'device': 'mps'})

def generate_embeddings(new_list):
    db = FAISS.from_documents(new_list, embeddings)
    db.save_local(FAISS_PATH)

In [49]:
preprocessed_list = preprocess_dataset(mac_dataset)


In [50]:
print(preprocessed_list)

[Document(page_content='List all installed Homebrew packages', metadata={'command': 'brew list'}), Document(page_content='Show detailed information about the macOS version', metadata={'command': 'sw_vers'}), Document(page_content='Display information about disk space usage', metadata={'command': 'df -h'}), Document(page_content='Show top processes by CPU usage', metadata={'command': 'top -o cpu'}), Document(page_content='Check if port 443 on example.com is open using telnet', metadata={'command': 'echo | telnet example.com 443'}), Document(page_content='Check open ports on a host using nc', metadata={'command': 'nc -zv example.com 443'}), Document(page_content='List all installed applications', metadata={'command': 'ls /Applications'}), Document(page_content='Display information about system hardware', metadata={'command': 'system_profiler'}), Document(page_content='Show connected USB devices', metadata={'command': 'system_profiler SPUSBDataType'}), Document(page_content='Check the mac

In [51]:
generate_embeddings(preprocessed_list)

In [52]:
faiss_vectorstore = FAISS.load_local(FAISS_PATH, embeddings)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs=SEARCH_KWARGS)

In [None]:
# bm25_retriever = BM25Retriever.from_documents(texts)
# ensemble_retriever = EnsembleRetriever(retrievers=[faiss_retriever, bm25_retriever], weights=[0.8, 0.2])
# print("Vector Store Creation Completed")

In [62]:
query = "How much disk space does my computer have?"
docs = faiss_retriever.get_relevant_documents(query)
print('Retrieved docs:', docs)

print(docs[0])

Retrieved docs: [Document(page_content='Display information about disk space usage', metadata={'command': 'df -h'}), Document(page_content='Check available disk space on a specific directory', metadata={'command': 'du -h /path/to/directory'}), Document(page_content='Check disk usage for a specific directory', metadata={'command': 'du -h /path/to/directory'})]
page_content='Display information about disk space usage' metadata={'command': 'df -h'}
