In [9]:
import requests
from urllib.parse import quote

command = "cd package && sphinx-build -b doctest --keep-going ./doc/sphinx/source ./doc/html"
URL = f"https://www.explainshell.com/explain?cmd={quote(command)}"

headers = {"Content-type": "application/json", "Accept": "text/plain"}

r = requests.get(url=URL, headers=headers)
print(r.text)

<!DOCTYPE html>
<html>
    <head>
        <title>explainshell.com - cd package &amp;&amp; sphinx-build -b doctest --keep-going ./doc/sphinx/source ./doc/html</title>
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <!-- Bootstrap -->
    
        <link href="//cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/2.3.1/css/bootstrap.min.css" rel="stylesheet" media="screen" id="bootstrapCSS">
    
        <link href="//cdnjs.cloudflare.com/ajax/libs/font-awesome/3.2.1/css/font-awesome.min.css" rel="stylesheet">
        <link href="/static/css/es.css" rel="stylesheet" media="screen">
        <link href='//fonts.googleapis.com/css?family=Berkshire+Swash' rel='stylesheet' type='text/css'>
        
    </head>
    <body data-theme=default>
        <div class="container">
            <div class="small-push"></div>
            <div class="header">
                <div class="text-center logo">
                    <a href="/">
                        explain<sp

In [14]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote

def scrape_explainshell(command):
    encoded_command = quote(command)
    url = f"https://www.explainshell.com/explain?cmd={encoded_command}"

    headers = {
        "Content-type": "application/json",
        "Accept": "text/plain"
    }

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status() 

        soup = BeautifulSoup(response.text, 'html.parser')
        
        help_boxes = soup.select('table#help .help-box')
        
        if not help_boxes:
            return "No explanations found."

        explanations = [box.get_text(strip=False)+"\n"+20*"_" for box in help_boxes]
        return "\n\n".join(explanations)

    except requests.exceptions.RequestException as e:
        return f"An error occurred: {e}"

# Example usage
command = "cd package && sphinx-build -b doctest --keep-going ./doc/sphinx/source ./doc/html"
explanation = scrape_explainshell(command)
print(explanation)


change the working directory
____________________

directory
       An absolute or relative pathname of the directory that shall become the new working directory. The
       interpretation of a relative pathname by cd depends on the  -L  option  and  the  CDPATH  and  PWD
       environment variables. If directory is an empty string, the results are unspecified.

-      When a hyphen is used as the operand, this shall be equivalent to the command:

       cd "$OLDPWD" && pwd

which changes to the previous working directory and then writes its name.
____________________

AND and OR lists are sequences of one of more pipelines separated by the &&  and  ||  control  operators,
respectively.  AND and OR lists are executed with left associativity.  An AND list has the form

       command1 && command2

command2 is executed if, and only if, command1 returns an exit status of zero.

An OR list has the form

       command1 || command2

command2  is  executed  if and only if command1 returns a

In [12]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
import re

def scrape_explainshell(command):
    encoded_command = quote(command)
    url = f"https://www.explainshell.com/explain?cmd={encoded_command}"

    headers = {
        "Content-type": "application/json",
        "Accept": "text/plain"
    }

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        
        help_boxes = soup.select('table#help .help-box')

        if not help_boxes:
            return "No explanations found."

        command_parts = command.split()
        abstracted_command_parts = command_parts.copy()

        command_spans = soup.select('#command span[helpref]')
        for span in command_spans:
            command_text = span.get_text(strip=True)
            helpref = span.get('helpref')

            help_box = soup.find('pre', {'id': helpref})
            if help_box:
                explanation_text = help_box.get_text(strip=False)
                placeholder = extract_placeholder(explanation_text)

                if placeholder:
                    for i, part in enumerate(command_parts):
                        print(command_parts)
                        if command_text in part:
                            if command_text == part or command_text in part:
                                abstracted_command_parts[i] = placeholder

        abstracted_command = " ".join(abstracted_command_parts)
        
        return {
            "original_command": command,
            "abstracted_command": abstracted_command,
            "explanations": [box.get_text(strip=False) for box in help_boxes]
        }

    except requests.exceptions.RequestException as e:
        return f"An error occurred: {e}"

def extract_placeholder(explanation_text):
    placeholders = re.findall(r'<[^>]+>', explanation_text.split("\n")[0])
    return explanation_text.split("\n")[0] if placeholders else None

command = "cd package && sphinx-build -b doctest --keep-going ./doc/sphinx/source ./doc/html"
result = scrape_explainshell(command)

print("Original Command:", result["original_command"])
print("Abstracted Command:", result["abstracted_command"])
# print("Explanations:\n", result["explanations"])
result["explanations"]

['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
['cd', 'package', '&&', 'sphinx-build', '-b', 'doctest', '--keep-going', './doc/sphinx/source', './doc/html']
Original C

['change the working directory',
 'directory\n       An absolute or relative pathname of the directory that shall become the new working directory. The\n       interpretation of a relative pathname by cd depends on the  -L  option  and  the  CDPATH  and  PWD\n       environment variables. If directory is an empty string, the results are unspecified.\n\n-      When a hyphen is used as the operand, this shall be equivalent to the command:\n\n       cd "$OLDPWD" && pwd\n\nwhich changes to the previous working directory and then writes its name.',
 'AND and OR lists are sequences of one of more pipelines separated by the &&  and  ||  control  operators,\nrespectively.  AND and OR lists are executed with left associativity.  An AND list has the form\n\n       command1 && command2\n\ncommand2 is executed if, and only if, command1 returns an exit status of zero.\n\nAn OR list has the form\n\n       command1 || command2\n\ncommand2  is  executed  if and only if command1 returns a non-zero exit

In [30]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
import re

def change_paths_to_keyword(text):
    file_dir_pattern = r"(\./|/)[\w\-/]+"
    text = re.sub(file_dir_pattern, "CUSTOM_PATH_DIR", text)
    print(text)
    return text

def process_unknown_command(command):
    command = change_paths_to_keyword(command)
    return command

def scrape_explainshell(command):
    encoded_command = quote(command)
    url = f"https://www.explainshell.com/explain?cmd={encoded_command}"

    headers = {
        "Content-type": "application/json",
        "Accept": "text/plain"
    }

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        command_spans = soup.select('#command span[helpref], #command span[class*="unknown"]')

        command_explanation_map = dict()

        for span in command_spans:
            command_text = span.get_text()
            helpref = span.get('helpref')
            class_name = span.get('class')

            help_box = soup.find('pre', {'id': helpref})
            explanation_text = help_box.get_text() if help_box else "?"
            placeholder = extract_placeholder(explanation_text, class_name, command_text)
            
            if " ".join(class_name) in command_explanation_map and command_explanation_map:
                command_explanation_map[" ".join(class_name)]["command_text"]+=" " + command_text
            else:
                command_explanation_map[" ".join(class_name)] = {"command_text":command_text, "explanation_text": explanation_text, "placeholder":placeholder}
        
        display(command_explanation_map)
        
        
        abstracted_command = " ".join([process_unknown_command(value["command_text"]) if value["placeholder"]=="?" else value["placeholder"] for value in command_explanation_map.values()])
        
        display(abstracted_command)
        
        return {
            "original_command": command,
            "abstracted_command": abstracted_command,
        }

    except requests.exceptions.RequestException as e:
        return f"An error occurred: {e}"

def extract_placeholder(explanation_text, class_name, command_text):
    if explanation_text == "?":
        if "hasexpansion" in class_name:
            return "Parameter expantion"
        
    if explanation_text.startswith("AND and OR lists are sequences") and (command_text == "&&" or command_text == "||"):
        return command_text
    return explanation_text.split("\n")[0] or None

command = "find src/ -type f -name '*.php' -print0 | xargs -0 -L1 -P4 -- php -l -f"
result = scrape_explainshell(command)

{'command0 simplecommandstart': {'command_text': 'find(1)',
  'explanation_text': 'search for files in a directory hierarchy',
  'placeholder': 'search for files in a directory hierarchy'},
 'command0': {'command_text': "src/ -type f -name '*.php' -print0",
  'explanation_text': 'find [-H] [-L] [-P] [-D debugopts] [-Olevel] [path...] [expression]',
  'placeholder': 'find [-H] [-L] [-P] [-D debugopts] [-Olevel] [path...] [expression]'},
 'shell': {'command_text': '|',
  'explanation_text': "Pipelines\n    A  pipeline is a sequence of one or more commands separated by one of the control operators | or |&.  The\n    format for a pipeline is:\n\n           [time [-p]] [ ! ] command [ [|⎪|&] command2 ... ]\n\n    The standard output of command is connected  via  a  pipe  to  the  standard  input  of  command2.   This\n    connection  is performed before any redirections specified by the command (see REDIRECTION below).  If |&\n    is used, the standard error of command is connected to comma

-f


"search for files in a directory hierarchy find [-H] [-L] [-P] [-D debugopts] [-Olevel] [path...] [expression] Pipelines build and execute command lines from standard input --null PHP Command Line Interface 'CLI' --syntax-check -f"

In [29]:
# DOESNT PROPERLY WORK TODO:
command = '''for dirname in dev stable documentation_pages ; do if [ -d $dirname ]; then git add $dirname; fi done'''
result = scrape_explainshell(command)

{'shell': {'command_text': 'for dirname in dev stable documentation_pages ; do if ; then ; fi done',
  'explanation_text': 'for name [ [ in [ word ... ] ] ; ] do list ; done\n       The  list of words following in is expanded, generating a list of items.  The variable name is set\n       to each element of this list in turn, and list is executed each time.  If the in word is  omitted,\n       the  for  command  executes  list  once  for each positional parameter that is set (see PARAMETERS\n       below).  The return status is the exit status of the last command that executes.  If the expansion\n       of  the  items  following  in  results  in an empty list, no commands are executed, and the return\n       status is 0.',
  'placeholder': 'for name [ [ in [ word ... ] ] ; ] do list ; done'},
 'command0 simplecommandstart': {'command_text': '[(1)',
  'explanation_text': 'check file types and compare values',
  'placeholder': 'check file types and compare values'},
 'command0 hasexpansio

]


'for name [ [ in [ word ... ] ] ; ] do list ; done check file types and compare values -d FILE ] Add file contents to the index Parameter expantion'

In [28]:
command = "cp package1 package2 && cp package3 package4"
result = scrape_explainshell(command)

{'command0 simplecommandstart': {'command_text': 'cp(1)',
  'explanation_text': 'copy files and directories',
  'placeholder': 'copy files and directories'},
 'command0': {'command_text': 'package1 package2',
  'explanation_text': 'Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY.',
  'placeholder': 'Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY.'},
 'shell': {'command_text': '&&',
  'explanation_text': 'AND and OR lists are sequences of one of more pipelines separated by the &&  and  ||  control  operators,\nrespectively.  AND and OR lists are executed with left associativity.  An AND list has the form\n\n       command1 && command2\n\ncommand2 is executed if, and only if, command1 returns an exit status of zero.\n\nAn OR list has the form\n\n       command1 || command2\n\ncommand2  is  executed  if and only if command1 returns a non-zero exit status.  The return status of AND\nand OR lists is the exit status of the last command executed in the list.',
  'placeholder'

'copy files and directories Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY. && copy files and directories Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY.'