# Análisis léxico

In [1]:
;; Master pattern for the lexer: one alternation with 14 capture groups, one
;; group per token category (the in-pattern `#` comments label each group).
;;
;; Flags: `x` lets the pattern contain whitespace and `#` comments, and `i`
;; makes matching case-insensitive (so the exponent `e` also matches `E`, and
;; identifiers may start with an upper-case letter).
;;
;; Alternatives are tried left to right, so their order matters: the float
;; alternative precedes the integer one, and the `//` comment alternative
;; precedes the single `/` division operator. The last alternative matches
;; any single remaining character and reports it as invalid.
(def regex #"(?xi)
    ( [+-]? \d+ [.] \d* (?: e [+-]? \d+)? )  # Grupo 1: Flotante
  | ( \d+ )                                  # Grupo 2: Entero
  | ( // .* )                                # Grupo 3: Comentario
  | ( [a-z] \w* )                            # Grupo 4: Variable
  | ( [=] )                                  # Grupo 5: Asignación
  | ( [+] )                                  # Grupo 6: Suma
  | ( [-] )                                  # Grupo 7: Resta
  | ( [*] )                                  # Grupo 8: Multiplicación
  | ( [/] )                                  # Grupo 9: División
  | ( \^ )                                   # Grupo 10: Potencia
  | ( [(] )                                  # Grupo 11: Paréntesis que abre
  | ( [)] )                                  # Grupo 12: Paréntesis que cierra
  | ( \s )                                   # Grupo 13: Espacios
  | ( . )                                    # Grupo 14: Carácter inválido
  ")

#'user/regex

In [2]:
(defn tokenize-file
  "Reads the file `file-name` and splits its contents into tokens using `regex`.

  Returns a lazy sequence of `[lexeme type]` vectors, where `lexeme` is the
  matched text and `type` is the Spanish label of the capture group that
  matched. Whitespace matches (group 13) are dropped from the result."
  [file-name]
  (->> (re-seq regex (slurp file-name))
       ;; Group 13 is whitespace: it separates tokens but is not reported.
       (remove (fn [match] (match 13)))
       (map (fn [match]
              ;; Index 0 is the whole match; indices 1-14 are the capture
              ;; groups, of which only the one that matched is non-nil.
              (let [token (match 0)]
                (cond
                  (match 1) [token "Flotante"]
                  (match 2) [token "Entero"]
                  (match 3) [token "Comentario"]
                  (match 4) [token "Variable"]
                  (match 5) [token "Asignación"]
                  ;; Label spelled "Suma" to agree with the other Spanish labels.
                  (match 6) [token "Suma"]
                  (match 7) [token "Resta"]
                  (match 8) [token "Multiplicación"]
                  (match 9) [token "División"]
                  (match 10) [token "Potencia"]
                  (match 11) [token "Paréntesis que abre"]
                  (match 12) [token "Paréntesis que cierra"]
                  (match 14) [token "Carácter inválido"]))))))

#'user/tokenize-file

In [3]:
(defn table-lines
  "Returns a lazy sequence with one formatted table row per token of the
  file `file-name`: the lexeme left-aligned in a 30-character column,
  followed by the token type."
  [file-name]
  (->> (tokenize-file file-name)
       ;; \"%-29s \" lines up exactly like \"%-30s\" for lexemes of up to 29
       ;; characters, but the literal space guarantees a gap before the type
       ;; when the lexeme is 30 characters or longer (e.g. a long comment).
       (map (fn [[lexeme kind]] (format "%-29s %s" lexeme kind)))))

#'user/table-lines

In [4]:
(defn line
  "Returns a string made of `character` repeated `size` times."
  [character size]
  (apply str (repeat size character)))

#'user/line

In [5]:
(defn table-string
  "Builds the complete token table for the file `file-name` as one string:
  a rule, the header row, a rule, one row per token, and a closing rule,
  joined with newlines (no trailing newline)."
  [file-name]
  (let [rule   (line \= 55)
        header (format "%-30s%s" "Token" "Tipo")
        rows   (table-lines file-name)]
    (clojure.string/join \newline
                         (concat [rule header rule]
                                 rows
                                 [rule]))))

#'user/table-string

In [6]:
(defn print-token-table
  "Prints the token table built from the file `file-name`, followed by a
  newline. Returns nil."
  [file-name]
  (-> file-name
      table-string
      println))

#'user/print-token-table

In [7]:
;; Print the token table for the sample input file "ejemplo.txt".
(print-token-table "ejemplo.txt")

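=======================================================
Token                         Tipo
=======================================================
b                             Variable
=                             Asignación
7                             Entero
a                             Variable
=                             Asignación
32.4                          Flotante
*                             Multiplicación
(                             Paréntesis que abre
-8.6                          Flotante
-                             Resta
b                             Variable
)                             Paréntesis que cierra
/                             División
6.1E-8                        Flotante
d                             Variable
=                             Asignación
a                             Variable
^                             Potencia
b                             Variable
// Esto es un comentario muy muy muy largoooooooooooComentario
=======================================================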

nil

In [8]:
(defn file-token-table
  "Writes the token table built from the file `input-file` into the file
  `output-file`."
  [input-file output-file]
  (let [table (table-string input-file)]
    (spit output-file table)))

#'user/file-token-table

In [9]:
;; Write the token table for "ejemplo.txt" into "salida.txt".
(file-token-table "ejemplo.txt" "salida.txt")

nil