# Actividad 2: Análisis sintáctico
**Universidad Internacional de La Rioja (UNIR) - Máster Universitario en Inteligencia Artificial - Procesamiento del Lenguaje Natural**

***
**Datos de los alumnos (Nombres y Apellidos):**
    
&emsp;- César David Sáenz Salazar
    
&emsp;- Alvaro Pinto Mamani

*Fecha: 24/01/2023*
***

## Índice
* [1) Definición de funciones](#1)
* [2) Inicialización de las reglas sintácticas](#2)
* [3) Aplicación del algoritmo CKY](#3)

### 1. Definición de funciones<a class="anchor" id="1"></a>

In [6]:
class MatrixRule:
    """One entry stored in a cell of the CKY chart.

    Bundles the derived symbol, the right-hand side of the grammar rule
    that produced it, the origin of the derivation and its probability.
    """

    def __init__(self, symbol, rule, origin, probability):
        """Store the four fields exactly as given; nothing is validated."""
        self.symbol, self.rule = symbol, rule
        self.origin, self.probability = origin, probability

    def get_symbol(self):
        """Return the symbol derived by this entry."""
        return self.symbol

    def get_rule(self):
        """Return the right-hand side of the rule that produced this entry."""
        return self.rule

    def get_origin(self):
        """Return the origin recorded for this entry."""
        return self.origin

    def get_probability(self):
        """Return the probability attached to this entry."""
        return self.probability

    def get_data(self):
        """Return the entry formatted as ``symbol: rule <- origin | probability``."""
        pieces = (
            self.symbol,  # must already be a str; it is not converted
            ": ",
            str(self.rule),
            " <- ",
            str(self.origin),
            " | ",
            str(self.probability),
        )
        return "".join(pieces)


def algorithm_CKY(phrase, grammar):
    """Fill a CKY chart for ``phrase`` using the probabilistic ``grammar``.

    Args:
        phrase: Sequence of word tokens to analyse.
        grammar: Mapping from a left-hand-side symbol to an iterable of
            ``(probability, rhs)`` rules. ``rhs`` is either a single string
            or a collection of alternative strings; each alternative is a
            word or a binary production written as ``"B C"``.

    Returns:
        A ``(len_words, matrix)`` pair. ``matrix`` is a ``len_words`` x
        ``len_words`` grid of lists; ``matrix[i][j]`` holds the
        ``MatrixRule`` entries derived for words ``i`` to ``j`` (inclusive).
        Only cells with ``i <= j`` are ever filled.
    """
    len_words = len(phrase)
    matrix = [[[] for _ in range(len_words)] for _ in range(len_words)]
    if not len_words:
        # Nothing to analyse; the grammar is not inspected at all.
        return len_words, matrix

    # Flatten the grammar once, keeping its iteration order. A right-hand
    # side given as a bare string is wrapped in a 1-tuple for matching,
    # because ``x in "some string"`` is a substring test and would accept
    # partial words or partial symbol pairs. The original right-hand side is
    # kept untouched so the stored rule (and its printed form) is unchanged.
    productions = []
    for key, rules in grammar.items():
        for rule in rules:
            rhs = rule[1]
            alternatives = (rhs,) if isinstance(rhs, str) else rhs
            productions.append((key, rule[0], rhs, alternatives))

    # Main diagonal: rules whose right-hand side contains the word itself.
    for n, word in enumerate(phrase):
        for key, rule_probability, rhs, alternatives in productions:
            if word in alternatives:
                matrix[n][n].append(
                    MatrixRule(symbol=key, rule=rhs, origin="-", probability=rule_probability)
                )

    # Remaining cells, by increasing span width.
    for span in range(1, len_words):
        for i in range(len_words - span):
            j = i + span
            for k in range(i, j):
                left_cell = matrix[i][k]
                right_cell = matrix[k + 1][j]
                if not left_cell or not right_cell:
                    continue
                # The candidate "B C" strings do not depend on the rule being
                # tried, so build them once per split point.
                pairs = [
                    (left, right, left.get_symbol() + " " + right.get_symbol())
                    for left in left_cell
                    for right in right_cell
                ]
                origin = ((i, k), (k + 1, j))
                for key, rule_probability, rhs, alternatives in productions:
                    for left, right, symbol in pairs:
                        if symbol in alternatives:
                            # Rule probability times both children's probabilities.
                            probability = (
                                rule_probability
                                * left.get_probability()
                                * right.get_probability()
                            )
                            matrix[i][j].append(
                                MatrixRule(symbol=key, rule=rhs, origin=origin, probability=probability)
                            )
    return len_words, matrix


def print_matrix(len_words, matrix):
    """Print every cell of the chart, one index pair per header line.

    All ``len_words`` x ``len_words`` cells are visited in row-major order.
    For each cell the row and column indices are printed first, followed by
    one line per entry using the entry's ``get_data()`` text.
    """
    for row in range(len_words):
        for col in range(len_words):
            print(row, col)
            cell = matrix[row][col]
            for entry in cell:
                print(entry.get_data())

### 2. Inicialización de las reglas sintácticas<a class="anchor" id="2"></a>

In [7]:
# Sentence to analyse, tokenised on whitespace
phrase = "time flies like an arrow".split()

# Grammar rules: symbol -> tuple of (probability, right-hand side) pairs.
# NOTE: a right-hand side with a single alternative is a plain string
# (parentheses without a trailing comma do not build a tuple); a side with
# several alternatives is a tuple of strings.
grammar = {
    "S": (
        (0.8, "NP VP"),
    ),
    "NP": (
        (0.002, ("time", "flies", "arrow")),
        (0.3, "Det Nom"),
        (0.2, "Nom Nom"),
    ),
    "Nom": (
        (0.002, ("time", "flies", "arrow")),
        (0.1, "Nom Noun"),
        (0.2, "Nom PP"),
    ),
    "VP": (
        (0.004, "time"),
        (0.008, ("flies", "like")),
        (0.3, "Verb NP"),
        (0.2, "Verb PP"),
    ),
    "PP": (
        (0.1, "Prep NP"),
    ),
    "Verb": (
        (0.01, "time"),
        (0.02, ("flies", "like")),
    ),
    "Noun": (
        (0.01, ("time", "flies", "arrow")),
    ),
    "Det": (
        (0.05, "an"),
    ),
    "Prep": (
        (0.05, "like"),
    ),
}

### 3. Aplicación del algoritmo CKY<a class="anchor" id="3"></a>

In [8]:
# Build the CKY chart for the sample phrase and grammar, then print the
# contents of every cell.
len_words, matrix = algorithm_CKY(phrase, grammar)
print_matrix(len_words, matrix)

0 0
NP: ('time', 'flies', 'arrow') <- - | 0.002
Nom: ('time', 'flies', 'arrow') <- - | 0.002
VP: time <- - | 0.004
Verb: time <- - | 0.01
Noun: ('time', 'flies', 'arrow') <- - | 0.01
0 1
S: NP VP <- ((0, 0), (1, 1)) | 1.2800000000000001e-05
NP: Nom Nom <- ((0, 0), (1, 1)) | 8.000000000000001e-07
Nom: Nom Noun <- ((0, 0), (1, 1)) | 2.0000000000000003e-06
VP: Verb NP <- ((0, 0), (1, 1)) | 6e-06
0 2
S: NP VP <- ((0, 1), (2, 2)) | 5.120000000000001e-09
0 3
0 4
S: NP VP <- ((0, 0), (1, 4)) | 9.600000000000004e-13
NP: Nom Nom <- ((0, 0), (1, 4)) | 2.400000000000001e-14
S: NP VP <- ((0, 1), (2, 4)) | 1.1520000000000003e-13
Nom: Nom PP <- ((0, 1), (2, 4)) | 6.000000000000003e-14
1 0
1 1
NP: ('time', 'flies', 'arrow') <- - | 0.002
Nom: ('time', 'flies', 'arrow') <- - | 0.002
VP: ('flies', 'like') <- - | 0.008
Verb: ('flies', 'like') <- - | 0.02
Noun: ('time', 'flies', 'arrow') <- - | 0.01
1 2
S: NP VP <- ((1, 1), (2, 2)) | 1.2800000000000001e-05
1 3
1 4
S: NP VP <- ((1, 1), (2, 4)) | 2.88e-10
N