In [1]:
!pip install gradio pydantic nltk sacrebleu numpy -qq

zsh:1: /usr/local/bin/pip: bad interpreter: /usr/local/opt/python@3.10/bin/python3.10: no such file or directory
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
codex-python-types 0.0.3 requires typing-extensions==4.9.0, but you have typing-extensions 4.10.0 which is incompatible.[0m[31m
[0m

In [6]:
import gradio as gr
import numpy as np
from nltk.translate.bleu_score import SmoothingFunction
from nltk.translate.bleu_score import sentence_bleu
from pydantic import BaseModel, PrivateAttr
from sacrebleu import corpus_bleu

class ScoredLine(BaseModel):
    """One line of a corpus together with the scores computed for it."""

    # Raw text of the line (one element of the corpus text split on '\n').
    content: str
    # Individual metric values for this line; created empty and filled in by
    # Corpus.calculate_scores.
    scores: list[float]
    # Aggregate of `scores` (their mean, as assigned by Corpus.calculate_scores).
    normalized_score: float

class Corpus(BaseModel):
    """A block of text that is split on newlines and scored line by line.

    Attributes:
        text: The full corpus text; each '\\n'-separated segment is one line.
    """

    text: str

    # Cache holding a ``(text_snapshot, [ScoredLine, ...])`` tuple. It is a
    # pydantic private attribute so it is not treated as a model field.
    # Without this cache every access to ``lines`` would build brand-new
    # ScoredLine objects and the results of ``calculate_scores`` would be lost.
    _lines_cache = PrivateAttr(default=None)

    @property
    def lines(self):
        """Return the corpus lines as a stable list of ScoredLine objects.

        The list is built on first access and reused afterwards, so scores
        written onto the returned objects persist between accesses. The list is
        rebuilt (with empty scores) if ``text`` has changed since it was built.
        """
        cache = self._lines_cache
        if cache is None or cache[0] != self.text:
            fresh_lines = [
                ScoredLine(content=line, scores=[], normalized_score=0.0)
                for line in self.text.split('\n')
            ]
            cache = (self.text, fresh_lines)
            self._lines_cache = cache
        return cache[1]

    def calculate_scores(self):
        """Compute every metric for each line and store the results in place.

        Each line's ``scores`` becomes ``[relative_length, bleu]`` and its
        ``normalized_score`` becomes the arithmetic mean of those values.
        """
        for line in self.lines:
            line.scores = [
                self.relative_length(line.content),
                self.calculate_bleu_score(line.content),
                # Add more scoring methods here
            ]
            # Store a plain Python float rather than numpy.float64.
            line.normalized_score = float(np.mean(line.scores))

    def relative_length(self, line):
        """Return the character length of ``line`` as a fraction of the whole text.

        Returns 0.0 when the corpus text is empty instead of dividing by zero.
        """
        total_length = len(self.text)
        if total_length == 0:
            return 0.0
        return len(line) / total_length

    def calculate_bleu_score(self, line):
        """Return the smoothed sentence-level BLEU of ``line`` against the full text.

        The whitespace-tokenised corpus text is the single reference and the
        whitespace-tokenised line is the hypothesis; NLTK smoothing method 4
        is applied.
        """
        reference = [self.text.split()]
        hypothesis = line.split()
        smoothie = SmoothingFunction().method4
        return sentence_bleu(reference, hypothesis, smoothing_function=smoothie)

class CrossLinguisticComparison(BaseModel):
    """Pairs two corpora and compares the scores of their lines position by position.

    Attributes:
        corpus1: First corpus (the source side in the UI).
        corpus2: Second corpus (the target side in the UI).
    """

    corpus1: Corpus
    corpus2: Corpus

    def compare_scores(self, method='absolute_difference'):
        """Return one comparison score per aligned pair of lines.

        Lines are paired with ``zip``, so the result has as many entries as
        the shorter corpus has lines.

        Args:
            method: One of ``'absolute_difference'`` (absolute difference of the
                normalized scores), ``'euclidean_distance'`` (L2 distance between
                the score vectors) or ``'cosine_similarity'`` (cosine of the
                angle between the score vectors).

        Returns:
            A list of floats, one per line pair.

        Raises:
            ValueError: If ``method`` is not one of the supported names.
        """
        supported = ('absolute_difference', 'euclidean_distance', 'cosine_similarity')
        # Validate up front; otherwise an unknown method would surface as an
        # UnboundLocalError on ``score`` inside the loop.
        if method not in supported:
            raise ValueError(
                f"Unknown comparison method {method!r}; expected one of {supported}"
            )

        scores = []
        for line1, line2 in zip(self.corpus1.lines, self.corpus2.lines):
            if method == 'absolute_difference':
                score = abs(line1.normalized_score - line2.normalized_score)
            elif method == 'euclidean_distance':
                score = np.linalg.norm(np.array(line1.scores) - np.array(line2.scores))
            else:  # 'cosine_similarity'
                vec1 = np.asarray(line1.scores, dtype=float)
                vec2 = np.asarray(line2.scores, dtype=float)
                denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
                # A zero-length or all-zero vector has no direction; report 0.0
                # instead of dividing by zero and producing NaN.
                score = np.dot(vec1, vec2) / denominator if denominator else 0.0
            scores.append(float(score))
        return scores

def launch():
    """Build the Gradio interface for comparing two corpora and start it."""

    def compare_corpora(source_text, target_text, corpus_scoring_method, cross_linguistic_method):
        """Score both texts and return the rows shown in the output table.

        Args:
            source_text: Newline-separated source lines.
            target_text: Newline-separated target lines.
            corpus_scoring_method: Value of the "Corpus Scoring Method" dropdown.
            cross_linguistic_method: Name passed to
                CrossLinguisticComparison.compare_scores.

        Returns:
            A list of 3-element rows; each aligned line pair contributes three
            rows (line contents, raw scores, normalized scores + cross score).
        """
        # NOTE(review): corpus_scoring_method is received from the UI but is not
        # used below; Corpus.calculate_scores always computes every metric.
        corpus1 = Corpus(text=source_text)
        corpus2 = Corpus(text=target_text)

        corpus1.calculate_scores()
        corpus2.calculate_scores()

        comparison = CrossLinguisticComparison(corpus1=corpus1, corpus2=corpus2)
        cross_scores = comparison.compare_scores(method=cross_linguistic_method)

        output_table = []
        for line1, line2, cross_score in zip(corpus1.lines, corpus2.lines, cross_scores):
            # Row 1: the line text for both sides (the target text belongs in
            # the "Target" column rather than leaving it blank).
            output_table.append([
                f"{line1.content}",
                f"{line2.content}",
                ""
            ])
            # Row 2: the raw per-metric scores.
            output_table.append([
                f"Scores: {line1.scores}",
                f"Scores: {line2.scores}",
                ""
            ])
            # Row 3: the aggregated scores plus the cross-linguistic score.
            output_table.append([
                f"Normalized Score: {line1.normalized_score:.2f}",
                f"Normalized Score: {line2.normalized_score:.2f}",
                cross_score
            ])

        return output_table

    iface = gr.Interface(
        fn=compare_corpora,
        inputs=[
            gr.Textbox(label="Source Text", lines=5, value="Source sentence 1\nSource sentence 2\nSource sentence 3\nSource sentence 4\nSource sentence 5"),
            gr.Textbox(label="Target Text", lines=5, value="Target sentence 1\nTarget sentence 2\nTarget sentence 3\nTarget sentence 4\nTarget sentence 5"),
            gr.Dropdown(label="Corpus Scoring Method", choices=["relative_length", "bleu"], value="bleu"),
            gr.Dropdown(label="Cross-Linguistic Scoring Method", choices=["absolute_difference", "euclidean_distance", "cosine_similarity"], value="absolute_difference")
        ],
        outputs=gr.Dataframe(headers=["Source", "Target", "Cross-Linguistic Score"]),
        title="Novel Machine Translation Scoring",
        description="Enter the source text and target text to compare their internal consistency and cross-linguistic scores.",
    )

    iface.launch()

# Start the Gradio app when this cell/script is executed directly
# (not when the code is imported as a module).
if __name__ == "__main__":
    launch()

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.
