# Install dependencies

# LLaMA2 evaluation

In [None]:
import os

# Turn off Weights & Biases logging; set before `wandb` is imported below.
os.environ['WANDB_MODE']='disabled'

import torch
import wandb
import platform
import gradio
import warnings
import transformers
from datetime import datetime

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, TextStreamer, DataCollatorForLanguageModeling
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model

from datasets import load_dataset
from trl import SFTTrainer, DPOTrainer, KTOConfig, KTOTrainer

from tqdm import tqdm
# Registers `progress_map` / `progress_apply` on pandas objects (used by the evaluation loops).
tqdm.pandas()

import pandas as pd
from pathlib import Path

import spacy
# Shared spaCy pipeline; `tokenize` below runs every prediction through it.
NLP = spacy.load('en_core_web_sm')

import nltk
from nltk.tokenize import sent_tokenize
# Fetch the Punkt sentence-splitter data needed by `sent_tokenize`.
nltk.download('punkt')

def tokenize(txt):
    """Run *txt* through the shared spaCy pipeline and return its tokens joined by single spaces."""
    return " ".join(token.text for token in NLP(txt))

import subprocess
def run(cmd):
    """Echo a shell command line, execute it through the shell, and return the CompletedProcess."""
    print("Run shell command:\n\t", cmd)
    completed = subprocess.run(cmd, shell=True)
    return completed

# Define Prompts

In [None]:
# Instruction variants under evaluation. The list index is the `prompt_id`
# passed to `gec_predict`, and it also appears in the output CSV file names.
# The strings are sent to the model verbatim (the quoted source text is
# appended directly after them), so their spacing and wording quirks are part
# of the experiment — do not "clean them up" without re-running the evaluation.
INSTRUCTIONS = [
    "Fix grammatical errors for the following text . Keep only one variant .",
    "Rewrite this text to make it grammatically correct . ",
    "Rewrite the text to fix any grammatical errors .",
    "Correct the grammar mistakes in the following text .",
    "Rewrite the text . The output text should not contain any grammatical or spelling mistakes .",
    "Fix all grammatical errors , do not rephrase .",
    "Fix only grammatical errors precisely. ",
    "Precisely fix grammatical errors : ",

    # Second group of variants (prompt ids 8-12).
    "Revise the following sentence with proper grammar",
    "Correct grammatical errors in this sentence .",
    "Revise grammatical mistakes in the following text.",
    "Revise mistakes in the following text written by a beginner learner with a lot of mistakes.",
    "Revise mistakes in the following text written by a advanced learner with a few of mistakes.",
    
]

# Setup model

In [None]:
model_id = "meta-llama/Llama-2-7b-chat-hf"

# 4-bit NF4 quantization with bfloat16 compute and no nested (double) quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

# Load the quantized model; `device_map={"": 0}` places every module on GPU 0.
use_flash_attn = False
load_in_8bit = False

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
    load_in_8bit=load_in_8bit,
    trust_remote_code=True,
    attn_implementation="flash_attention_2" if use_flash_attn else "eager"
)
# NOTE(review): prepare_model_for_kbit_training is a training-time helper. It is
# kept so results stay comparable with earlier runs — confirm it is still wanted
# for a notebook that only evaluates.
model = prepare_model_for_kbit_training(model)
# This notebook only runs inference, so the KV cache stays enabled. The previous
# `use_cache = False` was a training-time setting whose own comment asked for it
# to be re-enabled for inference.
model.config.use_cache = True
model.config.pretraining_tp = 1

In [None]:
# Tokenizer matching `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): appending EOS on encode is normally a training-time setting —
# confirm it is intended for an evaluation-only notebook.
tokenizer.add_eos_token = True
# Notebook display of the two special-token flags.
tokenizer.add_bos_token, tokenizer.add_eos_token

In [None]:

def gec_predict(model, tokenizer, text, prompt_id=0):
    """Correct the grammar of *text* with a chat model and return one corrected line.

    Builds a system + user chat prompt from ``INSTRUCTIONS[prompt_id]``, decodes
    greedily, and returns the first line of the reply that is not a known
    boilerplate preamble.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Matching tokenizer exposing ``apply_chat_template`` and
            ``batch_decode``.
        text: Source sentence to correct.
        prompt_id: Index into ``INSTRUCTIONS``.

    Returns:
        The extracted correction with ``</s>`` and double quotes removed and
        surrounding whitespace stripped. When the reply cannot be parsed (no
        ``[/INST]`` marker, or no usable line), *text* is returned unchanged so
        a single odd generation does not abort a whole evaluation run.
    """
    # Boilerplate lead-in sentences that must never be taken for the correction.
    answers = {
        "Sure, here's the corrected text:",
        "Sure, here is a rewritten version of the text that is grammatically correct:",
        "Here is a corrected version of the text:",
        "Here's a corrected version of the sentence:",
        "Here is the rewritten text:",
        "Here is the corrected text:",
        "Here is a revised version of the text that is grammatically correct:",
        "Sure! Here's the corrected text:",
        "Here is a rewritten version of the text that is grammatically correct:",
        "Sure, here's a corrected version of the sentence:",
        "Sure, here's a corrected version of the text:",
        "Here is the rewritten text with corrected grammar:",
        "Here is a corrected version of the sentence:",
        "Sure, here is the corrected text:",
        "Sure, here is a corrected version of the sentence:",
        "Sure, here is the rewritten text:",
        "Sure, I'd be happy to help! Here is a revised version of the sentence that is grammatically correct:",
        "Sure, here's the rewritten text:",
        "Here is a rewritten version of the text with corrected grammar and spelling:",
        "Here is the rewritten text with corrected grammar and punctuation:",
        "Here is a rewritten version of the text with grammatical corrections:",
        "Sure, here is a corrected version of the text:",
    }

    instruction = INSTRUCTIONS[prompt_id]

    messages = [
        {"role": "system", "content": "You are a writing assistant. Please ensure that your responses consist only of corrected texts."},
        {"role": "user", "content": instruction + f"\"{text}\""},
    ]

    # The prompt tensor is created on CPU; move it to wherever the model lives.
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
    # Greedy decoding. `temperature` has no effect when do_sample=False, so it is
    # no longer passed.
    generated_ids = model.generate(input_ids, max_new_tokens=1000, do_sample=False)
    output = tokenizer.batch_decode(generated_ids)[0]

    # The decoded text contains the prompt; the reply starts after the first [/INST].
    _, marker, reply = output.partition('[/INST]')
    if not marker:
        return text

    # Skip an introductory sentence that ends in a colon and a line break.
    _, colon, after_colon = reply.partition(":\n")
    if colon:
        reply = after_colon

    for raw_line in reply.split('\n'):
        line = raw_line.replace("</s>", "").replace("\"", '').strip()
        if len(line) > 1 and line not in answers:
            return line

    # Nothing usable in the reply: fall back to the unmodified source.
    return text



# Smoke test: correct a single deliberately broken sentence with the default prompt.
gec_predict(model, tokenizer, "Ths text contains to email best .")


## Evaluate NUCLE

In [None]:
# Run every instruction variant over the NUCLE test set and write one CSV of
# spaCy-tokenized predictions per prompt id.
os.makedirs('outputs', exist_ok=True)  # to_csv does not create missing directories
nucle_src = pd.read_csv('nucle.test.csv')  # read once instead of once per prompt
for i in range(len(INSTRUCTIONS)):
    predictions = nucle_src.src.progress_map(
        lambda x, prompt_id=i: gec_predict(model, tokenizer, x, prompt_id)
    )
    df = nucle_src.assign(output=predictions.map(tokenize))
    df.to_csv(f'outputs/nucle-llama-7b-chat-{i}.csv', index=False)
    


## Evaluate BEA

In [None]:
# BEA
# ERRANT scoring command template; `{pred_txt}` is the path of the predictions file.
# NOTE(review): `cmd` is neither formatted nor executed in this cell, and the
# relative host paths given to `-v` are likely treated as named volumes by
# Docker rather than bind mounts — confirm before using it.
cmd = """
docker run -it --rm  -v bea-dev.txt:/data/input.txt \
    -v bea-dev.m2:/data/ref.m2 \
    -v {pred_txt}:/data/pred.txt \
    errant \
        python3 /errant/parallel_to_m2.py -orig /data/input.txt -cor /data/pred.txt -out /data/pred.m2 && \
        python3 /errant/compare_m2.py -hyp /data/pred.m2 -ref /data/ref.m2
"""

# Run every instruction variant over the BEA dev set and write one CSV of
# spaCy-tokenized predictions per prompt id.
os.makedirs('outputs', exist_ok=True)  # to_csv does not create missing directories
bea_src = pd.read_csv('wi_locness.dev.gold.bea18.csv')  # read once instead of once per prompt
for i in range(len(INSTRUCTIONS)):
    predictions = bea_src.src.progress_map(
        lambda x, prompt_id=i: gec_predict(model, tokenizer, x, prompt_id)
    )
    df = bea_src.assign(output=predictions.map(tokenize))
    df.to_csv(f'outputs/bea-llama-7b-chat-{i}.csv', index=False)


# Evaluate Mistral AI

In [None]:
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# 4-bit NF4 quantization with bfloat16 compute, nested (double) quantization,
# and fp32 CPU offload allowed.
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= True,
    llm_int8_enable_fp32_cpu_offload= True
)

# Load the quantized model; `device_map={"": 0}` places every module on GPU 0.
use_flash_attn = False
load_in_8bit = False

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
    load_in_8bit=load_in_8bit,
    trust_remote_code=True,
    attn_implementation="flash_attention_2" if use_flash_attn else "eager"
)
# NOTE(review): prepare_model_for_kbit_training is a training-time helper. It is
# kept so results stay comparable with earlier runs — confirm it is still wanted
# for a notebook that only evaluates.
model = prepare_model_for_kbit_training(model)
# This notebook only runs inference, so the KV cache stays enabled. The previous
# `use_cache = False` was a training-time setting whose own comment asked for it
# to be re-enabled for inference.
model.config.use_cache = True
model.config.pretraining_tp = 1

# Tokenizer matching `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

In [None]:
def gec_predict(model, tokenizer, text, prompt_id=0):
    """Correct the grammar of *text* with a chat model and return its first corrected sentence.

    Builds a system + user chat prompt from ``INSTRUCTIONS[prompt_id]``, decodes
    greedily, and returns the first sentence of the first reply line that is not
    a known boilerplate preamble.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Matching tokenizer exposing ``apply_chat_template`` and
            ``batch_decode``.
        text: Source sentence to correct.
        prompt_id: Index into ``INSTRUCTIONS``.

    Returns:
        The extracted sentence with ``</s>`` and double quotes removed, or *text*
        unchanged when the reply says the input is already "grammatically
        correct" or cannot be parsed.
    """
    # Boilerplate lead-in sentences that must never be taken for the correction.
    answers = {
        "Sure, here's the corrected text:",
        "Sure, here is a rewritten version of the text that is grammatically correct:",
        "Here is a corrected version of the text:",
        "Here's a corrected version of the sentence:",
        "Here is the rewritten text:",
        "Here is the corrected text:",
        "Here is a revised version of the text that is grammatically correct:",
        "Sure! Here's the corrected text:",
        "Here is a rewritten version of the text that is grammatically correct:",
        "Sure, here's a corrected version of the sentence:",
        "Sure, here's a corrected version of the text:",
        "Here is the rewritten text with corrected grammar:",
        "Here is a corrected version of the sentence:",
        "Sure, here is the corrected text:",
        "Sure, here is a corrected version of the sentence:",
        "Sure, here is the rewritten text:",
        "Sure, I'd be happy to help! Here is a revised version of the sentence that is grammatically correct:",
        "Sure, here's the rewritten text:",
        "Here is a rewritten version of the text with corrected grammar and spelling:",
        "Here is the rewritten text with corrected grammar and punctuation:",
        "Here is a rewritten version of the text with grammatical corrections:",
        "Sure, here is a corrected version of the text:",
    }

    instruction = INSTRUCTIONS[prompt_id]

    # NOTE(review): some chat templates reject a leading "system" message —
    # confirm this tokenizer's template accepts it.
    messages = [
        {"role": "system", "content": "You are a writing assistant. Please ensure that your responses consist only of corrected texts."},
        {"role": "user", "content": instruction + f"\"{text}\""},
    ]

    # The prompt tensor is created on CPU; move it to wherever the model lives.
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

    # Greedy decoding. `temperature` has no effect when do_sample=False, so it is
    # no longer passed.
    generated_ids = model.generate(input_ids, max_new_tokens=1000, do_sample=False)
    output = tokenizer.batch_decode(generated_ids)[0]

    # The decoded text contains the prompt; the reply starts after the first [/INST].
    _, marker, reply = output.partition('[/INST]')
    if not marker:
        return text

    # Look for the phrase in the REPLY only. The decoded output also contains the
    # prompt, and INSTRUCTIONS[1] itself says "grammatically correct", which used
    # to make every input come back uncorrected for that prompt.
    # NOTE(review): preambles such as "Here is a revised version ... that is
    # grammatically correct:" also trigger this early return — confirm intended.
    if "grammatically correct" in reply:
        return text

    # Skip an introductory sentence that ends in a colon and a line break.
    _, colon, after_colon = reply.partition(":\n")
    if colon:
        reply = after_colon

    for raw_line in reply.split('\n'):
        line = raw_line.replace("</s>", "").replace("\"", '').strip()
        if len(line) > 1 and line not in answers:
            # Keep only the first sentence of the selected line.
            sentences = sent_tokenize(line)
            return sentences[0].strip() if sentences else text

    # Nothing usable in the reply: fall back to the unmodified source.
    return text

# Smoke test: correct a single deliberately broken sentence with the default prompt.
gec_predict(model, tokenizer, "Ths text contains to email best .")

## Evaluate NUCLE

In [None]:
# Run every instruction variant over the NUCLE test set and write one CSV of
# spaCy-tokenized predictions per prompt id.
os.makedirs('outputs', exist_ok=True)  # to_csv does not create missing directories
nucle_src = pd.read_csv('nucle.test.csv')  # read once instead of once per prompt
for i in range(len(INSTRUCTIONS)):
    predictions = nucle_src.src.progress_map(
        lambda x, prompt_id=i: gec_predict(model, tokenizer, x, prompt_id)
    )
    df = nucle_src.assign(output=predictions.map(tokenize))
    df.to_csv(f'outputs/nucle-mistral-{i}.csv', index=False)


## Evaluate BEA

In [None]:
# ERRANT scoring command template; `{pred_txt}` is the path of the predictions file.
# NOTE(review): `cmd` is neither formatted nor executed in this cell.
cmd = """
docker run -it --rm  -v /mnt/green-efs/kostiantyn.omelianchuk/gec_sota/data/evalsets/bea-dev.txt:/data/input.txt \
    -v /mnt/green-efs/kostiantyn.omelianchuk/gec_sota/data/evalsets/bea-dev.m2:/data/ref.m2 \
    -v {pred_txt}:/data/pred.txt \
    errant \
        python3 /errant/parallel_to_m2.py -orig /data/input.txt -cor /data/pred.txt -out /data/pred.m2 && \
        python3 /errant/compare_m2.py -hyp /data/pred.m2 -ref /data/ref.m2
"""

# Only prompt id 6 is evaluated on the BEA dev set for this model.
os.makedirs('outputs', exist_ok=True)  # to_csv does not create missing directories
bea_src = pd.read_csv('/mnt/green-efs/oleksandr.korniienko/data/wi_locness.dev.gold.bea18.csv')
for i in [6]:
    predictions = bea_src.src.progress_map(
        lambda x, prompt_id=i: gec_predict(model, tokenizer, x, prompt_id)
    )
    df = bea_src.assign(output=predictions.map(tokenize))
    df.to_csv(f'outputs/bea-mistral-{i}.csv', index=False)


# Evaluate Gemma

In [None]:
# Other checkpoints tried: "google/gemma-2b-it", "google/gemma-1.1-7b-it".
# (The 7b assignment used to be immediately overwritten by the line below.)
model_id = "google/gemma-1.1-2b-it"


# 4-bit NF4 quantization with bfloat16 compute and no nested (double) quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

# Load the quantized model; `device_map={"": 0}` places every module on GPU 0.
use_flash_attn = False
load_in_8bit = False

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
    load_in_8bit=load_in_8bit,
    trust_remote_code=True,
    attn_implementation="flash_attention_2" if use_flash_attn else "eager"
)
# NOTE(review): prepare_model_for_kbit_training is a training-time helper. It is
# kept so results stay comparable with earlier runs — confirm it is still wanted
# for a notebook that only evaluates.
model = prepare_model_for_kbit_training(model)
# This notebook only runs inference, so the KV cache stays enabled. The previous
# `use_cache = False` was a training-time setting whose own comment asked for it
# to be re-enabled for inference.
model.config.use_cache = True
model.config.pretraining_tp = 1


# Tokenizer matching `model_id`.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

In [None]:

def gec_predict(model, tokenizer, text, prompt_id=7):
    """Correct the grammar of *text* with a chat model and return the last line of its reply.

    Builds a system + user chat prompt from ``INSTRUCTIONS[prompt_id]``, decodes
    greedily, and keeps the last line of the newly generated text. If that line
    contains a quoted span, only the text between the first and last double
    quote is kept.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Matching tokenizer exposing ``apply_chat_template`` and ``decode``.
        text: Source sentence to correct.
        prompt_id: Index into ``INSTRUCTIONS``. Defaults to 7, the prompt that
            used to be hard-coded, so three-argument calls behave as before;
            the evaluation loops pass it as a fourth argument.

    Returns:
        The extracted correction without ``<eos>`` or double quotes, or *text*
        unchanged when the reply yields nothing.
    """
    instruction = INSTRUCTIONS[prompt_id]

    # NOTE(review): some chat templates reject a leading "system" message —
    # confirm this tokenizer's template accepts it.
    messages = [
        {"role": "system", "content": "You are a writing assistant. Please ensure that your responses consist only of corrected texts."},
        {"role": "user", "content": instruction + f"\"{text}\""},
    ]

    # The prompt tensor is created on CPU; move it to wherever the model lives.
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

    # Greedy decoding. `temperature` has no effect when do_sample=False, so it is
    # no longer passed.
    generated_ids = model.generate(input_ids, max_new_tokens=1000, do_sample=False)

    # Decode only the tokens produced after the prompt, so the prompt text can
    # never be mistaken for the answer.
    reply = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:])
    reply = reply.replace("<eos>", "").strip()

    candidate = reply.rsplit('\n', 1)[-1]
    first_quote = candidate.find('"')
    last_quote = candidate.rfind('"')
    # Slice only when there are two distinct quotes; with a single quote the
    # old slice produced an empty string.
    if first_quote != last_quote:
        candidate = candidate[first_quote:last_quote]
    candidate = candidate.replace("\"", '').strip()

    # An empty prediction would count as deleting the sentence; keep the source instead.
    return candidate or text



# gec_predict(model, tokenizer, "Ths text contains to email best .")
# Smoke test on a single title-like input.
gec_predict(model, tokenizer, "Keeping the Secret of Genetic Testing")




## Evaluate NUCLE

In [None]:
# Run every instruction variant over the NUCLE test set and write one CSV of
# spaCy-tokenized predictions per prompt id.
# NOTE(review): `gec_predict` must accept the prompt id as its fourth argument
# for this loop to run — confirm against the definition currently in scope.
os.makedirs('outputs', exist_ok=True)  # to_csv does not create missing directories
nucle_src = pd.read_csv('nucle.test.csv')  # read once instead of once per prompt
for i in range(len(INSTRUCTIONS)):
    predictions = nucle_src.src.progress_map(
        lambda x, prompt_id=i: gec_predict(model, tokenizer, x, prompt_id)
    )
    df = nucle_src.assign(output=predictions.map(tokenize))
    df.to_csv(f'outputs/nucle-gemma-2b-{i}.csv', index=False)


## Evaluate BEA

In [None]:
# ERRANT scoring command template; `{pred_txt}` is the path of the predictions file.
# NOTE(review): `cmd` is neither formatted nor executed in this cell.
cmd = """
docker run -it --rm  -v /mnt/green-efs/kostiantyn.omelianchuk/gec_sota/data/evalsets/bea-dev.txt:/data/input.txt \
    -v /mnt/green-efs/kostiantyn.omelianchuk/gec_sota/data/evalsets/bea-dev.m2:/data/ref.m2 \
    -v {pred_txt}:/data/pred.txt \
    errant \
        python3 /errant/parallel_to_m2.py -orig /data/input.txt -cor /data/pred.txt -out /data/pred.m2 && \
        python3 /errant/compare_m2.py -hyp /data/pred.m2 -ref /data/ref.m2
"""

# Only prompt id 6 is evaluated on the BEA dev set for this model.
# NOTE(review): `gec_predict` must accept the prompt id as its fourth argument
# for this loop to run — confirm against the definition currently in scope.
os.makedirs('outputs', exist_ok=True)  # to_csv does not create missing directories
bea_src = pd.read_csv('/mnt/green-efs/oleksandr.korniienko/data/wi_locness.dev.gold.bea18.csv')
for i in [6]:
    predictions = bea_src.src.progress_map(
        lambda x, prompt_id=i: gec_predict(model, tokenizer, x, prompt_id)
    )
    df = bea_src.assign(output=predictions.map(tokenize))
    # Write under a Gemma-specific name (matching `nucle-gemma-2b-*`). The old
    # `bea-mistral-{i}.csv` target overwrote the Mistral results.
    df.to_csv(f'outputs/bea-gemma-2b-{i}.csv', index=False)


# Start server with model

In [None]:
## Run inference with an OpenAI-compatible endpoint

In [None]:
from openai import OpenAI
# Client pointed at a server on localhost:8000 that exposes an OpenAI-style /v1 API.
# NOTE(review): the api_key looks like a placeholder for the local server —
# confirm no real credential is expected here.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="token-abc123",
)

In [None]:
import pandas as pd
from tqdm import tqdm
tqdm.pandas()

In [None]:
# Load the NUCLE test set; the `src` column holds the sentences to correct.
df = pd.read_csv('./data/nucle.test.csv')

In [None]:
def infer(text):
    """Sample five corrections of *text* from the served model and strip chatty lead-ins.

    Sends *text* as the user message (the instruction goes in the system
    message) with ``temperature=1`` and ``n=5``. For each returned choice, if a
    known lead-in marker is present, everything up to and including the first
    matching marker is dropped and only the first following line is kept.

    Args:
        text: Source sentence to correct.

    Returns:
        A list with one cleaned prediction string per returned choice.
    """
    completion = client.chat.completions.create(
      # model="mistralai/Mistral-7B-Instruct-v0.2",
        model="gec-llama2-7b-public/",
        temperature=1,
        n=5,
        messages=[
            {"role": "system", "content": "Rewrite this text to make it grammatically correct ."},
            {"role": "user", "content": text}
        ]
    )
    # Markers are tried in this order; the first one found in a reply wins.
    prefixes = ("text:", "text is:", ":\n\n", ":\n")
    preds = []
    for choice in completion.choices:
        pred = choice.message.content or ""
        for p in prefixes:
            _, found, remainder = pred.partition(p)
            if found:
                # Cut exactly after the marker and let lstrip() remove separators.
                # The old extra `+ 1` dropped the first character of the
                # correction whenever it followed the marker directly.
                pred = remainder.lstrip().split('\n', 1)[0]
                break
        preds.append(pred)
    return preds

# Smoke test: request corrections for one deliberately broken sentence.
infer("I hop ths mesage find u ..")

In [None]:
# Each `output` cell holds the list of predictions returned by `infer` for that row.
df['output'] = df.src.progress_map(infer)

In [None]:
# Persist the sources together with their predictions.
df.to_csv('./data/nucle.test.output.csv', index=False)