In [48]:
# %pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph


# !pip install -qU "langchain[openai]"
# !pip install -qU langchain-pinecone
# !pip install -qU langchain-mongodb
# !pip install beautifulsoup4
# !pip install pyarrow fastparquet
!pip install ace_tools



In [51]:
import pandas as pd
import re
import ace_tools as tools
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

ModuleNotFoundError: No module named 'ace_tools'

In [None]:
import getpass
import os

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = getpass.getpass("LANGSMITH_API_KEY")

In [3]:
from pydantic.v1 import BaseModel
if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain.chat_models import init_chat_model

llm = init_chat_model("gpt-4o-mini", model_provider="openai")


In [9]:

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [10]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [60]:

df_ielts_hf = pd.read_parquet("hf://datasets/vietanh0802/ielts-evaluations-simple-ds/data/train-00000-of-00001.parquet")
df_ielts_hf.head()

Unnamed: 0,prompt,essay,evaluation,band,instruction
0,Interviews form the basic criteria for most la...,It is believed by some experts that the tradit...,**Task Achievement: [7]**\nThe essay effective...,7.5\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews form the basic criteri...
1,Interviews form the basic selecting criteria f...,Nowadays numerous huge firms allocate an inter...,**Task Achievement:** 5.0\n- The candidate has...,5.0\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews form the basic selecti...
2,Interview form the basic selection criteria fo...,The interview section is the most vital part o...,## Task Achievement:\n- The candidate has effe...,5.5\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterview form the basic selectio...
3,Interviews form the basic selection criteria f...,It is argued that the best method to recruit e...,## Task Achievement:\n- The candidate has adeq...,5.5\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews form the basic selecti...
4,Interviews from the basic selecting criteria f...,Nowadays many companies conduct interviews bef...,**Task Achievement:**\n\nThe essay adequately ...,4\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews from the basic selecti...


In [61]:
# Function to extract scores using regex
def extract_score(text, category):
    pattern = rf"\*\*{category}: \[(\d+(\.\d+)?)\]\*\*"  # Extract number inside brackets
    match = re.search(pattern, text)
    return float(match.group(1)) if match else None

# Function to extract comments related to each section
def extract_comment(text, category):
    pattern = rf"\*\*{category}: \[\d+(\.\d+)?\]\*\*\n(.*?)(?=\n\*\*|\Z)"  # Ensures it captures until the next **section** or end of text
    match = re.search(pattern, text, re.DOTALL)
    return match.group(2).strip() if match and match.group(2) else None  # Safely return None if no comment is found

# Function to extract feedback sections
def extract_feedback_section(text, section_name):
    pattern = rf"\*\*{section_name}:?\*\*(.*?)\n\*\*"  # Extract everything between **section** and next **
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1).strip() if match else None

def extract_suggestions(text):
    pattern = r"\*\*Suggestions for Enhancement:\*\*\s*(.*)"  # Capture everything after the header
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1).strip() if match else None  # Return extracted suggestions or None if not found

#

In [63]:

# Apply functions to extract scores and comments into new columns
df_ielts_hf["Task Achievement Score"] = df_ielts_hf["evaluation"].apply(lambda x: extract_score(x, "Task Achievement"))
df_ielts_hf["Task Achievement Comment"] = df_ielts_hf["evaluation"].apply(lambda x: extract_comment(x, "Task Achievement"))

df_ielts_hf["Coherence and Cohesion Score"] = df_ielts_hf["evaluation"].apply(lambda x: extract_score(x, "Coherence and Cohesion"))
df_ielts_hf["Coherence and Cohesion Comment"] = df_ielts_hf["evaluation"].apply(lambda x: extract_comment(x, "Coherence and Cohesion"))

df_ielts_hf["Lexical Resource Score"] = df_ielts_hf["evaluation"].apply(lambda x: extract_score(x, "Lexical Resource"))
df_ielts_hf["Lexical Resource Comment"] = df_ielts_hf["evaluation"].apply(lambda x: extract_comment(x, "Lexical Resource"))

df_ielts_hf["Grammatical Range and Accuracy Score"] = df_ielts_hf["evaluation"].apply(lambda x: extract_score(x, "Grammatical Range and Accuracy"))
df_ielts_hf["Grammatical Range and Accuracy Comment"] = df_ielts_hf["evaluation"].apply(lambda x: extract_comment(x, "Grammatical Range and Accuracy"))

df_ielts_hf["Overall Band Score"] = df_ielts_hf["evaluation"].apply(lambda x: extract_score(x, "Overall Band Score"))
df_ielts_hf["Overall Band Score Comment"] = df_ielts_hf["evaluation"].apply(lambda x: extract_comment(x, "Overall Band Score"))

# Extract feedback sections
df_ielts_hf["Strengths"] = df_ielts_hf["evaluation"].apply(lambda x: extract_feedback_section(x, "Strengths"))
df_ielts_hf["Areas for Improvement"] = df_ielts_hf["evaluation"].apply(lambda x: extract_feedback_section(x, "Areas for Improvement"))
df_ielts_hf["Suggestions for Enhancement"] = df_ielts_hf["evaluation"].apply(lambda x: extract_suggestions(x))

# Drop the original "evaluation" column for clarity
df_ielts_hf.drop(columns=["evaluation"], inplace=True)

# Display the cleaned DataFrame
df_ielts_hf.head()


Unnamed: 0,prompt,essay,band,instruction,Task Achievement Score,Task Achievement Comment,Coherence and Cohesion Score,Coherence and Cohesion Comment,Lexical Resource Score,Lexical Resource Comment,Grammatical Range and Accuracy Score,Grammatical Range and Accuracy Comment,Overall Band Score,Overall Band Score Comment,Strengths,Areas for Improvement,Suggestions for Enhancement
0,Interviews form the basic criteria for most la...,It is believed by some experts that the tradit...,7.5\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews form the basic criteri...,7.0,The essay effectively addresses the given task...,7.5,The essay is well-organized and easy to follow...,7.0,The candidate demonstrates a good range of voc...,7.0,The essay exhibits a variety of sentence struc...,7.5,The essay effectively addresses the given task...,* Clear and concise introduction\n * Releva...,* Explore the opposing viewpoint in more detai...,"* To strengthen the argument, the candidate co..."
1,Interviews form the basic selecting criteria f...,Nowadays numerous huge firms allocate an inter...,5.0\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews form the basic selecti...,,,,,,,,,,,,,
2,Interview form the basic selection criteria fo...,The interview section is the most vital part o...,5.5\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterview form the basic selectio...,,,,,,,,,,,,,
3,Interviews form the basic selection criteria f...,It is argued that the best method to recruit e...,5.5\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews form the basic selecti...,,,,,,,,,,,,,
4,Interviews from the basic selecting criteria f...,Nowadays many companies conduct interviews bef...,4\n\n\n\n\n\r\r\r\r\r\r\r\r\r\r\r\r\r,### Prompt:\nInterviews from the basic selecti...,,,,,,,,,,,,,


In [67]:


# Define the GitHub raw CSV URL
csv_url = "https://raw.githubusercontent.com/haydenkerr/INFT3039-Capstone1-GroupA-25/main/datasets/ielts_writing_dataset.csv"



# Load the CSV data
df = pd.read_csv(csv_url)
df = df[['Question', 'Essay', 'Overall']]  # Adjust if necessary



# Convert each row to a Document object
docs = [
    Document(
        page_content=f"Question: {row['Question']}\nEssay: {row['Essay']}\nOverall: {row['Overall']}"
    ) for _, row in df.iterrows()
]

# Initialize text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Initialize embeddings
embeddings = OpenAIEmbeddings()

# Initialize In-Memory Vector Store
vector_store = InMemoryVectorStore(embeddings)
vector_store.add_documents(all_splits)

# Load LLM and prompt
llm = ChatOpenAI(model_name="gpt-4")
prompt = hub.pull("rlm/rag-prompt")

# Define State for LLM workflow
class State(TypedDict):
    question: str
    essay: str
    context: List[Document]
    score: str

# Retrieval function
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}

# Grading function
def grade(state: State):
    example_texts = "\n\n".join(doc.page_content for doc in state["context"])
    user_input = f"New Question: {state['question']}\nNew Essay: {state['essay']}"
    
    messages = [
        {"role": "system", "content": "You are an IELTS examiner. Score the given essay based on the 0-9 IELTS band scale."},
        {"role": "user", "content": f"Here are some example graded essays:\n{example_texts}\n\nNow, evaluate this new essay:\n{user_input}"}
    ]
    
    response = llm.invoke(messages)
    return {"score": response.content}

# Build Graph
graph_builder = StateGraph(State).add_sequence([retrieve, grade])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()



In [70]:
# Example test case
new_question = "Rich countries often give money to poorer countries, but it does not solve poverty. Therefore, developed countries should give other types of help to the poor countries rather than financial aid. To what extent do you agree or disagree?	"
new_essay = """Poverty represents a worldwide crisis. It is the ugliest epidemic in a region, 
which could infect countries in the most debilitating ways. To tackle this issue, rich countries need to help those in need 
and give a hand when possible. I agree that there are several ways of aiding poor countries other than financial aid, 
like providing countries in need with engineers, workers, and soldiers who would build infrastructure. Building universities, 
hospitals, and roadways. By having a solid infrastructure, poor countries would be able to monetise their profits and build a 
stronger and more profitable economy which would help them in the long term. Once unprivilged countries find their niche, 
the major hurdle would be passed and would definitely pave the way for much brighter future. However, I do disagree that 
financial aid does not solve poverty, it does if used properly and efficiently. The most determining factor if financial aid 
would be the way to go, is by identifying what type of poor countries' representative are dealing with. Some countries will 
have a responsible leader and some will not, with that being said, implementing a strategy, to distinguish responsible leaders
from others, would tailor the type of aid rich countries could use. An example, A clear report and constant observation would 
be applied to track the progress and how this type of aid is being monetized. In summary, types of aid varies from country to another, 
and tailoring the type of aid is of paramount importance to solve this problem that had huge toll on poor countries.

"""

# Run LLM grading
result = graph.invoke({"question": new_question, "essay": new_essay})
print("Predicted Band Score:", result["score"])


Predicted Band Score: This essay would score around 7.5 to 8 on the IELTS band scale. The essay is well-organized, presents clear ideas and supports them with logical reasoning and examples. The language is mostly accurate, with a wide variety of sentence structures and good use of vocabulary. However, there are a few minor errors and awkward expressions that prevent the essay from reaching a higher score. The argument could also be developed more fully and coherently. Overall, the essay demonstrates a good command of English and a clear understanding of the task.
