In [108]:
import os

import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.evaluate import Evaluate
import pandas as pd
from dotenv import load_dotenv

from dataloader import build_eval_dataset, check_if_data_folder_exits

In [113]:
# load environment variables
load_dotenv()

AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
AZURE_OPENAI_VERSION = os.getenv("AZURE_OPENAI_VERSION")

DATA_FOLDER = "data/IR-Plag-Dataset"
check_if_data_folder_exits(DATA_FOLDER)

The folder 'data/IR-Plag-Dataset' exists. You can proceed with loading the data.


# Eval dataset for solution

In [116]:
eval_df = build_eval_dataset(DATA_FOLDER)
eval_df.sample(15)

Unnamed: 0,L,case,sample_1,sample_2,plagiarized,reason
204,3,4,public class T4 {\n\tpublic static void main(S...,/**\n *\n * @author 020A6EC1A4D0C5BDB29FF826A4...,True,
305,6,2,import java.util.Scanner;\n\npublic class T2 {...,import java.util.Scanner;\n\n\npublic class in...,True,
267,7,3,import java.util.Scanner;\n\npublic class T3 {...,"/*\n * To change this license header, choose L...",True,
58,0,6,public class T6 {\n\tpublic static void main(S...,import java.util.Scanner;\npublic class T06\n{...,False,
20,4,7,import java.util.Scanner;\n\npublic class T7 {...,import java.util.Scanner;\n\n/*\n * To change ...,True,
19,3,7,import java.util.Scanner;\n\npublic class T7 {...,import java.util.Scanner;\n\n/**\n *\n * @auth...,True,
8,0,7,import java.util.Scanner;\n\npublic class T7 {...,import java.util.Scanner;\n/**\n *\n * @author...,False,
41,6,7,import java.util.Scanner;\n\npublic class T7 {...,import java.util.Scanner;\n\n/*\n * To change ...,True,
212,3,4,public class T4 {\n\tpublic static void main(S...,/**\n *\n * @author 020A6EC1A4D0C5BDB29FF826A4...,True,
62,0,6,public class T6 {\n\tpublic static void main(S...,"/*\n * To change this license header, choose L...",False,


In [162]:
df = pd.read_csv("data/train.tsv", sep="\t")
df = df.sample(frac=1, random_state=1337).reset_index(drop=True)

In [210]:
lm = dspy.AzureOpenAI(
    api_base=AZURE_OPENAI_ENDPOINT,
    api_version=AZURE_OPENAI_VERSION,
    deployment_id=AZURE_OPENAI_DEPLOYMENT,
    api_key=AZURE_OPENAI_KEY,
)
dspy.settings.configure(lm=lm)


def create_example(row: pd.Series) -> dspy.Example:
    return dspy.Example(
        code_sample_1=row["sample_1"],
        code_sample_2=row["sample_2"],
        plagiarized="Yes" if row["plagiarized"] else "No",
        explanation=row["reason"],
    ).with_inputs("code_sample_1", "code_sample_2")


# Assuming df is your original DataFrame
train_size = int(0.8 * len(df))

train_examples = []
test_examples = []

for i, row in df.iterrows():
    example = create_example(row)
    if i < train_size:
        train_examples.append(example)
    else:
        test_examples.append(example)

In [214]:
class Signature(dspy.Signature):
    """Detect if two code samples are plagiarized. In plagiarized field answer only : Yes if the code samples are plagiarized, No otherwise. In explenation field add the reason why the code samples are/ are not plagiarized."""

    code_sample_1 = dspy.InputField(desc="The first code sample to compare")
    code_sample_2 = dspy.InputField(desc="The second code sample to compare")
    explanation = dspy.OutputField(
        desc="Explanation or reason why the code samples are/ are not plagiarized"
    )
    plagiarized = dspy.OutputField(
        desc="Yes/No indicating if code samples are plagiarized"
    )


class CoT(dspy.Module):
    def __init__(self) -> None:
        super().__init__()
        self.prog = dspy.ChainOfThought(Signature)

    def forward(self, code_sample_1: str, code_sample_2: str) -> Signature:
        return self.prog(code_sample_1=code_sample_1, code_sample_2=code_sample_2)


def validate_answer(
    example: dspy.Example, pred: Signature, trace: object = None
) -> bool:
    score = True if pred.plagiarized.lower() == example.plagiarized.lower() else False
    print(f"Pred: {pred.plagiarized} | Actual: {example.plagiarized} | Match: {score}")
    return score


config = {"max_bootstrapped_demos": 8, "max_labeled_demos": 8}

teleprompter = BootstrapFewShot(metric=validate_answer, **config)
optimized_cot = teleprompter.compile(CoT(), trainset=train_examples)
# CoT()(train_examples[0].code_sample_1, train_examples[0].code_sample_2)

  0%|          | 0/50 [00:00<?, ?it/s]

Pred: Yes | Actual: No | Match: False
Pred: Code Sample 1: import java.util.Scanner; public class T7 { public static void main(String[] args) { Scanner input = new Scanner(System.in); System.out.print("Enter a 4 by 4 matrix row by row: "); double[][] m = new double[4][4]; for (int i = 0; i < 4; i++) | Actual: Yes | Match: False
Pred: Yes | Actual: No | Match: False
Pred: No | Actual: Yes | Match: False
Pred: No | Actual: Yes | Match: False
Pred: Yes | Actual: Yes | Match: True
Pred: No | Actual: No | Match: True
Pred: Yes | Actual: Yes | Match: True
Pred: No | Actual: No | Match: True
Pred: Yes | Actual: Yes | Match: True
Pred: Yes | Actual: Yes | Match: True
Pred: Yes | Actual: Yes | Match: True
Pred: Plagiarized: No | Actual: No | Match: False


 28%|██▊       | 14/50 [00:03<00:10,  3.56it/s]

Pred: Yes | Actual: Yes | Match: True
Bootstrapped 8 full traces after 15 examples in round 0.





In [215]:
# Set up the evaluator, which can be used multiple times.
evaluate = Evaluate(
    devset=test_examples,
    metric=validate_answer,
    num_threads=4,
    display_progress=True,
    display_table=0,
)

# Evaluate our `optimized_cot` program.
evaluate(optimized_cot)

Average Metric: 1 / 1  (100.0):   8%|▊         | 1/13 [00:02<00:31,  2.59s/it]

Pred: No | Actual: No | Match: True


Average Metric: 1 / 2  (50.0):  15%|█▌        | 2/13 [00:03<00:15,  1.45s/it] 

Pred: No | Actual: Yes | Match: False


Average Metric: 2 / 4  (50.0):  23%|██▎       | 3/13 [00:03<00:10,  1.01s/it]

Pred: No | Actual: No | Match: TruePred: Yes | Actual: No | Match: False



Average Metric: 3 / 5  (60.0):  38%|███▊      | 5/13 [00:05<00:07,  1.08it/s]

Pred: Yes | Actual: Yes | Match: True


Average Metric: 4 / 6  (66.7):  46%|████▌     | 6/13 [00:05<00:05,  1.33it/s]

Pred: Yes | Actual: Yes | Match: True


Average Metric: 5 / 7  (71.4):  54%|█████▍    | 7/13 [00:06<00:04,  1.35it/s]

Pred: Yes | Actual: Yes | Match: True


Average Metric: 7 / 9  (77.8):  62%|██████▏   | 8/13 [00:07<00:04,  1.18it/s]

Pred: Yes | Actual: Yes | Match: True
Pred: Yes | Actual: Yes | Match: True


Average Metric: 8 / 10  (80.0):  77%|███████▋  | 10/13 [00:07<00:01,  1.88it/s]

Pred: Yes | Actual: Yes | Match: True


Average Metric: 9 / 11  (81.8):  85%|████████▍ | 11/13 [00:09<00:01,  1.47it/s]

Pred: No | Actual: No | Match: True


Average Metric: 10 / 12  (83.3):  92%|█████████▏| 12/13 [00:10<00:00,  1.17it/s]

Pred: Yes | Actual: Yes | Match: True


Average Metric: 11 / 13  (84.6): 100%|██████████| 13/13 [00:10<00:00,  1.18it/s]

Pred: Yes | Actual: Yes | Match: True





84.62