## Libraries

In [2]:
import json
import os
import random

import numpy as np
import pandas as pd

## Config

In [3]:
# Load the config file.
# JSON is UTF-8 by specification, so the encoding is pinned here instead of
# being left to the platform default (which is not UTF-8 on every OS).
with open('../config/config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Base location that the dataset file names below are joined onto
file_path = config["data_loc"]

## Datasets

### Inference batch

In [9]:
# Build the path to the inference batch
file_name = "test.csv"
final_path = os.path.join(file_path, file_name)

# Read the CSV file (decoded as Windows-1252) and report its size
test_df = pd.read_csv(final_path, encoding='windows-1252')
n_test_rows, n_test_cols = test_df.shape
print(f"The test set has {n_test_rows:,} observations and {n_test_cols:,} columns.")
test_df.head()

The test set has 30,466 observations and 7 columns.


Unnamed: 0,Experiment,Topic,ID,Question,Response,CorrectAnswer,label
0,6,Statistics,6263701,What kind of bias can arise if the researcher ...,response bias from a leading question,Response bias,1
1,6,Statistics,6263701,What kind of bias can be produced by participa...,refusal bias,Nonresponse bias,-1
2,6,Statistics,6263701,Over- or under-representation of some groups i...,sample bias,Selection bias,-1
3,6,Statistics,6263701,A sample is unbiased if it is ___________ of t...,representative,Representative,1
4,6,Physics,6264278,"In 2014, where did the US Navy test a Laser We...",space,The Persian Gulf,-1


### Training Corpus

In [None]:
# from langchain_ollama.llms import OllamaLLM
# from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Define file path
file_name = "train.csv"
final_path = os.path.join(file_path, file_name)

# Load the training CSV file (decoded as Windows-1252)
df = pd.read_csv(final_path, encoding='windows-1252')
print(f"Shape of the train set is {df.shape[0]:,} observations and {df.shape[1]} columns.\n")
df.head()

Shape of the train set is 2,250 oservations and 7 columns.



Unnamed: 0,Experiment,Topic,ID,Question,Response,CorrectAnswer,label
0,1,Physics,104,How thin can a fiber optic be?,a strand of hair,As thin as a human hair,1
1,1,Physics,126,How thin can a fiber optic be?,Really thin and small,As thin as a human hair,-1
2,1,Physics,130,How thin can a fiber optic be?,as thin as a human hair,As thin as a human hair,1
3,1,Physics,131,How thin can a fiber optic be?,Very thin smaller than a pice of hair,As thin as a human hair,1
4,1,Physics,156,How thin can a fiber optic be?,Less than the width of a human hair,As thin as a human hair,1


### Define Example function

In [None]:
def few_shot_format(row):
    """
    Render one row as a few-shot example block.

    `row` is indexed by the keys 'Question', 'Response', 'CorrectAnswer'
    and 'label'. The result starts with a '---' separator line, followed by
    one four-space-indented line per field, and ends with a newline.
    """
    indent = " " * 4
    lines = [
        "---",
        f"{indent}Question: {row['Question']},",
        f"{indent}Response: {row['Response']},",
        f"{indent}Correct Answer: {row['CorrectAnswer']},",
        # The label line carries a trailing space before the final newline.
        f"{indent}Label: {row['label']} ",
    ]
    return "\n".join(lines) + "\n"

# Create few shot examples
def create_few_shot_examples(df, n=2, labels=(1, 0, -1), random_state=42):
    """
    Randomly sample n examples for each label.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; must contain a 'label' column.
    n : int, default 2
        Number of rows to draw per label.
    labels : iterable, default (1, 0, -1)
        Label values to sample, in the order they should appear in the result.
    random_state : int or None, default 42
        Seed passed to `DataFrame.sample` for every label group.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, grouped in `labels` order, with a fresh 0..k-1 index.
        A label with fewer than n rows is sampled with replacement; a label
        with no rows at all contributes nothing. If nothing could be sampled,
        an empty frame with the same columns as `df` is returned.
    """
    sampled_dfs = []
    for label in labels:
        group_df = df[df['label'] == label]

        # Sampling from an empty group raises ValueError even with
        # replace=True, so a label that is absent from df is skipped.
        if group_df.empty:
            continue

        # Fall back to sampling with replacement only when the group is too small.
        sampled_dfs.append(
            group_df.sample(n=n, replace=len(group_df) < n, random_state=random_state)
        )

    # pd.concat rejects an empty list; return an empty frame with df's columns.
    if not sampled_dfs:
        return df.iloc[0:0].reset_index(drop=True)

    # Concatenate the sampled DataFrames
    return pd.concat(sampled_dfs).reset_index(drop=True)

In [None]:
# Draw three examples per label from the training set
few_shot_df = create_few_shot_examples(df, n=3)

# Render every sampled row, then separate the rendered blocks with a newline
formatted_examples = few_shot_df.apply(few_shot_format, axis=1)
prompt_context = "\n".join(formatted_examples.tolist())
print(prompt_context)

---
    Question: In 2014, where did the US Navy test a Laser Weapon System?,
    Response: The Persian gulf or the ,
    Correct Answer: The Persian Gulf,
    Label: 1 

---
    Question: What is the Strategic Defense Initiative (SDI) commonly known as?,
    Response: Reagans program. Star wars?,
    Correct Answer: The "Star Wars program",
    Label: 1 

---
    Question: What industry is one of the biggest user of lasers?,
    Response: healthcare, military,
    Correct Answer: The military,
    Label: 1 

---
    Question: What is the advantage of liquid dye lasers?,
    Response: You can easily change the frequency of the light compared to the others.,
    Correct Answer: You can make lasers with many different colors,
    Label: 0 

---
    Question: Over- or under-representation of some groups in the population produces a ___________ bias.,
    Response: selective,
    Correct Answer: Selection bias,
    Label: 0 

---
    Question: Name one property of a good sample. ,
    Resp

In [65]:
# One re-indexed subset of the training set per label value
label1, label0, labelNeg1 = (
    df[df['label'] == label_value].reset_index(drop=True)
    for label_value in (1, 0, -1)
)

# Row counts in the order: label 1, label 0, label -1
print(label1.shape[0], label0.shape[0], labelNeg1.shape[0])

1185 63 1002


In [66]:
# Preview the first rows of the label-0 subset
label0.head()

Unnamed: 0,Experiment,Topic,ID,Question,Response,CorrectAnswer,label
0,1,Physics,130,Why are lasers cheaper than conventional missi...,they dont have to be remanufactured and stored,Because the only cost is the energy it takes t...,0
1,1,Physics,192,Why are lasers cheaper than conventional missi...,they take up less space and are easier to make,Because the only cost is the energy it takes t...,0
2,1,Physics,130,What industry is one of the biggest user of la...,defense,The military,0
3,1,Statistics,330,What kind of bias can be produced by participa...,non-answer bias,Nonresponse bias,0
4,1,Statistics,240,"In this form of sampling, the population is di...",strata samp,Stratified Sampling,0


In [None]:

# Pick one row position inside the label-0 subset to inspect.
label0_n = label0.shape[0]
# Legacy name: this has always held the label-0 row count, not label-1.
# Kept as an alias in case other cells still read it.
label1_n = label0_n

# A seeded generator makes the pick reproducible on Restart & Run All;
# change the seed to look at a different example.
rand_num = random.Random(42).randint(0, label0_n - 1)
rand_num

62

In [71]:
# Question text of the randomly chosen label-0 row
label0['Question'].iloc[rand_num]

"What kind of bias can be produced by participants' unwillingness to participate?"

## Exploratory Analysis

In [21]:
# Number of distinct experiments in the training set (a missing value counts as one)
df['Experiment'].nunique(dropna=False)

1

In [19]:
# Number of distinct questions in the training set (a missing value counts as one)
df['Question'].nunique(dropna=False)

36

In [20]:
# Number of distinct topics in the training set (a missing value counts as one)
df['Topic'].nunique(dropna=False)

2

In [29]:
# One row per distinct (Topic, Question) pair, re-indexed from 0
topic_question_cols = ['Topic', 'Question']
df.loc[:, topic_question_cols].drop_duplicates().reset_index(drop=True)

Unnamed: 0,Topic,Question
0,Physics,How thin can a fiber optic be?
1,Physics,A common application of lasers in medicine is ...
2,Physics,What is the Strategic Defense Initiative (SDI)...
3,Physics,"In 2014, where did the US Navy test a Laser We..."
4,Physics,Why are lasers cheaper than conventional missi...
5,Physics,What industry is one of the biggest user of la...
6,Statistics,Name one potential disadvantage of experiments.
7,Statistics,What sampling method ensures that every member...
8,Statistics,Over- or under-representation of some groups i...
9,Statistics,What kind of bias can be produced by participa...


### Document Distribution

In [None]:
import matplotlib.pyplot as plt

Label=1 means the response is correct. Label=0 means the response is similar to the correct answer but not precise. Label=-1 means the response is incorrect.

In [22]:
# Number of training rows per label value
label_counts = df['label'].value_counts()
label_counts

label
 1    1185
-1    1002
 0      63
Name: count, dtype: int64

### Ideas