### Name list generation

In [None]:
import pandas as pd
from transformers import AutoTokenizer, GPT2Tokenizer

# Candidate first names for the prompt templates. The literal contains repeated
# entries (e.g. "Zane", "Zara", "Logan"); duplicates are dropped when the
# filtered list is written to names.csv further down in this cell.
names = [
    "Alice", "Bob", "Charlie", "Diana", "Ethan", "Fiona", "George", "Hannah",
    "Isaac", "Julia", "Kevin", "Laura", "Michael", "Nina", "Oliver", "Paula",
    "Quentin", "Rachel", "Samuel", "Tina", "Umar", "Vera", "William", "Xena",
    "Yusuf", "Zara", "Aaron", "Bianca", "Caleb", "Denise", "Elijah", "Faith",
    "Gavin", "Hailey", "Ian", "Jasmine", "Kyle", "Lena", "Martin", "Nora",
    "Owen", "Penelope", "Quinn", "Rose", "Sebastian", "Talia", "Uri", "Valerie",
    "Wesley", "Xander", "Yara", "Zane", "Amira", "Diego", "Liam", "Noah",
    "Sofia", "Mateo", "Aaliyah", "Levi", "Mohammed", "Priya", "Jorge", "Akira",
    "Fatima", "Chang", "Layla", "Anika", "Raj", "Mei", "Imani", "Kenji",
    "Adriana", "Santiago", "Lucia", "Mila", "Khalid", "Jin", "Zane", "Sasha",
    "Giovanni", "Ines", "Tariq", "Naomi", "Reza", "Lina", "Omar", "Tanvi",
    "Marco", "Soraya", "Arjun", "Dmitri", "Salma", "Thiago", "Yuna", "Niko",
    "Carmen", "Ishaan", "Aliyah", "Hiro", "Luca", "Samira", "Fernando", "Alina",
    "Dalia", "Ravi", "Zara", "Emilio", "Kira", "Amina", "Yusuf", "Elena",
    "Andrew", "Bethany", "Clara", "Derek", "Erin", "Frank", "Gloria", "Henry",
    "Irene", "Jack", "Kelsey", "Leon", "Megan", "Nathan", "Olive", "Peter",
    "Rebecca", "Scott", "Travis", "Ulysses", "Vanessa", "Wendy", "Xavier",
    "Yvonne", "Zeke", "Amber", "Brett", "Colin", "Danielle", "Elliot", "Felix",
    "Grant", "Heidi", "Jonah", "Kara", "Landon", "Madeline", "Neil", "Phoebe",
    "Riley", "Shane", "Tiffany", "Victor", "Walter", "Zelda", "Miles", "Chloe",
    "Audrey", "Spencer", "Greta", "Dean", "Natalie", "Brooke", "Trent", "Hope",
    "Logan", "Seth", "Carla", "Graham", "Melanie", "Douglas", "April", "Connor",
    "Mallory", "Eleanor", "Brandon", "Joy", "Harvey", "Celeste",
    "Anouk", "Lorenzo", "Ingrid", "Marek", "Soren", "Frida", "Rafaël", "Katarina",
    "Tomas", "Elsa", "Nikolai", "Greta", "Leandro", "Marta", "Lucien", "Sabine",
    "Petra", "Emil", "Renata", "Dieter", "Alessia", "Joaquín", "Lukas", "Helena",
    "Isolde", "Mateusz", "Beatrix", "Alban", "Nadia", "Stefan", "Camille", "Viktor",
    "Simone", "Astrid", "Jens", "Florian", "Bruno", "Agnes", "Otto", "Liliane",
    "Pascal", "Anastasia", "Sergei", "Ivana", "Karolina", "Bjorn", "Magdalena",
    "Casper", "Milena", "Timo", "Leontine", "Gregor", "Sylvie", "Rocco", "Noemi",
    "Dagmar", "Cecile", "Kristof", "Edda", "Giulia", "Rudolf", "Martina", "Kamil",
    "Zuzana", "Andrei", "Laure", "Thibault", "Rozalia", "Niels", "Mireille",
    "Ashley", "Brady", "Cody", "Dakota", "Emily", "Garrett", "Haley", "Jared",
    "Kayla", "Logan", "Mason", "Natalie", "Peyton", "Reagan", "Savannah", "Tanner",
    "Addison", "Brayden", "Cassidy", "Devon", "Emmett", "Grayson", "Hunter", "Jace",
    "Kaitlyn", "Landon", "Madison", "Noelle", "Parker", "Ryder", "Skylar", "Tyler",
    "Aubrey", "Blake", "Colton", "Delaney", "Easton", "Faith", "Grant", "Hope",
    "Jayden", "Kendall", "Logan", "Morgan", "Nevaeh", "Preston", "Quinton", "Riley",
    "Sierra", "Tristan", "Wes", "Zayden", "Bryce", "Cheyenne", "Dallas", "Elle",
    "Finley", "Gage", "Harper", "Jillian", "Kinsley", "Lane", "Mckenzie", "Oakley",
    "Paxton", "Rowan", "Sadie", "Tucker", "Walker", "Zion"
]


# Hugging Face model identifiers whose tokenizers are loaded further down in
# this cell. The name list is filtered through each tokenizer in turn, so a
# name survives only if every listed tokenizer encodes it as a single token.
models = [
    'meta-llama/Llama-3.2-3B-Instruct',
    'meta-llama/Llama-3.2-3B',
    'meta-llama/Llama-3.1-8B-Instruct',
    'meta-llama/Llama-3.1-8B',
    'gpt2',
    'google/gemma-2-2b',
    'google/gemma-2-2b-it',
    'Qwen/Qwen3-0.6B',
    'Qwen/Qwen3-4B',
    # 'google/gemma-3-4b-it'
]

def filter_names(tokenizer, names):
    """Return the entries of ``names`` that ``tokenizer`` encodes as one token.

    Each name is tokenized on its own, without special tokens, and kept only
    when the first row of the returned ``input_ids`` has length 1. The order
    of the input is preserved.
    """
    def _token_count(text):
        encoded = tokenizer(text, return_tensors='pt', add_special_tokens=False)
        return len(encoded['input_ids'][0])

    return [candidate for candidate in names if _token_count(candidate) == 1]

# Extend the hand-written list with the answer strings used in the reasoning
# templates so they go through the same single-token check.
df = pd.read_csv("datasets/templates/reasoning_templates.csv")

# Missing answers are read as NaN, which a tokenizer cannot encode; drop them.
template_answers = pd.concat([df['answer1'], df['answer2']]).dropna().tolist()

# De-duplicate up front (keeping first occurrences, in order) so each name is
# tokenized only once per model instead of once per repetition.
names = list(dict.fromkeys(names + template_answers))

for model in models:
    if model == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained(model)
    else:
        tokenizer = AutoTokenizer.from_pretrained(model)
    # Keep only names that this tokenizer encodes as a single token; the list
    # shrinks with every model, so survivors are single-token for all of them.
    names = filter_names(tokenizer, names)

# Save as csv
df = pd.DataFrame(names, columns=['name']).drop_duplicates()
df.to_csv("datasets/templates/names.csv", index=False)

### Dataset generation

In [7]:
# N-way date reasoning

import pandas as pd
import copy
import random
import datetime
from transformers import AutoTokenizer

def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix (``1st``, ``22nd``).

    Works for any non-negative integer, not only days of the month: numbers
    whose last two digits are 11, 12 or 13 take ``th`` (``11th``, ``112th``);
    otherwise the last digit decides (``101st``, ``42nd``, ``53rd``).
    """
    if 11 <= n % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f"{n}{suffix}"

N = 3
prompt = "{name} {action} on the {date}."
min_date = datetime.datetime(2019, 1, 1)
max_date = datetime.datetime(2019, 12, 31)

names = pd.read_csv("datasets/templates/names.csv")
names = names['name'].tolist()
print(names)
print(len(names))

actions = pd.read_csv("datasets/generated/actions_date.csv")
actions = actions['action']
# Drop duplicates
actions = actions.drop_duplicates()
# Filter out actions that contain 'his' or 'her'. This is a case-insensitive
# substring match, so words that merely contain those letters also match.
actions = actions[~actions.str.contains('his|her', case=False)]
actions = actions.tolist()

# Build a pool of (name, date, sentence template) candidates.
num_days = (max_date - min_date).days
df_people = []
for _ in range(1000):
    name = random.choice(names)
    # Pick a whole-day offset with both endpoints included. Scaling the span
    # by random.random() (always < 1.0) and truncating to midnight could never
    # produce max_date itself.
    date = min_date + datetime.timedelta(days=random.randint(0, num_days))
    date_str = f"{ordinal(date.day)} of {date.strftime('%B')}"
    # '{action}' is kept as a placeholder; it is filled in once per prompt.
    context = prompt.format(name=name, date=date_str, action='{action}')
    df_people.append((name, date, context))

df_people = pd.DataFrame(df_people, columns=['name', 'date', 'context'])
df_people = df_people.drop_duplicates()

def create_sampled_df(df_people, N, num_samples):
    """Build ``num_samples`` "who was first" prompts from the candidate pool.

    For each date window, ``N`` rows with pairwise distinct names and dates
    are drawn from ``df_people``. Their sentence templates are joined, filled
    with one action drawn from the module-level ``actions`` list, and followed
    by the question stem. The person with the earliest date is the answer.

    Args:
        df_people: DataFrame with ``name``, ``date`` and ``context`` columns.
        N: Number of people mentioned in each prompt.
        num_samples: Total number of prompts to generate.

    Returns:
        DataFrame with ``name_i``/``date_i`` columns (i = 1..N), ``context``,
        ``correct_date`` and ``correct``.

    Raises:
        ValueError: If a window that must produce prompts holds fewer than
            ``N`` distinct names or dates, so no valid draw exists.
    """
    # Two interleaved sets of windows. All span two months except the last,
    # which covers December only.
    date_ranges = [
        (datetime.datetime(2019, 1, 1), datetime.datetime(2019, 2, 28)),
        (datetime.datetime(2019, 3, 1), datetime.datetime(2019, 4, 30)),
        (datetime.datetime(2019, 5, 1), datetime.datetime(2019, 6, 30)),
        (datetime.datetime(2019, 7, 1), datetime.datetime(2019, 8, 31)),
        (datetime.datetime(2019, 9, 1), datetime.datetime(2019, 10, 31)),
        (datetime.datetime(2019, 11, 1), datetime.datetime(2019, 12, 31)),

        (datetime.datetime(2019, 2, 1), datetime.datetime(2019, 3, 31)),
        (datetime.datetime(2019, 4, 1), datetime.datetime(2019, 5, 31)),
        (datetime.datetime(2019, 6, 1), datetime.datetime(2019, 7, 31)),
        (datetime.datetime(2019, 8, 1), datetime.datetime(2019, 9, 30)),
        (datetime.datetime(2019, 10, 1), datetime.datetime(2019, 11, 30)),
        (datetime.datetime(2019, 12, 1), datetime.datetime(2019, 12, 31)),
    ]

    # Spread num_samples over the windows. Plain floor division would drop the
    # remainder (1000 // 12 * 12 == 996), so the first `remainder` windows
    # each produce one extra prompt.
    per_range, remainder = divmod(num_samples, len(date_ranges))

    data = []
    for range_idx, (start_date, end_date) in enumerate(date_ranges):
        # Candidates whose date falls inside the current window.
        in_range = (df_people['date'] >= start_date) & (df_people['date'] <= end_date)
        df_people_filtered = df_people[in_range]

        quota = per_range + (1 if range_idx < remainder else 0)
        # Without N distinct names and dates the rejection loop below could
        # never terminate; fail loudly instead.
        if quota and (
            df_people_filtered['name'].nunique() < N
            or df_people_filtered['date'].nunique() < N
        ):
            raise ValueError(
                f"Not enough distinct names/dates between {start_date:%Y-%m-%d} "
                f"and {end_date:%Y-%m-%d} to draw {N} people"
            )

        for _ in range(quota):
            action = random.choice(actions)
            # Redraw until all N names and all N dates are distinct.
            sample = df_people_filtered.sample(n=N, replace=False).reset_index(drop=True)
            while sample['date'].nunique() < N or sample['name'].nunique() < N:
                sample = df_people_filtered.sample(n=N, replace=False).reset_index(drop=True)

            contexts = sample['context'].tolist()
            context = " ".join(contexts) + " The first person that {action} was"
            context = context.format(action=action)

            # After reset_index the labels equal the row positions.
            first_idx = sample['date'].idxmin()

            row = {}
            for i, (n, d) in enumerate(zip(sample['name'], sample['date'])):
                row[f'name_{i+1}'] = n
                row[f'date_{i+1}'] = d
            row['context'] = context
            row['correct_date'] = sample['date'].loc[first_idx]
            row['correct'] = sample['name'].loc[first_idx]

            data.append(row)

    return pd.DataFrame(data)

# Generate the N-way "first person" dataset and write it to disk.
df_sampled = create_sampled_df(df_people, N=N, num_samples=1000)
df_sampled.to_csv(
    f"datasets/templates/date_{N}way.csv",
    index=False,
)

['Alice', 'Bob', 'Charlie', 'George', 'Kevin', 'Laura', 'Michael', 'Rachel', 'William', 'Aaron', 'Ian', 'Kyle', 'Martin', 'Rose', 'Marco', 'Andrew', 'Frank', 'Henry', 'Jack', 'Leon', 'Peter', 'Scott', 'Grant', 'Neil', 'Dean', 'Hope', 'April', 'Connor', 'Brandon', 'Joy', 'Emily', 'Hunter', 'Tyler', 'Blake', 'Dallas', 'Walker', 'John', 'Fred', 'Steve', 'Matt', 'Luke', 'Richard', 'Maria', 'Jerry', 'Robert', 'Mark', 'Max', 'Jason', 'Alex', 'Josh', 'Ryan']
51


In [16]:
# N-way temporal reasoning sentences with seasons

import pandas as pd
import copy
import random
import datetime
from transformers import AutoTokenizer
from tqdm import tqdm

def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix (``1st``, ``22nd``).

    Works for any non-negative integer, not only days of the month: numbers
    whose last two digits are 11, 12 or 13 take ``th`` (``11th``, ``112th``);
    otherwise the last digit decides (``101st``, ``42nd``, ``53rd``).
    """
    if 11 <= n % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f"{n}{suffix}"

season_to_label = {
    'winter': 0,
    'spring': 1,
    'summer': 2,
    'fall': 3
}

N = 3
prompt = "{name} {action} on the {date}."
min_date = datetime.datetime(2019, 1, 1)
max_date = datetime.datetime(2019, 12, 31)

# Season boundaries inside 2019. Winter wraps around the year end, so its
# start (December) falls later in the calendar than its end (March).
winter_start = datetime.datetime(2019, 12, 21)
winter_end = datetime.datetime(2019, 3, 20)
spring_start = datetime.datetime(2019, 3, 21)
spring_end = datetime.datetime(2019, 6, 20)
summer_start = datetime.datetime(2019, 6, 21)
summer_end = datetime.datetime(2019, 9, 22)
fall_start = datetime.datetime(2019, 9, 23)
fall_end = datetime.datetime(2019, 12, 20)

seasons = {
    'winter': (winter_start, winter_end),
    'spring': (spring_start, spring_end),
    'summer': (summer_start, summer_end),
    'fall': (fall_start, fall_end)
}

names = pd.read_csv("datasets/templates/names.csv")
names = names['name'].tolist()
print(names)
print(len(names))

actions = pd.read_csv("datasets/generated/actions_date.csv")
actions = actions['action']
actions = actions.drop_duplicates()
# Case-insensitive substring match on 'his' / 'her'.
actions = actions[~actions.str.contains('his|her', case=False)]
actions = actions.tolist()

# Build a pool of (name, date, sentence template, season) candidates.
num_days = (max_date - min_date).days
df_people = []
for _ in range(1000):
    name = random.choice(names)
    # Pick a whole-day offset with both endpoints included. Scaling the span
    # by random.random() (always < 1.0) and truncating to midnight could never
    # produce max_date itself.
    date = min_date + datetime.timedelta(days=random.randint(0, num_days))
    date_str = f"{ordinal(date.day)} of {date.strftime('%B')}"

    season = None
    # Winter is the only season tested with "or" because it wraps the year.
    if date >= winter_start or date <= winter_end:
        season = 'winter'
    elif spring_start <= date <= spring_end:
        season = 'spring'
    elif summer_start <= date <= summer_end:
        season = 'summer'
    elif fall_start <= date <= fall_end:
        season = 'fall'

    # '{action}' is kept as a placeholder; it is filled in once per prompt.
    context = prompt.format(name=name, date=date_str, action='{action}')
    df_people.append((name, date, context, season))

df_people = pd.DataFrame(df_people, columns=['name', 'date', 'context', 'season'])
df_people = df_people.drop_duplicates()

def create_sampled_df(df_people, N, num_samples):
    """Build "only person in <season>" prompts from the candidate pool.

    Each prompt mentions ``N`` people with pairwise distinct names and dates,
    at least one of whom is alone in their season. One such season is chosen
    and its single member is the answer. The action is drawn from the
    module-level ``actions`` list.

    Args:
        df_people: DataFrame with ``name``, ``date``, ``context`` and
            ``season`` columns.
        N: Number of people mentioned in each prompt.
        num_samples: Number of prompts to generate (floor-divided over the
            date windows, of which there is currently one).

    Returns:
        DataFrame with ``name_i``/``date_i`` columns (i = 1..N),
        ``alternatives``, ``context``, ``correct_date``, ``correct_season``,
        ``correct_season_label`` and ``correct``.
    """
    date_ranges = [
        (datetime.datetime(2019, 1, 1), datetime.datetime(2019, 12, 31)),
    ]

    data = []
    for start_date, end_date in date_ranges:
        # Candidates whose date falls inside the current window.
        df_people_filtered = df_people[(df_people['date'] >= start_date) & (df_people['date'] <= end_date)]

        for _ in tqdm(range(num_samples//len(date_ranges))):
            action = random.choice(actions)
            sample = df_people_filtered.sample(n=N, replace=False).reset_index(drop=True)
            # Redraw until names and dates are all distinct and some season
            # has exactly one member.
            while sample['date'].nunique() < N or sample['name'].nunique() < N or sample['season'].value_counts().min() > 1:
                sample = df_people_filtered.sample(n=N, replace=False).reset_index(drop=True)

            # Several seasons can be tied at one member each (e.g. three
            # people in three seasons). idxmin returns the first of tied
            # minima, so shuffle the counts first to choose among the tied
            # seasons at random instead of always taking the first listed.
            unique_season = sample['season'].value_counts().sample(frac=1.0).idxmin()
            answer_person = sample[sample['season'] == unique_season]
            answer_date = answer_person['date'].iloc[0]
            answer_name = answer_person['name'].iloc[0]
            answer_season = answer_person['season'].iloc[0]

            contexts = sample['context'].tolist()
            context = " ".join(contexts) + " The only person that {action} in {answer_season} is"
            context = context.format(action=action, answer_season=answer_season)

            row = {}
            alternatives = []
            for i, (n, d) in enumerate(zip(sample['name'], sample['date'])):
                row[f'name_{i+1}'] = n
                row[f'date_{i+1}'] = d
                alternatives.append(n)
            row['alternatives'] = alternatives
            row['context'] = context
            row['correct_date'] = answer_date
            row['correct_season'] = answer_season
            row['correct_season_label'] = season_to_label[answer_season]
            row['correct'] = answer_name

            data.append(row)

    return pd.DataFrame(data)

# Generate the N-way season dataset and write it to disk.
df_sampled = create_sampled_df(df_people, N=N, num_samples=1000)
df_sampled.to_csv(
    f"datasets/templates/date_{N}way_season.csv",
    index=False,
)

['Alice', 'Bob', 'Charlie', 'George', 'Kevin', 'Laura', 'Michael', 'Rachel', 'William', 'Aaron', 'Ian', 'Kyle', 'Martin', 'Rose', 'Marco', 'Andrew', 'Frank', 'Henry', 'Jack', 'Leon', 'Peter', 'Scott', 'Grant', 'Neil', 'Dean', 'Hope', 'April', 'Connor', 'Brandon', 'Joy', 'Emily', 'Hunter', 'Tyler', 'Blake', 'Dallas', 'Walker', 'John', 'Fred', 'Steve', 'Matt', 'Luke', 'Richard', 'Maria', 'Jerry', 'Robert', 'Mark', 'Max', 'Jason', 'Alex', 'Josh', 'Ryan']
51


  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:00<00:00, 1170.46it/s]


In [15]:
# N-way temporal reasoning sentences with temperature

import pandas as pd
import copy
import random
import datetime
from transformers import AutoTokenizer
from tqdm import tqdm

def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix (``1st``, ``22nd``).

    Works for any non-negative integer, not only days of the month: numbers
    whose last two digits are 11, 12 or 13 take ``th`` (``11th``, ``112th``);
    otherwise the last digit decides (``101st``, ``42nd``, ``53rd``).
    """
    if 11 <= n % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f"{n}{suffix}"

def classify_temperature(date):
    """Return 'cold' for dates in November through April, 'warm' otherwise."""
    return 'cold' if date.month in (11, 12, 1, 2, 3, 4) else 'warm'

temperature_to_label = {
    'cold': 0,
    'warm': 1
}

N = 3
prompt = "{name} {action} on the {date}."
min_date = datetime.datetime(2019, 1, 1)
max_date = datetime.datetime(2019, 12, 31)

names = pd.read_csv("datasets/templates/names.csv")
names = names['name'].tolist()
print(names)
print(len(names))

actions = pd.read_csv("datasets/generated/actions_date.csv")
actions = actions['action']
actions = actions.drop_duplicates()
# Case-insensitive substring match on 'his' / 'her'.
actions = actions[~actions.str.contains('his|her', case=False)]
actions = actions.tolist()

# Build a pool of (name, date, sentence template, temperature) candidates.
num_days = (max_date - min_date).days
df_people = []
for _ in range(1000):
    name = random.choice(names)
    # Pick a whole-day offset with both endpoints included. Scaling the span
    # by random.random() (always < 1.0) and truncating to midnight could never
    # produce max_date itself.
    date = min_date + datetime.timedelta(days=random.randint(0, num_days))
    date_str = f"{ordinal(date.day)} of {date.strftime('%B')}"
    temperature = classify_temperature(date)
    # '{action}' is kept as a placeholder; it is filled in once per prompt.
    context = prompt.format(name=name, date=date_str, action='{action}')
    df_people.append((name, date, context, temperature))

df_people = pd.DataFrame(df_people, columns=['name', 'date', 'context', 'temperature'])
df_people = df_people.drop_duplicates()

def create_sampled_df(df_people, N, num_samples):
    """Build "only person in a cold/warm month" prompts from the pool.

    Each prompt mentions ``N`` people with pairwise distinct names and dates,
    at least one of whom is alone in their temperature class. The member of
    the least frequent class is the answer. The action is drawn from the
    module-level ``actions`` list.

    Returns a DataFrame with ``name_i``/``date_i`` columns, ``alternatives``,
    ``context``, ``correct_date``, ``correct_temperature``,
    ``correct_temperature_label`` and ``correct``.
    """
    windows = [
        (datetime.datetime(2019, 1, 1), datetime.datetime(2019, 12, 31)),
    ]
    prompts_per_window = num_samples // len(windows)

    rows = []
    for window_start, window_end in windows:
        # Candidates whose date falls inside the current window.
        in_window = (df_people['date'] >= window_start) & (df_people['date'] <= window_end)
        pool = df_people[in_window]

        for _ in tqdm(range(prompts_per_window)):
            action = random.choice(actions)

            # Rejection sampling: accept the first draw with N distinct dates
            # and names whose rarest temperature class has a single member.
            while True:
                sample = pool.sample(n=N, replace=False).reset_index(drop=True)
                rejected = (
                    sample['date'].nunique() < N
                    or sample['name'].nunique() < N
                    or sample['temperature'].value_counts().min() > 1
                )
                if not rejected:
                    break

            # The answer is the member of the least frequent class.
            rarest_class = sample['temperature'].value_counts().idxmin()
            matches = sample[sample['temperature'] == rarest_class]
            answer_date = matches['date'].iloc[0]
            answer_name = matches['name'].iloc[0]
            answer_temperature = matches['temperature'].iloc[0]

            template = " ".join(sample['context'].tolist()) + " The only person that {action} in a {answer_temperature} month is"
            context = template.format(action=action, answer_temperature=answer_temperature)

            row = {}
            for position, (person, when) in enumerate(zip(sample['name'], sample['date']), start=1):
                row[f'name_{position}'] = person
                row[f'date_{position}'] = when
            row['alternatives'] = sample['name'].tolist()
            row['context'] = context
            row['correct_date'] = answer_date
            row['correct_temperature'] = answer_temperature
            row['correct_temperature_label'] = temperature_to_label[answer_temperature]
            row['correct'] = answer_name

            rows.append(row)

    return pd.DataFrame(rows)

# Generate the N-way temperature dataset and write it to disk.
df_sampled = create_sampled_df(df_people, N=N, num_samples=1000)
df_sampled.to_csv(
    f"datasets/templates/date_{N}way_temperature.csv",
    index=False,
)

['Alice', 'Bob', 'Charlie', 'George', 'Kevin', 'Laura', 'Michael', 'Rachel', 'William', 'Aaron', 'Ian', 'Kyle', 'Martin', 'Rose', 'Marco', 'Andrew', 'Frank', 'Henry', 'Jack', 'Leon', 'Peter', 'Scott', 'Grant', 'Neil', 'Dean', 'Hope', 'April', 'Connor', 'Brandon', 'Joy', 'Emily', 'Hunter', 'Tyler', 'Blake', 'Dallas', 'Walker', 'John', 'Fred', 'Steve', 'Matt', 'Luke', 'Richard', 'Maria', 'Jerry', 'Robert', 'Mark', 'Max', 'Jason', 'Alex', 'Josh', 'Ryan']
51


  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:00<00:00, 1030.18it/s]


In [14]:
# N-way temporal reasoning sentences with months

import pandas as pd
import copy
import random
import datetime
from transformers import AutoTokenizer
from tqdm import tqdm

def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix (``1st``, ``22nd``).

    Works for any non-negative integer, not only days of the month: numbers
    whose last two digits are 11, 12 or 13 take ``th`` (``11th``, ``112th``);
    otherwise the last digit decides (``101st``, ``42nd``, ``53rd``).
    """
    if 11 <= n % 100 <= 13:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f"{n}{suffix}"


N = 3
prompt = "{name} {action} on the {date}."
min_date = datetime.datetime(2019, 1, 1)
max_date = datetime.datetime(2019, 12, 31)


names = pd.read_csv("datasets/templates/names.csv")
names = names['name'].tolist()
print(names)
print(len(names))

actions = pd.read_csv("datasets/generated/actions_date.csv")
actions = actions['action']
actions = actions.drop_duplicates()
# Case-insensitive substring match on 'his' / 'her'.
actions = actions[~actions.str.contains('his|her', case=False)]
actions = actions.tolist()

# Build a pool of (name, date, sentence template, month name) candidates.
num_days = (max_date - min_date).days
df_people = []
for _ in range(1000):
    name = random.choice(names)
    # Pick a whole-day offset with both endpoints included. Scaling the span
    # by random.random() (always < 1.0) and truncating to midnight could never
    # produce max_date itself.
    date = min_date + datetime.timedelta(days=random.randint(0, num_days))
    month = date.strftime('%B')
    date_str = f"{ordinal(date.day)} of {month}"

    # '{action}' is kept as a placeholder; it is filled in once per prompt.
    context = prompt.format(name=name, date=date_str, action='{action}')
    df_people.append((name, date, context, month))

df_people = pd.DataFrame(df_people, columns=['name', 'date', 'context', 'month'])
df_people = df_people.drop_duplicates()

def create_sampled_df(df_people, N, num_samples):
    """Build "only person in <month>" prompts from the candidate pool.

    Each prompt mentions ``N`` people with pairwise distinct names and dates,
    at least one of whom is alone in their month. The month counts are
    shuffled before the least frequent one is picked, and that month's member
    is the answer. The action is drawn from the module-level ``actions`` list.

    Returns a DataFrame with ``name_i``/``date_i`` columns, ``alternatives``,
    ``context``, ``correct_date``, ``correct_month``, ``correct_month_label``
    (zero-based month number) and ``correct``.
    """
    windows = [
        (datetime.datetime(2019, 1, 1), datetime.datetime(2019, 12, 31)),
    ]
    prompts_per_window = num_samples // len(windows)

    rows = []
    for window_start, window_end in windows:
        # Candidates whose date falls inside the current window.
        in_window = (df_people['date'] >= window_start) & (df_people['date'] <= window_end)
        pool = df_people[in_window]

        for _ in tqdm(range(prompts_per_window)):
            action = random.choice(actions)

            # Rejection sampling: accept the first draw with N distinct dates
            # and names in which some month occurs exactly once.
            while True:
                sample = pool.sample(n=N, replace=False).reset_index(drop=True)
                rejected = (
                    sample['date'].nunique() < N
                    or sample['name'].nunique() < N
                    or sample['month'].value_counts().min() > 1
                )
                if not rejected:
                    break

            # Shuffle the counts before idxmin, as idxmin returns the first
            # of several tied minima.
            month_counts = sample['month'].value_counts()
            chosen_month = month_counts.sample(frac=1.0).idxmin()
            matches = sample[sample['month'] == chosen_month]
            answer_date = matches['date'].iloc[0]
            answer_name = matches['name'].iloc[0]
            answer_month = matches['month'].iloc[0]

            template = " ".join(sample['context'].tolist()) + " The only person that {action} in {answer_month} is"
            context = template.format(action=action, answer_month=answer_month)

            row = {}
            for position, (person, when) in enumerate(zip(sample['name'], sample['date']), start=1):
                row[f'name_{position}'] = person
                row[f'date_{position}'] = when
            row['alternatives'] = sample['name'].tolist()
            row['context'] = context
            row['correct_date'] = answer_date
            row['correct_month'] = answer_month
            row['correct_month_label'] = answer_date.month - 1
            row['correct'] = answer_name

            rows.append(row)

    return pd.DataFrame(rows)

# Generate the N-way month dataset and write it to disk.
df_sampled = create_sampled_df(df_people, N=N, num_samples=1000)
df_sampled.to_csv(
    f"datasets/templates/date_{N}way_month.csv",
    index=False,
)

['Alice', 'Bob', 'Charlie', 'George', 'Kevin', 'Laura', 'Michael', 'Rachel', 'William', 'Aaron', 'Ian', 'Kyle', 'Martin', 'Rose', 'Marco', 'Andrew', 'Frank', 'Henry', 'Jack', 'Leon', 'Peter', 'Scott', 'Grant', 'Neil', 'Dean', 'Hope', 'April', 'Connor', 'Brandon', 'Joy', 'Emily', 'Hunter', 'Tyler', 'Blake', 'Dallas', 'Walker', 'John', 'Fred', 'Steve', 'Matt', 'Luke', 'Richard', 'Maria', 'Jerry', 'Robert', 'Mark', 'Max', 'Jason', 'Alex', 'Josh', 'Ryan']
51


  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:00<00:00, 1128.23it/s]


In [None]:
# Time of day reasoning - actions

import pandas as pd
import copy
import random
import datetime

N = 3
prompt = "{name} {action} at {time}."
time_start = datetime.time(hour=0, minute=0)
time_end = datetime.time(hour=23, minute=59)

names = pd.read_csv("datasets/templates/names.csv")
names = names['name'].tolist()
print(names)
print(len(names))

actions = pd.read_csv("datasets/generated/actions_time_of_day.csv")
actions = actions.drop_duplicates()
# Drops actions that contain 'his' or 'her' across actions_present, actions_past
# (case-insensitive substring match).
actions = actions[~actions['actions_present'].str.contains('his|her', case=False)]
actions = actions[~actions['actions_past'].str.contains('his|her', case=False)]
# One list per CSV row; the sampler unpacks each entry as (present, past).
actions = actions.values.tolist()

# All quarter-hour times of day between time_start and time_end (inclusive).
times = [
    datetime.time(hour, minute)
    for hour in range(time_start.hour, time_end.hour + 1)
    for minute in range(0, 60, 15)
    if time_start <= datetime.time(hour, minute) <= time_end
]

# Build a pool of (name, time, sentence template) candidates.
df_people = []
for _ in range(1000):
    name = random.choice(names)
    time = random.choice(times)
    # Hours 1-9 are written without a leading zero ("9:15"); all other hours
    # use two digits ("00:15", "13:15"). Built with f-strings because the
    # "%-H" strftime directive is a platform-specific extension.
    if 0 < time.hour < 10:
        time_str = f"{time.hour}:{time.minute:02d}"
    else:
        time_str = f"{time.hour:02d}:{time.minute:02d}"

    # '{action_present}' is kept as a placeholder; it is filled in per prompt.
    context = prompt.format(name=name, time=time_str, action='{action_present}')
    df_people.append((name, time, context))

df_people = pd.DataFrame(df_people, columns=['name', 'time', 'context'])
df_people = df_people.drop_duplicates()

def _format_clock(t):
    """Render a time the way the prompts do: "9:15" for hours 1-9, two-digit hours otherwise."""
    if 0 < t.hour < 10:
        return f"{t.hour}:{t.minute:02d}"
    return f"{t.hour:02d}:{t.minute:02d}"


def create_sampled_df(df_people, N, num_samples):
    """Build ``num_samples`` "who acted last" time-of-day prompts.

    Each prompt lists ``N`` people with pairwise distinct names and times,
    states a random current time, and asks who acted most recently. The
    answer is the person with the latest time strictly before the current
    time. If nobody acted earlier today, the latest time overall is used
    (i.e. the previous day) and ``date_change`` is set. Actions come from the
    module-level ``actions`` list of (present, past) pairs.

    Args:
        df_people: DataFrame with ``name``, ``time`` and ``context`` columns.
        N: Number of people mentioned in each prompt.
        num_samples: Number of prompts to generate.

    Returns:
        DataFrame with ``name_i``/``time_i`` columns (i = 1..N), ``context``,
        ``date_change``, ``alternatives``, ``correct_time``,
        ``correct_time_expr`` (the answer time as written in the prompt),
        ``correct_time_diff`` (minutes elapsed since the answer time),
        ``correct`` and the character span ``time_idx_start`` /
        ``time_idx_end`` of the answer time inside ``context``.
    """
    date_change_count = 0
    data = []
    for _ in range(num_samples):
        action_present, action_past = random.choice(actions)

        # "Now" can be any minute of the day.
        current_time = datetime.time(random.randint(0, 23), random.randint(0, 59))
        current_time_str = _format_clock(current_time)

        # Redraw until all N names and all N times are distinct.
        sample = df_people.sample(n=N, replace=False).reset_index(drop=True)
        while sample['time'].nunique() < N or sample['name'].nunique() < N:
            sample = df_people.sample(n=N, replace=False).reset_index(drop=True)

        # Fill each sentence on its own so its offset inside the final prompt
        # is known exactly.
        fill = {
            'action_present': action_present,
            'action_past': action_past,
            'current_time_str': current_time_str,
        }
        sentences = [template.format(**fill) for template in sample['context']]
        question = " It is now {current_time_str}. The last person who {action_past} is".format(**fill)
        context = " ".join(sentences) + question

        # Most recent action strictly before "now"; wrap to yesterday if none.
        candidates = sample[sample['time'] < current_time]
        date_change_flag = bool(candidates.empty)
        if date_change_flag:
            date_change_count += 1
            candidates = sample
        answer_person = candidates.sort_values(by='time', ascending=False).head(1)
        # After reset_index the row label equals the sentence position.
        answer_pos = int(answer_person.index[0])
        answer_time = answer_person['time'].iloc[0]
        answer_name = answer_person['name'].iloc[0]

        # Same rendering as in the prompt ("00:15", not "0:15"), so the span
        # below covers exactly the text that appears in the context.
        time_expr = _format_clock(answer_time)

        # Search only inside the answer's own sentence. A search over the
        # whole prompt could hit another person's time first (e.g. "9:15"
        # inside "19:15").
        sentence_start = sum(len(s) + 1 for s in sentences[:answer_pos])
        offset = sentences[answer_pos].rfind(time_expr)
        time_idx_start = sentence_start + offset if offset >= 0 else -1

        # Minutes elapsed from the answer time to "now". A negative timedelta
        # normalises to days=-1 plus positive seconds, so .seconds already
        # wraps across midnight when the answer lies on the previous day.
        anchor_day = datetime.date(2000, 1, 1)
        elapsed = (
            datetime.datetime.combine(anchor_day, current_time)
            - datetime.datetime.combine(anchor_day, answer_time)
        )

        row = {}
        alternatives = []
        for i, (n, d) in enumerate(zip(sample['name'], sample['time'])):
            row[f'name_{i+1}'] = n
            row[f'time_{i+1}'] = d
            alternatives.append(n)
        row['context'] = context
        row['date_change'] = date_change_flag
        row['alternatives'] = alternatives
        row['correct_time'] = answer_time
        row['correct_time_expr'] = time_expr
        row['correct_time_diff'] = elapsed.seconds // 60
        row['correct'] = answer_name
        row['time_idx_start'] = time_idx_start
        row['time_idx_end'] = time_idx_start + len(time_expr)

        data.append(row)
    print(f"Date change count: {date_change_count}")
    print(f"Percentage of date change: {date_change_count/num_samples}")

    return pd.DataFrame(data)

# Generate the N-way time-of-day dataset and write it to disk.
df_sampled = create_sampled_df(df_people, N=N, num_samples=1000)
df_sampled.to_csv(
    f"datasets/templates/time_of_day_{N}way.csv",
    index=False,
)

['Alice', 'Bob', 'Charlie', 'George', 'Kevin', 'Laura', 'Michael', 'Rachel', 'William', 'Aaron', 'Ian', 'Kyle', 'Martin', 'Rose', 'Marco', 'Andrew', 'Frank', 'Henry', 'Jack', 'Leon', 'Peter', 'Scott', 'Grant', 'Neil', 'Dean', 'Hope', 'April', 'Connor', 'Brandon', 'Joy', 'Emily', 'Hunter', 'Tyler', 'Blake', 'Dallas', 'Walker', 'John', 'Fred', 'Steve', 'Matt', 'Luke', 'Richard', 'Maria', 'Jerry', 'Robert', 'Mark', 'Max', 'Jason', 'Alex', 'Josh', 'Ryan']
51
Date change count: 219
Percentage of date change: 0.219


In [32]:
# Time of day reasoning - phases

import pandas as pd
import copy
import random
import datetime

def classify_time_of_day(time):
    """Map a time to its phase of the day by hour.

    Hours below 6 are 'night', below 12 'morning', below 18 'afternoon',
    and everything else is 'evening'.
    """
    for upper_hour, phase in ((6, 'night'), (12, 'morning'), (18, 'afternoon')):
        if time.hour < upper_hour:
            return phase
    return 'evening'

phase_to_label = {
    'night': 0,
    'morning': 1,
    'afternoon': 2,
    'evening': 3
}

N = 3
prompt = "{name} {action} at {time}."
time_start = datetime.time(hour=0, minute=0)
time_end = datetime.time(hour=23, minute=59)

names = pd.read_csv("datasets/templates/names.csv")
names = names['name'].tolist()
print(names)
print(len(names))

actions = pd.read_csv("datasets/generated/actions_time_of_day.csv")
actions = actions.drop_duplicates()
# Drops actions that contain 'his' or 'her' across actions_present, actions_past
# (case-insensitive substring match).
actions = actions[~actions['actions_present'].str.contains('his|her', case=False)]
actions = actions[~actions['actions_past'].str.contains('his|her', case=False)]
# One list per CSV row; the sampler unpacks each entry as (present, past).
actions = actions.values.tolist()

# All quarter-hour times of day between time_start and time_end (inclusive).
times = [
    datetime.time(hour, minute)
    for hour in range(time_start.hour, time_end.hour + 1)
    for minute in range(0, 60, 15)
    if time_start <= datetime.time(hour, minute) <= time_end
]

# Build a pool of (name, time, sentence template, phase) candidates.
df_people = []
for _ in range(1000):
    name = random.choice(names)
    time = random.choice(times)
    # Hours 1-9 are written without a leading zero ("9:15"); all other hours
    # use two digits ("00:15", "13:15"). Built with f-strings because the
    # "%-H" strftime directive is a platform-specific extension.
    if 0 < time.hour < 10:
        time_str = f"{time.hour}:{time.minute:02d}"
    else:
        time_str = f"{time.hour:02d}:{time.minute:02d}"

    # Classify the phase of the day
    phase = classify_time_of_day(time)

    # '{action_present}' is kept as a placeholder; it is filled in per prompt.
    context = prompt.format(name=name, time=time_str, action='{action_present}')
    df_people.append((name, time, context, phase))

df_people = pd.DataFrame(df_people, columns=['name', 'time', 'context', 'phase'])
df_people = df_people.drop_duplicates()

def _clock_expr(t):
    """Render a ``datetime.time`` the way the per-person sentences write it.

    Hours 1-9 have no leading zero ("1:30"); hour 0 and hours 10-23 are
    written with two digits ("00:15", "11:30").
    """
    if 0 < t.hour < 10:
        return f"{t.hour}:{t.minute:02d}"
    return f"{t.hour:02d}:{t.minute:02d}"


def create_sampled_df(df_people, N, num_samples, action_pairs=None, phase_labels=None):
    """Build ``num_samples`` N-person "odd phase out" examples.

    Each example joins the sentences of N sampled people and appends the
    question "The only person that <action> in the <phase> is". The answer
    is the person whose phase of the day is the least frequent in the sample.

    Args:
        df_people: DataFrame with the columns ``name``, ``time``
            (``datetime.time``), ``context`` (sentence that still contains an
            ``{action_present}`` placeholder) and ``phase``.
        N: number of people per example.
        num_samples: number of examples (rows) to generate.
        action_pairs: sequence of ``(action_present, action_past)`` pairs to
            draw from. Defaults to the module-level ``actions`` list.
        phase_labels: mapping from phase name to integer label. Defaults to
            the module-level ``phase_to_label``.

    Returns:
        DataFrame with one row per example: ``name_i`` / ``time_i`` for each
        person, the full ``context``, the ``alternatives`` (all names), the
        ``correct*`` answer fields, and ``time_idx_start`` / ``time_idx_end``,
        the character span of the answer's time inside ``context`` (start is
        -1 if the time expression is not present in the answer's sentence).
    """
    if action_pairs is None:
        action_pairs = actions
    if phase_labels is None:
        phase_labels = phase_to_label

    data = []
    for _ in range(num_samples):
        action_present, action_past = random.choice(action_pairs)

        # Resample until all N names and times are distinct and at least one
        # phase occurs exactly once in the sample.
        sample = df_people.sample(n=N, replace=False).reset_index(drop=True)
        while (sample['time'].nunique() < N
               or sample['name'].nunique() < N
               or sample['phase'].value_counts().min() > 1):
            sample = df_people.sample(n=N, replace=False).reset_index(drop=True)

        # The answer is the person holding the least frequent phase.
        unique_phase = sample['phase'].value_counts().idxmin()
        answer_person = sample[sample['phase'] == unique_phase]
        # The index was reset above, so this label is also the position of
        # the answer's sentence in the joined context.
        answer_idx = int(answer_person.index[0])
        answer_time = answer_person['time'].iloc[0]
        answer_name = answer_person['name'].iloc[0]
        answer_phase = answer_person['phase'].iloc[0]
        answer_phase_label = phase_labels[answer_phase]

        # Fill the placeholders sentence by sentence so that the offset of
        # each sentence inside the final context is known.
        fill = dict(action_present=action_present, action_past=action_past,
                    answer_phase=answer_phase)
        sentences = [c.format(**fill) for c in sample['context']]
        question = " The only person that {action_present} in the {answer_phase} is".format(**fill)
        context = " ".join(sentences) + question

        # Locate the time inside the answer's *own* sentence. Searching the
        # whole context could hit another person's time first (e.g. "1:30"
        # inside "11:30").
        time_expr = _clock_expr(answer_time)
        sentence_start = sum(len(s) + 1 for s in sentences[:answer_idx])
        local_idx = sentences[answer_idx].rfind(time_expr)
        time_idx_start = sentence_start + local_idx if local_idx != -1 else -1

        row = {}
        alternatives = []
        for i, (n, d) in enumerate(zip(sample['name'], sample['time'])):
            row[f'name_{i+1}'] = n
            row[f'time_{i+1}'] = d
            alternatives.append(n)
        row['context'] = context
        row['alternatives'] = alternatives
        row['correct_time'] = answer_time
        row['correct_time_expr'] = time_expr
        row['correct'] = answer_name
        row['correct_phase'] = answer_phase
        row['correct_phase_label'] = answer_phase_label
        row['time_idx_start'] = time_idx_start
        row['time_idx_end'] = time_idx_start + len(time_expr)

        data.append(row)
    return pd.DataFrame(data)

# Generate the N-way time-of-day examples and store them as a CSV template.
df_sampled = create_sampled_df(
    df_people,
    N=N,
    num_samples=1000,
)

df_sampled.to_csv(
    f"datasets/templates/time_of_day_{N}way_phase.csv",
    index=False,
)

['Alice', 'Bob', 'Charlie', 'George', 'Kevin', 'Laura', 'Michael', 'Rachel', 'William', 'Aaron', 'Ian', 'Kyle', 'Martin', 'Rose', 'Marco', 'Andrew', 'Frank', 'Henry', 'Jack', 'Leon', 'Peter', 'Scott', 'Grant', 'Neil', 'Dean', 'Hope', 'April', 'Connor', 'Brandon', 'Joy', 'Emily', 'Hunter', 'Tyler', 'Blake', 'Dallas', 'Walker', 'John', 'Fred', 'Steve', 'Matt', 'Luke', 'Richard', 'Maria', 'Jerry', 'Robert', 'Mark', 'Max', 'Jason', 'Alex', 'Josh', 'Ryan']
51


In [18]:
# Duration reasoning sentences

import pandas as pd
import copy
import random
import datetime
from transformers import AutoTokenizer

# Function to render a number with its English ordinal suffix (1 -> "1st")
def ordinal(n):
    """Return ``n`` followed by its English ordinal suffix.

    Works for any integer, not only days of the month: the suffix is chosen
    from the last digit ("st", "nd", "rd", otherwise "th"), except that
    numbers ending in 11, 12 or 13 always take "th".

    Args:
        n: the integer to render.

    Returns:
        A string such as "1st", "22nd", "13th" or "101st".
    """
    # 11, 12 and 13 (and 111, 212, ...) are the irregular cases.
    if n % 100 in (11, 12, 13):
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f"{n}{suffix}"

N = 3
# prompt = "{name}'s subscription starts on the {date} and lasts for {duration}."
min_date = datetime.datetime(2019, 1, 1)
max_date = datetime.datetime(2019, 12, 31)

names = pd.read_csv("datasets/templates/names.csv")['name'].tolist()
print(names)
print(len(names))

actions = pd.read_csv("datasets/generated/actions_duration.csv").drop_duplicates()
actions = actions[~actions['action'].str.contains('his|her', case=False)]

# Each duration family is a pair of parallel lists: the wording used in the
# sentence (duration_*) and the matching timedelta (length_*).
# Months count as 30 days and years as 365 days.
duration_days = ['1 day'] + [f'{d} days' for d in range(2, 11)]
length_days = [datetime.timedelta(days=d) for d in range(1, 11)]

duration_weeks = (['1 week']
                  + [f'{w} weeks' for w in (2, 3, 4)]
                  + [f'{d} days' for d in (7, 10, 14, 21, 25, 30)])
length_weeks = ([datetime.timedelta(weeks=w) for w in (1, 2, 3, 4)]
                + [datetime.timedelta(days=d) for d in (7, 10, 14, 21, 25, 30)])

duration_months = (['1 month']
                   + [f'{m} months' for m in (2, 3, 4, 6, 8)]
                   + [f'{w} weeks' for w in (4, 6, 8, 10)])
length_months = ([datetime.timedelta(days=30 * m) for m in (1, 2, 3, 4, 6, 8)]
                 + [datetime.timedelta(weeks=w) for w in (4, 6, 8, 10)])

duration_years = (['1 year']
                  + [f'{y} years' for y in (2, 3, 4)]
                  + [f'{m} months' for m in (12, 18, 24, 36)])
length_years = ([datetime.timedelta(days=365 * y) for y in (1, 2, 3, 4)]
                + [datetime.timedelta(days=30 * m) for m in (12, 18, 24, 36)])

# (type name, timedeltas, wordings) for every family a person can draw from.
duration_families = [
    ('days', length_days, duration_days),
    ('weeks', length_weeks, duration_weeks),
    ('months', length_months, duration_months),
    ('years', length_years, duration_years),
]

# Sample (name, date, end_date, duration, ...) records
df_people = []
for i in range(1000):
    # Pick who the record is about
    name = random.choice(names)

    # Pick a uniformly random instant in [min_date, max_date], then truncate
    # it to midnight so only the calendar day remains
    date = min_date + (max_date - min_date) * random.random()
    date = date.replace(hour=0, minute=0, second=0, microsecond=0)
    date_str = f"{ordinal(date.day)} of {date.strftime('%B')}"

    # Pick a duration family, then one entry of that family
    duration_type, durations, durations_str = random.choice(duration_families)
    idx_duration = random.randint(0, len(durations) - 1)
    duration, duration_str = durations[idx_duration], durations_str[idx_duration]
    duration_length = duration.days

    # End date of the interval
    # NB: approximate only, since months and years use fixed day counts
    end_date = date + duration

    # context = prompt.format(name=name, date=date_str, duration=duration_str, action='{action}')
    df_people.append(
        (name, date, end_date, duration, duration_str, duration_length, duration_type)
    )

df_people = pd.DataFrame(
    df_people,
    columns=['name', 'date', 'end_date', 'duration', 'duration_str',
             'duration_length', 'duration_type'],
).drop_duplicates()

def _is_unambiguous_duration_sample(sample, N):
    """Return True if ``sample`` yields a well-defined "ends first" question."""
    if sample['date'].nunique() < N or sample['name'].nunique() < N:
        return False
    # All people in one example must share the same duration family.
    if sample['duration_type'].nunique() > 1:
        return False
    # Exactly one person may hold the earliest end date; with a tie the
    # question has no single correct answer.
    earliest = sample['end_date'].min()
    return int((sample['end_date'] == earliest).sum()) == 1


def create_sampled_df(df_people, N, num_samples):
    """Build N-person "whose activity ends first" examples.

    Examples are generated per start-date window (overlapping two-month
    windows over 2019, offset by one month; the last one covers December
    only). Each window contributes ``num_samples // 12`` examples, so the
    result has fewer than ``num_samples`` rows when ``num_samples`` is not a
    multiple of 12.

    Args:
        df_people: DataFrame with the columns ``name``, ``date``,
            ``end_date``, ``duration``, ``duration_str``, ``duration_length``
            and ``duration_type``.
        N: number of people per example.
        num_samples: total number of examples requested (see note above).

    Returns:
        DataFrame with one row per example: ``name_i`` / ``date_i`` /
        ``duration_str_i`` for each person, the full ``context``, the shared
        ``duration_type`` and the ``correct*`` fields describing the person
        whose interval ends first.

    Reads the module-level ``actions`` DataFrame; it is expected to provide
    an ``action`` sentence template, an ``activity`` noun and one
    truthy/falsy compatibility column per duration type.
    """
    date_ranges = [
        (datetime.datetime(2019, 1, 1), datetime.datetime(2019, 2, 28)),
        (datetime.datetime(2019, 3, 1), datetime.datetime(2019, 4, 30)),
        (datetime.datetime(2019, 5, 1), datetime.datetime(2019, 6, 30)),
        (datetime.datetime(2019, 7, 1), datetime.datetime(2019, 8, 31)),
        (datetime.datetime(2019, 9, 1), datetime.datetime(2019, 10, 31)),
        (datetime.datetime(2019, 11, 1), datetime.datetime(2019, 12, 31)),

        (datetime.datetime(2019, 2, 1), datetime.datetime(2019, 3, 31)),
        (datetime.datetime(2019, 4, 1), datetime.datetime(2019, 5, 31)),
        (datetime.datetime(2019, 6, 1), datetime.datetime(2019, 7, 31)),
        (datetime.datetime(2019, 8, 1), datetime.datetime(2019, 9, 30)),
        (datetime.datetime(2019, 10, 1), datetime.datetime(2019, 11, 30)),
        (datetime.datetime(2019, 12, 1), datetime.datetime(2019, 12, 31)),
    ]

    data = []
    for start_date, end_date in date_ranges:
        # Keep only the people whose start date falls inside this window
        df_people_filtered = df_people[(df_people['date'] >= start_date) & (df_people['date'] <= end_date)]

        for _ in range(num_samples // len(date_ranges)):
            # Resample until the draw gives a single, well-defined answer
            sample = df_people_filtered.sample(n=N, replace=False).reset_index(drop=True)
            while not _is_unambiguous_duration_sample(sample, N):
                sample = df_people_filtered.sample(n=N, replace=False).reset_index(drop=True)

            duration_type = sample['duration_type'].iloc[0]

            # Sample a random action compatible with the duration type.
            # Truthiness is tested (not `is False`) so that 0/1 flags are
            # honoured as well as real booleans.
            action = actions.sample()
            while not action[duration_type].item():
                action = actions.sample()

            template = action['action'].item()
            contexts = [
                template.format(
                    name=s['name'],
                    date=f"{ordinal(s['date'].day)} of {s['date'].strftime('%B')}",
                    duration=s['duration_str'],
                )
                for _, s in sample.iterrows()
            ]
            context = " ".join(contexts) + f" The person whose {action['activity'].item()} ends first is"

            # The earliest end date is unique here, so idxmin is unambiguous
            answer_person = sample.loc[sample['end_date'].idxmin()]
            answer_date = answer_person['date']

            row = {}
            for i, (n, d, dur_str) in enumerate(zip(sample['name'], sample['date'], sample['duration_str'])):
                row[f'name_{i+1}'] = n
                row[f'date_{i+1}'] = d
                row[f'duration_str_{i+1}'] = dur_str
            row['context'] = context
            row['duration_type'] = duration_type
            row['correct_date'] = answer_date
            row['correct_date_expr'] = f"the {ordinal(answer_date.day)} of {answer_date.strftime('%B')}"
            row['correct_end_date'] = answer_person['end_date']
            row['correct_duration'] = answer_person['duration']
            row['correct_duration_str'] = answer_person['duration_str']
            row['correct_duration_length'] = answer_person['duration_length']
            row['correct_month'] = answer_date.strftime('%B')
            row['correct'] = answer_person['name']

            data.append(row)

    return pd.DataFrame(data)

# Generate the N-way duration examples and store them as a CSV template.
df_sampled = create_sampled_df(
    df_people,
    N=N,
    num_samples=1000,
)

df_sampled.to_csv(
    f"datasets/templates/duration_{N}way.csv",
    index=False,
)

['Alice', 'Bob', 'Charlie', 'George', 'Kevin', 'Laura', 'Michael', 'Rachel', 'William', 'Aaron', 'Ian', 'Kyle', 'Martin', 'Rose', 'Marco', 'Andrew', 'Frank', 'Henry', 'Jack', 'Leon', 'Peter', 'Scott', 'Grant', 'Neil', 'Dean', 'Hope', 'April', 'Connor', 'Brandon', 'Joy', 'Emily', 'Hunter', 'Tyler', 'Blake', 'Dallas', 'Walker', 'John', 'Fred', 'Steve', 'Matt', 'Luke', 'Richard', 'Maria', 'Jerry', 'Robert', 'Mark', 'Max', 'Jason', 'Alex', 'Josh', 'Ryan']
51
