<a href="https://colab.research.google.com/github/brendanpshea/logic-prolog/blob/main/Werewolf_Python_Script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import pandas as pd
import numpy as np

# Pin the global NumPy RNG so every run regenerates the same dataset
np.random.seed(42)

# Population sizes
N = 1000  # Total number of students
WEREWOLF_PERCENTAGE = 0.25
N_WEREWOLVES = int(N * WEREWOLF_PERCENTAGE)
N_HUMANS = N - N_WEREWOLVES

# Sex: one label per student, drawn uniformly from the two options
sex = ['Male', 'Female']
students_sex = np.random.choice(sex, size=N)

# Baseline (human) height in inches.
# A full-length sample is drawn for each sex (mean 69 for males, 64 for
# females, standard deviation 3 for both) and the matching one is picked
# per student. The male sample is drawn first, then the female sample.
_male_heights = np.random.normal(loc=69, scale=3, size=N)
_female_heights = np.random.normal(loc=64, scale=3, size=N)
heights_human = np.where(students_sex == 'Male', _male_heights, _female_heights)

# Werewolf height: the human baseline plus a bonus of roughly 2 inches
_werewolf_height_bonus = np.random.normal(loc=2, scale=0.5, size=N)
heights_werewolf = heights_human + _werewolf_height_bonus

# Eye color palettes: werewolves may additionally have yellow eyes
eye_colors = ['Brown', 'Blue', 'Green', 'Grey']
eye_colors_werewolf = [*eye_colors, 'Yellow']

# Tardy days: mean 5, standard deviation 2; werewolves get a flat +3
tardy_days_human = np.random.normal(loc=5, scale=2, size=N)
tardy_days_werewolf = tardy_days_human + 3

# GPA: normally distributed with mean 3.2 and standard deviation 0.5
gpa = np.random.normal(loc=3.2, scale=0.5, size=N)

# Number of werewolf parents
def werewolf_parents(is_werewolf):
    """Randomly draw how many werewolf parents (0, 1 or 2) a student has.

    A truthy ``is_werewolf`` selects the distribution that gives more weight
    to having one or two werewolf parents; otherwise the draw is heavily
    weighted toward zero. Each call makes one draw from NumPy's global RNG.
    """
    parent_counts = [0, 1, 2]
    # Weights are aligned with parent_counts: P(0), P(1), P(2)
    weights = [0.76, 0.2, 0.04] if is_werewolf else [0.96, 0.03, 0.01]
    return np.random.choice(parent_counts, p=weights)

# Detentions: heavy-tailed counts from a Pareto draw, scaled by 3 and
# truncated to whole numbers. Most students get few; a handful get very many.
alpha = 1.16  # Alpha (shape) parameter for the Pareto distribution
detentions = (np.random.pareto(alpha, N) * 3).astype(int)

# Werewolf mask: exactly N_WEREWOLVES True entries, shuffled into random
# positions so werewolves are spread across the student list.
is_werewolf = np.array([True] * N_WEREWOLVES + [False] * N_HUMANS)
np.random.shuffle(is_werewolf)

# Pick the werewolf or human variant of each trait per student
heights = np.where(is_werewolf, heights_werewolf, heights_human)
# The underlying normal draw is unbounded below, so floor at 0 before
# truncating: a negative count of tardy days is meaningless.
tardy_days = np.maximum(
    np.where(is_werewolf, tardy_days_werewolf, tardy_days_human), 0
).astype(int)

# Eye color: one uniform draw per student from the palette matching their
# status (kept as a per-student loop so the RNG call sequence is unchanged)
eye_colors_final = [
    np.random.choice(eye_colors_werewolf if is_wolf else eye_colors)
    for is_wolf in is_werewolf
]

# Number of werewolf parents, drawn per student according to their status
parents = [werewolf_parents(iw) for iw in is_werewolf]

# Detentions are associated with lower GPA: 0.01 points per detention
gpa -= 0.01 * detentions  # Element-wise subtraction
# Keep GPA on the 0.0-4.0 scale. The upper cap handles high normal draws;
# the lower floor is needed because the heavy-tailed detention counts can
# otherwise drive the penalised GPA below zero.
gpa = np.clip(gpa, 0.0, 4.0)

# Assemble one row per student
df = pd.DataFrame({
    'Sex': students_sex,
    'Height': heights,
    'EyeColor': eye_colors_final,
    'TardyDays': tardy_days,
    'GPA': gpa,
    'WerewolfParents': parents,
    'Detentions': detentions,
    'IsWerewolf': is_werewolf
})

# Round the floating-point columns to two decimals for a tidy export
df = df.round(2)

# CSV file path
csv_file_path = 'high_school_werewolf_data.csv'

# Saving to CSV (without the index column)
df.to_csv(csv_file_path, index=False)


In [18]:
# Preview the first rows of the generated dataset (up to 20)
df.head(n=20)

Unnamed: 0,Sex,Height,EyeColor,TardyDays,GPA,WerewolfParents,Detentions,IsWerewolf
0,Male,70.03,Brown,3,2.67,0,19,False
1,Female,68.68,Grey,5,2.48,0,0,False
2,Male,74.3,Brown,9,3.26,1,1,True
3,Male,69.79,Yellow,8,3.46,0,3,True
4,Male,67.38,Green,4,3.46,0,0,True
5,Female,63.5,Brown,6,3.06,0,55,True
6,Male,66.29,Green,7,3.14,0,22,True
7,Male,76.17,Brown,7,2.93,0,0,True
8,Male,72.54,Brown,8,3.62,0,0,False
9,Female,64.55,Brown,2,2.52,0,0,False


In [5]:
# Mean of the boolean column, i.e. the fraction of students who are werewolves
df.loc[:, "IsWerewolf"].mean()

0.25