In [None]:
# !pip3 install torch torchvision torchaudio
# !pip3 install pyro-ppl 

In [None]:
import torch
import pandas as pd
import pyro
from pyro.distributions import Bernoulli, Categorical, Normal

# Mount Google Drive at /drive so the generated csv files can be saved there.
# Requires the google.colab package, i.e. this cell is meant to run in Colab.
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


The seed is set to a different value from the one used to generate the training data set, so that the data sampled here does not duplicate the training data.

In [None]:
# Set the pyro seed so that every sample drawn below is reproducible.
# The value 100 is deliberately different from the seed used for the training
# data set, so that this data does not duplicate the training data.
pyro.set_rng_seed(100)

In [None]:
# Label lookups: a sampled index i corresponds to the i-th name in the list
# (e.g. a house index of 2 is "Slytherin").
# NOTE: the data-generation cells further down rebind `houses` to a list of
# sampled house indices, so the house names are only available under this name
# until one of those cells has run.
houses = ["Gryffindor", "Hufflepuff", "Slytherin", "Ravenclaw"]
personalities = ["Type A", "Type B", "Type C", "Type D"]
# Presumably a herbology sample of 0 means "Weak" and 1 means "Strong";
# the list is not used for decoding in this part of the notebook -- confirm.
herbology = ["Weak", "Strong"]

def sample_house(sample_shape=torch.Size()):
    """Samples Hogwarts houses, each of the 4 houses being equally likely.

    The possible houses are: Gryffindor, Hufflepuff, Slytherin and Ravenclaw.
    For example, a house_idx of 2 corresponds to Slytherin.

    Args:
        sample_shape: optional tuple or torch.Size giving the shape of the
                  batch of independent draws to return, e.g. (N,) for N data
                  points. Defaults to an empty shape, which returns a single
                  draw exactly as a call without arguments always did.

    Returns:
        house_idx: integer pytorch tensor where each value represents the
                  index corresponding to a Hogwarts house.
                  shape: sample_shape
                          () (a 0-dim tensor) by default, (N) when
                          sample_shape is (N,).
    """
    # One equal weight per house; the same distribution is used whether one
    # draw or a whole batch of draws is requested.
    house_idx = Categorical(probs=torch.ones(4)).sample(sample_shape)
    return house_idx


def sample_personality(house_idx):
    """Samples the 4 personality types with the probability for the given house.

    Args:
        house_idx: integer pytorch tensor (or plain int) where the value
                  represents the index corresponding to a Hogwarts house.
                  shape: () for a single student or (N)
                          N: number of data points.

    Returns:
        personality_idx: pytorch tensor where the value represents the index
                  corresponding to the four personality types A, B, C or D.
                  shape: same as house_idx, i.e. one personality index per
                          house index.
    """
    # Row i holds the personality probabilities for house i;
    # the columns are Type A, Type B, Type C and Type D.
    probs = torch.tensor([
             [.39, .39, .18, .04], # Gryffindor
             [.05, .05, .40, .50], # Hufflepuff
             [.39, .39, .18, .04], # Slytherin
             [.25, .25, .40, .10], # Ravenclaw
    ])
    # Indexing a tensor already yields a tensor, so it is passed on as-is.
    # Wrapping it in torch.tensor() again would only copy it and make PyTorch
    # emit its "copy construct from a tensor" UserWarning.
    prob = probs[house_idx]
    personality_idx = Categorical(probs=prob).sample()
    return personality_idx

def sample_herbology(house_idx):
    """Samples the student's herbology score with the probability for the given house.

    Args:
        house_idx: integer pytorch tensor (or plain int) where the value
                  represents the index corresponding to a Hogwarts house.
                  shape: () for a single student or (N)
                          N: number of data points.

    Returns:
        herbology_idx: float pytorch tensor holding 0. or 1., where the value
                  represents the index corresponding to either a weak or a
                  strong herbology score.
                  shape: same as house_idx, i.e. one score per house index.
    """
    # Entry i is the probability that a student of house i is sampled as 1.
    probs = torch.tensor([.4, .9, .6, .8])
    # Indexing a tensor already yields a tensor, so it is passed on as-is.
    # Wrapping it in torch.tensor() again would only copy it and make PyTorch
    # emit its "copy construct from a tensor" UserWarning.
    prob = probs[house_idx]
    herbology_idx = Bernoulli(probs=prob).sample()
    return herbology_idx

def sample_quidditch_rank(house_idx):
    """Draws a student's quidditch rank from the normal distribution of their house.

    Args:
        house_idx: pytorch tensor where the value represents the index
                  corresponding to a Hogwarts house.
                  shape: () for a single student or (N)
                          N: number of data points.

    Returns:
        score: pytorch tensor where the value represents the quidditch score.
                  shape: same as house_idx, i.e. one score per house index.
    """
    # Per-house mean, ordered Gryffindor, Hufflepuff, Slytherin, Ravenclaw.
    house_means = torch.tensor([580.0, 460.0, 300.0, 500.0])
    # The standard deviation is sqrt(mean) for the first two houses and
    # 2 * sqrt(mean) for the last two.
    spread_factors = torch.tensor([1.0, 1.0, 2.0, 2.0])
    house_stds = spread_factors * house_means ** .5
    return Normal(house_means[house_idx], house_stds[house_idx]).sample()

In [None]:
# Simulate the full data set: one row per student.
num_data_points = 10000
# NOTE: from here on `houses` holds the sampled house indices (0-dim tensors),
# no longer the list of house names defined with the sampling functions.
houses = [sample_house() for _ in range(num_data_points)]

# The columns are sampled one after the other (all personalities, then all
# herbology scores, then all quidditch ranks). Keep this order: it determines
# which random numbers each column receives for the fixed seed.
sim_data_dict = {
    'house': [house.item() for house in houses],
    'personality': torch.tensor(
        [sample_personality(house) for house in houses]),
    'herbology': torch.tensor(
        [sample_herbology(house) for house in houses]),
    'quidditch rank': torch.tensor(
        [sample_quidditch_rank(house) for house in houses])
}

# Tensor views of the individual columns.
personality_data = sim_data_dict['personality']
herbology_data = sim_data_dict['herbology']
quidditch_rank_data = sim_data_dict['quidditch rank']

# Hand pandas plain NumPy arrays instead of relying on it to coerce torch
# tensors implicitly; .numpy() keeps each tensor's dtype, so the columns stay
# numeric and the csv output does not depend on that coercion.
sim_data = pd.DataFrame({
    column: values.numpy() if isinstance(values, torch.Tensor) else values
    for column, values in sim_data_dict.items()
})
assert len(sim_data) == num_data_points

Note: `out_filepath` should point to the location in your Google Drive where you would like to save the csv file.

In [None]:
# uncomment to overwrite current csv file
# make supervised data set and write to csv file

# out_filepath = "/drive/My Drive/CS7290_Hogwarts_Project/test_set.csv"
# sim_data.to_csv(out_filepath, index=False)

The next, much smaller data set consists of completely new data and is used to investigate the behavior of the neural network before and after training.

In [None]:
# Simulate a handful of fresh students for inspecting the model by hand.
num_data_points = 5
# NOTE: `houses` holds the sampled house indices (0-dim tensors) here,
# not the list of house names defined with the sampling functions.
houses = [sample_house() for _ in range(num_data_points)]

# The columns are sampled one after the other (all personalities, then all
# herbology scores, then all quidditch ranks). Keep this order: it determines
# which random numbers each column receives for the fixed seed.
sim_data_dict = {
    'house': [house.item() for house in houses],
    'personality': torch.tensor(
        [sample_personality(house) for house in houses]),
    'herbology': torch.tensor(
        [sample_herbology(house) for house in houses]),
    'quidditch rank': torch.tensor(
        [sample_quidditch_rank(house) for house in houses])
}

# Tensor views of the individual columns.
personality_data = sim_data_dict['personality']
herbology_data = sim_data_dict['herbology']
quidditch_rank_data = sim_data_dict['quidditch rank']

# Hand pandas plain NumPy arrays instead of relying on it to coerce torch
# tensors implicitly; .numpy() keeps each tensor's dtype, so the columns stay
# numeric and the printed table does not depend on that coercion.
sim_data = pd.DataFrame({
    column: values.numpy() if isinstance(values, torch.Tensor) else values
    for column, values in sim_data_dict.items()
})
assert len(sim_data) == num_data_points
print(sim_data)