# Complete guide

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



import trecs
from trecs.models import ContentFiltering, ImplicitMF
from trecs.random import Generator
from trecs.metrics import MSEMeasurement, RecSimilarity, InteractionSimilarity, RecallMeasurement
from trecs.components import Users
import trecs.matrix_ops as mo #Note, in order for the ideal recommender to result in non-insane results, the normalize_users parameter in mo.inner_product must be set to False

In [None]:
#https://web.ma.utexas.edu/users/mks/statmistakes/skeweddistributions.html

In [None]:
class IdealRecommender(ContentFiltering):
    """
    With the Ideal Recommender, we make the *strong assumption* that the true scores are provided
    to the recommender system through a custom scoring function, which always returns the true
    underlying user-item scores. Therefore, this class is pretty much an empty skeleton; the only
    modification is that we don't update any internal state of the recommender at each time step.
    
    Amy: Note to self, as a part of the baseline ContentFiltering class, if an actual_item_representation 
    """
    def __init__(self, *args, **kwargs):
        
        
        super().__init__(*args, **kwargs)
    

    def _update_internal_state(self, interactions):
        # do not change users_hat!
        pass


In [None]:
#np.random.seed(1)
plt.style.use('seaborn')
plt.rcParams.update({'font.size': 50})

In [2]:
# Keep the dimensions small for easy visualization
number_of_users = 5
number_of_attributes = 10
number_of_items = 15
# We define user_representation using the standard integer generator in Numpy.
# We assume a number of interactions with each attribute in the interval [0,4).

users_shape = (number_of_users, number_of_attributes)
actual_user_representation = Users(size=users_shape, num_users=number_of_users)
model_user_representation = np.random.randint(4, size=(number_of_users, number_of_attributes))

# We define item_representation using the Generator that comes with the framework
# We assume a binary matrix with a binomial distribution

actual_item_representation = Generator().binomial(
    n=1, p=.3, size=(number_of_attributes, number_of_items)
)

model_item_representation = Generator().binomial(
    n=1, p=.3, size=(number_of_attributes, number_of_items)
)

In [None]:
#Not positive this is correct

content = ContentFiltering(
    num_users=number_of_users,
    num_items=number_of_items,
    num_attributes=number_of_attributes,
    actual_user_representation=actual_user_representation,
    user_representation=model_user_representation, 
    actual_item_representation = actual_item_representation,
    item_representation=actual_item_representation, #model has the true item values
)
# add an MSE measurement
content.add_metrics(MSEMeasurement(), RecallMeasurement())
# Run for 5 time steps
content.run(timesteps=1)

In [None]:
ideal = IdealRecommender(
    num_users=number_of_users,
    num_items=number_of_items,
    num_attributes=number_of_attributes,
    actual_user_representation=actual_user_representation,
    user_representation=actual_user_representation.actual_user_profiles.value, 
    actual_item_representation = actual_item_representation,
    item_representation=actual_item_representation, #model has the true item values
    num_items_per_iter="all",
)
# add an MSE measurement
ideal.add_metrics(MSEMeasurement(), RecallMeasurement())
# Run for 5 time steps
ideal.run(timesteps=5)

In [31]:
mf = ImplicitMF(
    num_users=number_of_users,
    num_items=number_of_items,
    num_latent_factors=number_of_attributes,
    actual_user_representation=actual_user_representation,
    actual_item_representation = actual_item_representation,
    num_items_per_iter=3,
)
# add an MSE measurement
mf.add_metrics(MSEMeasurement())
# Run for 5 time steps
#mf.run(timesteps=5,train_between_steps=True)

In [32]:
mf.run(timesteps=1)

100%|██████████| 1/1 [00:00<00:00, 566.87it/s]


In [85]:
print(recommender.predicted_scores.value)
print(recommender.items_shown)

print(shown_item_scores)

print(recommender.predicted_scores.value[4])

[[-1.14191380e+00  2.05797979e+00 -2.90734031e-01 -3.21875378e-01
   1.64575314e+00 -2.55419443e-01  3.48110023e-01 -2.19369368e+00
  -3.20387278e-01 -3.97012475e-01 -1.68412571e+00 -4.07926262e-01
  -7.24148813e-02 -4.96943205e-01 -3.90776070e-01]
 [-1.35494813e+00 -5.62433259e-01 -4.31621749e-01 -2.16467514e+00
  -3.76389433e-01  1.03274101e+00 -3.82748331e-01  2.29881233e+00
  -4.87667695e-01 -2.24900883e+00  8.35125701e-01 -1.47733814e+00
  -1.67646907e+00 -1.03272766e+00 -4.66855513e-01]
 [-1.64125219e+00  5.47461891e-01 -9.38897719e-01  6.98278073e-01
   7.57604301e-01  1.44376519e-01  5.34723293e-01 -1.50289861e+00
   3.23485875e-02 -2.27994456e-01  3.09597978e-01 -1.01968001e+00
  -9.52203821e-01 -5.37110173e-01  1.19390273e+00]
 [ 7.08681698e-02  1.81433271e-01  4.06857147e-01 -1.82868970e-01
   1.22016358e+00 -8.34681900e-02  5.01537684e-01 -9.93660517e-01
  -8.90125184e-01  1.40563245e+00 -2.30809812e+00  8.38389488e-01
   1.05309238e+00 -3.38738287e-01  7.52203389e-04]
 [-1

In [103]:
#Testing recall stuff
k=2
shown_item_scores = np.take_along_axis(recommender.predicted_scores.value, recommender.items_shown, axis=1)

shown_item_ranks = np.argsort(shown_item_scores, axis=1)
#top_item_idxs = shown_item_ranks[:, recommender.items_shown.shape[1] - k:]
top_item_idxs = shown_item_ranks[:, -k:]

top_k_items = np.take_along_axis(recommender.items_shown, top_item_idxs, axis=1)

#np.take_along_axis(recommender.users.actual_user_scores.value, top_item_ids, axis=1)

print(recommender.items_shown)
print(shown_item_scores)
print(shown_item_ranks)
print(top_item_ids)

print(top_k_items)

[[ 1  4  6]
 [ 7  5 10]
 [14  4  3]
 [ 9  4 12]
 [ 2 14  1]]
[[2.05797979 1.64575314 0.34811002]
 [2.29881233 1.03274101 0.8351257 ]
 [1.19390273 0.7576043  0.69827807]
 [1.40563245 1.22016358 1.05309238]
 [1.08373781 0.91814404 0.84650509]]
[[2 1 0]
 [2 1 0]
 [2 1 0]
 [2 1 0]
 [2 1 0]]
[[1 0]
 [2 1]
 [2 1]
 [2 1]
 [0 2]]
[[ 4  1]
 [ 5  7]
 [ 4 14]
 [ 4  9]
 [14  2]]


In [75]:
interactions = recommender.interactions.reshape(recommender.num_users, 1)
print(interactions)
recall = len(np.where(interactions == top_k_items)[0]) / recommender.num_users
print(recall)

[[ 6]
 [ 7]
 [ 3]
 [12]
 [ 1]]
0.6


In [90]:
shown_item_rel = np.take_along_axis(recommender.predicted_scores.value, recommender.items_shown, axis=1)
#actual_rel = np.take_along_axis(recommender.users.actual_user_scores.value, recommender.items_shown, axis=1)


shown_item_ranks = np.argsort(shown_item_rel, axis=1)
print(recommender.predicted_scores.value)
print(recommender.items_shown)
print(shown_item_rel)
print(shown_item_ranks)
top_k_items = np.take_along_axis(recommender.items_shown, top_item_idxs, axis=1)
print(top_k_items)

print(recommender.predicted_scores.value[1])

[[-1.14191380e+00  2.05797979e+00 -2.90734031e-01 -3.21875378e-01
   1.64575314e+00 -2.55419443e-01  3.48110023e-01 -2.19369368e+00
  -3.20387278e-01 -3.97012475e-01 -1.68412571e+00 -4.07926262e-01
  -7.24148813e-02 -4.96943205e-01 -3.90776070e-01]
 [-1.35494813e+00 -5.62433259e-01 -4.31621749e-01 -2.16467514e+00
  -3.76389433e-01  1.03274101e+00 -3.82748331e-01  2.29881233e+00
  -4.87667695e-01 -2.24900883e+00  8.35125701e-01 -1.47733814e+00
  -1.67646907e+00 -1.03272766e+00 -4.66855513e-01]
 [-1.64125219e+00  5.47461891e-01 -9.38897719e-01  6.98278073e-01
   7.57604301e-01  1.44376519e-01  5.34723293e-01 -1.50289861e+00
   3.23485875e-02 -2.27994456e-01  3.09597978e-01 -1.01968001e+00
  -9.52203821e-01 -5.37110173e-01  1.19390273e+00]
 [ 7.08681698e-02  1.81433271e-01  4.06857147e-01 -1.82868970e-01
   1.22016358e+00 -8.34681900e-02  5.01537684e-01 -9.93660517e-01
  -8.90125184e-01  1.40563245e+00 -2.30809812e+00  8.38389488e-01
   1.05309238e+00 -3.38738287e-01  7.52203389e-04]
 [-1

In [10]:
print(shown_item_rel[0])
print(shown_item_ranks)
#print(shown_item_ranks+2)

k=7
recommender=mf

shown_item_rel = np.take_along_axis(recommender.predicted_scores.value, recommender.items_shown, axis=1)
#actual_rel = np.take_along_axis(recommender.users.actual_user_scores.value, recommender.items_shown, axis=1)
shown_item_ranks = np.argsort(shown_item_rel, axis=1)
dcg = np.sum(shown_item_rel / np.log2(shown_item_ranks+2), axis=1)


print(dcg)

ideal_ranks=np.tile(np.arange(0,15),(recommender.num_users, recommender.num_items))

NameError: name 'shown_item_rel' is not defined

In [112]:
np.tile(np.arange(0,recommender.items_shown.shape[1]),(recommender.num_users, 1))

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])

In [108]:
ideal_rank = np.argsort(recommender.users.actual_user_scores.value)
ideal_item_idxs = ideal_rank[:,-3:]
ideal_item_rel = np.take_along_axis(recommender.users.actual_user_scores.value, ideal_item_idxs, axis=1)

ideal_ranks=np.tile(np.arange(0,recommender.items_shown.shape[1]),(recommender.num_users, 1))

idcg = 




print(recommender.users.actual_user_scores.value)
print(ideal_rank)
print(ideal_item_rel)
print(ideal_ranks)

[[ 0.20081493 -0.62669818 -0.13549283  0.31272886  0.         -0.43569558
   0.24622815 -0.37755385 -1.13868181  0.24763184 -0.31587262 -0.50090452
  -0.51111674 -0.06713889 -0.43376564]
 [-0.31312586  0.06671706  0.11743417 -1.04139937  0.         -0.13851063
  -0.31368262  0.57135809 -0.78673142 -0.32282974 -0.2721681  -1.09411609
   0.17205193 -0.43886514 -0.65525096]
 [-0.67811974 -1.04852726  0.01136625  0.32997882  0.         -0.17514042
   0.48465576 -0.15774528  0.18974285 -0.05955792 -0.29364017  0.12104795
   0.15581012 -0.00600078  0.12704873]
 [ 1.14875318  0.35770991  0.25861001 -0.07594744  0.         -0.11691005
   0.51411     0.62815202 -0.11672326  0.5481165   1.00268376  0.16662842
   0.77158909  0.39871142 -0.23208299]
 [-0.15477756  0.23233256  0.11958448  0.05969266  0.         -0.28521795
   0.31953653 -0.50131822 -0.04174253 -0.15995709 -0.44951751 -0.68637632
  -1.07559972  0.29270762 -0.97908394]]
[[ 8  1 12 11  5 14  7 10  2 13  4  0  6  9  3]
 [11  3  8 14 13

In [43]:
actual_item_ranks[:, recommender.num_items-recommender.items_shown.shape[1]:]

array([[ 6,  9,  3],
       [ 2, 12,  7],
       [ 8,  3,  6],
       [12, 10,  0],
       [ 1, 13,  6]])

In [51]:
recommender.users.actual_user_scores.value

array([[ 0.20081493, -0.62669818, -0.13549283,  0.31272886,  0.        ,
        -0.43569558,  0.24622815, -0.37755385, -1.13868181,  0.24763184,
        -0.31587262, -0.50090452, -0.51111674, -0.06713889, -0.43376564],
       [-0.31312586,  0.06671706,  0.11743417, -1.04139937,  0.        ,
        -0.13851063, -0.31368262,  0.57135809, -0.78673142, -0.32282974,
        -0.2721681 , -1.09411609,  0.17205193, -0.43886514, -0.65525096],
       [-0.67811974, -1.04852726,  0.01136625,  0.32997882,  0.        ,
        -0.17514042,  0.48465576, -0.15774528,  0.18974285, -0.05955792,
        -0.29364017,  0.12104795,  0.15581012, -0.00600078,  0.12704873],
       [ 1.14875318,  0.35770991,  0.25861001, -0.07594744,  0.        ,
        -0.11691005,  0.51411   ,  0.62815202, -0.11672326,  0.5481165 ,
         1.00268376,  0.16662842,  0.77158909,  0.39871142, -0.23208299],
       [-0.15477756,  0.23233256,  0.11958448,  0.05969266,  0.        ,
        -0.28521795,  0.31953653, -0.50131822, 

In [54]:
k=3
recommender=mf


actual_item_ranks = np.argsort(recommender.users.actual_user_scores.value, axis=1)
#Take the number of users' actual top items according to the number recommended per iteration
#actual_top_k_items = np.take(recommender.items_shown, shown_item_ranks[:, recommender.num_items-recommender.items_shown.shape[1]:])

#actual_top_k_rel = np.take(recommender.users.actual_user_scores.value, actual_item_ranks[:, recommender.num_items-recommender.items_shown.shape[1]:])
top_item_ids = actual_item_ranks[:, recommender.num_items-recommender.items_shown.shape[1]:]
top_item_rel = np.take_along_axis(recommender.users.actual_user_scores.value, top_item_ids, axis=1)


# print(recommender.users.actual_user_scores.value[0])
# print(recommender.users.actual_user_scores.value[1])
# print(recommender.users.actual_user_scores.value[2])

# print(actual_item_ranks[0])
# print(actual_item_ranks[1])
# print(actual_item_ranks[2])

# print(top_item_ids)

# #print(actual_item_ranks[0:2])
# print(actual_item_ranks[0:2, recommender.num_items-recommender.items_shown.shape[1]:])

# print(actual_top_k_rel)

[ 0.20081493 -0.62669818 -0.13549283  0.31272886  0.         -0.43569558
  0.24622815 -0.37755385 -1.13868181  0.24763184 -0.31587262 -0.50090452
 -0.51111674 -0.06713889 -0.43376564]
[-0.31312586  0.06671706  0.11743417 -1.04139937  0.         -0.13851063
 -0.31368262  0.57135809 -0.78673142 -0.32282974 -0.2721681  -1.09411609
  0.17205193 -0.43886514 -0.65525096]
[-0.67811974 -1.04852726  0.01136625  0.32997882  0.         -0.17514042
  0.48465576 -0.15774528  0.18974285 -0.05955792 -0.29364017  0.12104795
  0.15581012 -0.00600078  0.12704873]
[ 8  1 12 11  5 14  7 10  2 13  4  0  6  9  3]
[11  3  8 14 13  9  6  0 10  5  4  1  2 12  7]
[ 1  0 10  5  7  9 13  4  2 11 14 12  8  3  6]
[[ 6  9  3]
 [ 2 12  7]
 [ 8  3  6]
 [12 10  0]
 [ 1 13  6]]


In [80]:
ideal_ranks=np.tile(np.arange(0,15),(recommender.num_users, recommender.num_items))

array([[ 0,  1,  2, ..., 12, 13, 14],
       [ 0,  1,  2, ..., 12, 13, 14],
       [ 0,  1,  2, ..., 12, 13, 14],
       [ 0,  1,  2, ..., 12, 13, 14],
       [ 0,  1,  2, ..., 12, 13, 14]])

In [None]:
np.log

In [62]:
np.take_along_axis(recommender.users.actual_user_scores.value, top_item_ids, axis=1)

array([[0.24622815, 0.24763184, 0.31272886],
       [0.11743417, 0.17205193, 0.57135809],
       [0.18974285, 0.32997882, 0.48465576],
       [0.77158909, 1.00268376, 1.14875318],
       [0.23233256, 0.29270762, 0.31953653]])

In [59]:
top_item_ids

array([[ 6,  9,  3],
       [ 2, 12,  7],
       [ 8,  3,  6],
       [12, 10,  0],
       [ 1, 13,  6]])

In [None]:
print(mf.users.actual_user_scores.shape)
print(mf.predicted_user_item_scores.shape)

actual = np.reshape(mf.users.actual_user_scores.value, (number_of_users*number_of_items, 1))
predicted = np.reshape(mf.predicted_user_item_scores, (number_of_users*number_of_items, 1))
print(test.shape)

plt.hist(actual, alpha=0.7)
plt.hist(predicted, alpha=0.7)#predicted scores are more spread out, which kind of makes sense

In [None]:
abs_error = abs(actual-predicted)

plt.hist(abs_error)

In [None]:
# Collect measurements about the simulation
results = mf.get_measurements()

print("Results of the simulation:")
pd.DataFrame(results)

In [None]:
# Collect measurements about the simulation
results = ideal.get_measurements()

print("Results of the simulation:")
pd.DataFrame(results)

In [None]:
results = content.get_measurements()

print("Results of the simulation:")
pd.DataFrame(results)

In [None]:
#generate a distribution that hides a subpopulation
number_of_attributes = 10
number_of_maj_users = 150
number_of_min_users = 50

maj_user_representation = np.random.normal(1, 2, size=(number_of_maj_users, number_of_attributes))
min_user_representation = np.random.normal(0.5, 1.25, size=(number_of_min_users, number_of_attributes))
actual_user_representation = np.vstack((maj_user_representation, min_user_representation))
split_indices=number_of_maj_users

In [None]:
#If plotted without respect to the subgroups, preference means look more or less normally distributed
plt.hist(actual_user_representation.mean(axis=1))



In [None]:
#when plotting out mean preferences when accounting for group, we can see a clear distinction in preference
plt.hist(maj_user_representation.mean(axis=1), alpha=.7, color='b')
plt.hist(min_user_representation.mean(axis=1), alpha=0.7, color='r')



In [None]:
filtering = ContentFiltering(actual_user_representation=actual_user_representation, 
                             num_attributes=number_of_attributes,
                             num_items=500)


mse = MSEMeasurement(diagnostics=True)
recall=RecallMeasurement()

filtering.add_metrics(mse, recall)

filtering.startup_and_train(50)
filtering.run(450)

In [None]:
mse_diagnostics = filtering.metrics[0].get_diagnostics()
mse_beginning = mse_diagnostics.loc[50:, :]
mse_beginning.head()



In [None]:
def mse_histogram(model, split_indices=None):
    metric = (
                model.predicted_scores.value.mean(axis=1)- model.users.actual_user_scores.value.mean(axis=1))** 2
    
    colors = ["blue", "orange", "red", "yellow", "green"]
    if split_indices is not None and len(split_indices) > 0:
        splits = [0] + split_indices + [metric.size]
        for i in range(len(splits) - 1):
            values = metric[splits[i] : splits[i + 1]]
            plt.hist(values, alpha=0.7, color=colors[i])
    else:
        plt.hist(metric, bins="auto")
        plt.ylabel("observation count (total n={})".format(metric.size))
        plt.xlabel("mean sqaured error")

In [None]:
mf.startup_and_train(50)


In [None]:
mse_histogram(mf)

In [None]:
mf.run(50)
mf.train()
mse_histogram(mf)

In [None]:
mf.run(50)
mf.train()
mse_histogram(mf)

In [None]:
mf.run(50)
mf.train()
mse_histogram(mf)

In [None]:
content.run(50)
mse_histogram(content)

In [None]:
content.run(50)
mse_histogram(content)

In [None]:
print(in_k)
print(not_in_k)
print(len(model.interactions))
k

In [None]:

np.concatenate((np.ones(len(in_k)), np.zeros(len(not_in_k))), axis=None)

In [None]:
model=mf
k=5

#split_indices = number_of_maj_users

colors = ["blue", "orange", "red", "yellow", "green"]

shown_item_scores = np.take(model.predicted_scores.value, model.items_shown)
shown_item_ranks = np.argsort(shown_item_scores, axis=1)
top_k_items = np.take(model.items_shown, shown_item_ranks[:, k :])
in_k = (np.where(np.isin(model.interactions, top_k_items))[0])
not_in_k = (np.where(~np.isin(model.interactions, top_k_items))[0])
metric = np.concatenate((np.ones(len(in_k)), np.zeros(len(not_in_k))), axis=None)

plt.hist(metric)

##Amy, implement this pie chart for recall at k
# # Pie chart, where the slices will be ordered and plotted counter-clockwise:
# labels = 'Frogs', 'Hogs', 'Dogs', 'Logs'
# sizes = [15, 30, 45, 10]
# explode = (0, 0.1, 0, 0)  # only "explode" the 2nd slice (i.e. 'Hogs')

# fig1, ax1 = plt.subplots()
# ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
#         shadow=True, startangle=90)
# ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

# plt.show()

# maj_population_outcomes = metric[:split_indices]
# min_population_outcomes = metric[split_indices:]

# plt.hist(maj_population_outcomes, color=colors[0])
# plt.hist(min_population_outcomes, color=colors[1])



# if split_indices is not None:
#     splits = [0] + split_indices + [metric.size]
#     for i in range(len(splits) - 1):
#         values = metric[splits[i] : splits[i + 1]]
#         plt.hist(values, alpha=0.7, color=colors[i])

# plt.hist(metric, bins="auto")
# plt.ylabel("observation count (total n={})".format(metric.size))
# plt.xlabel("recall at k")


# 
#     if split_indices is not None and len(split_indices) > 0:
#         splits = [0] + split_indices + [metric.size]
#         for i in range(len(splits) - 1):
#             values = metric[splits[i] : splits[i + 1]]
#             plt.hist(values, alpha=0.7, color=colors[i])
#     else:
#         plt.hist(metric, bins="auto")
#         plt.ylabel("observation count (total n={})".format(metric.size))
#         plt.xlabel("mean sqaured error")

In [None]:
plt.hist(min_population_outcomes)

In [None]:
len(min_population_outcomes)

In [None]:
metric_histogram(filtering)

In [None]:
metric_histogram(filtering, [number_of_maj_users])

In [None]:
#generate a bimodal distribution
N=500
mu, sigma = 1.845, 1
mu2, sigma2 = 5.845, 1
X1 = np.random.normal(mu, sigma, N)
X2 = np.random.normal(mu2, sigma2, N)
X_bimodal = np.concatenate([X1, X2])

In [None]:
# print majority / minority outcome stats
def majority_minority_outcomes(metric, split_index):
    split_indices = [split_index]

        
    maj_mean = metric.last_observation[:split_index].mean()
    maj_std = metric.last_observation[:split_index].std()

    min_mean = metric.last_observation[split_index:].mean()
    min_std = metric.last_observation[split_index:].std()

    print("Majority group statistics: ", maj_mean, "(mean), ", maj_std, "(std)")
    print("Minority group statistics: ", min_mean, "(mean), ", min_std, "(std)")
    print()
    
    metric.hist(split_indices)

In what follows, we expand on this minimal example to gain a deeper understanding of what happens under the hood.

In [None]:
filtering = ContentFiltering(actual_user_representation=actual_user_representation, 
                             num_attributes=number_of_attributes,
                             num_items=500)


mse = MSEMeasurement(diagnostics=True)
filtering.add_metrics(mse)

filtering.startup_and_train(50)
majority_minority_outcomes(mse, number_of_maj_users)


In [None]:
filtering.run(450)
majority_minority_outcomes(mse, number_of_maj_users)

In [None]:
bimodal = plt.hist(X_bimodal, bins=30)
plt.xlabel('Dependent Variable Value')
plt.ylabel('Number of Observations')
plt.title('Bimodal Distribution')
plt.show()

In [None]:
print(np.mean(X_bimodal))
print(np.std(X_bimodal))

In [None]:
N=1000
mu, sigma = 14.99, 4
X1 = np.random.normal(mu, sigma, N)
X_skew = np.log2(X1)

skew = plt.hist(X_skew, bins=30)
plt.xlabel('Dependent Variable Value')
plt.ylabel('Number of Observations')
plt.title('Skewed Distribution')
plt.show()

In [None]:
N=1000
mu, sigma = 3.85, 2.2
normal_dist = np.random.normal(mu, sigma, N)

skew = plt.hist(normal_dist, bins=30)
plt.xlabel('Dependent Variable Value')
plt.ylabel('Number of Observations')
plt.title('Normal Distribution')
plt.show()

In [None]:
#How to characterize power, type 1 vs type 2 errors 