Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanweed committed Nov 1, 2023
1 parent 32e924d commit db11866
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 251 deletions.
198 changes: 98 additions & 100 deletions 04.03-estimation.html

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
216 changes: 94 additions & 122 deletions _sources/04.03-estimation.ipynb

Large diffs are not rendered by default.

91 changes: 63 additions & 28 deletions reports/04.03-estimation.err.log
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,68 @@ nbclient.exceptions.CellExecutionError: An error occurred while executing the fo
import statistics
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt

# generate data from 10000 "IQ" studies, where each study consists of two scores
n = 2
sample_sds = []


ns = range(2,11)


averageSampleSds = []
averageSampleMeans = []

# Simulate data for N = 2 to 10
for n in ns:
sample_sds = []
sample_means = []
for i in range(1,10000):
sample_sd = statistics.stdev(np.random.normal(loc=100,scale=15,size=n).astype(int))
sample_sds.append(sample_sd)
sample_mean = statistics.mean(np.random.normal(loc=100,scale=15,size=n).astype(int))
sample_means.append(sample_mean)
averageSampleSds.append(statistics.mean(sample_sds))
averageSampleMeans.append(statistics.mean(sample_means))

# Simulate data for N = 1. This cannot be done in the loop above, because the sample standard deviation
# is undefined for a single observation (statistics.stdev requires at least two data points)
n = 1
sample_mean_1 = []
for i in range(1,10000):
sample_sd = statistics.stdev(np.random.normal(loc=100,scale=15,size=n).astype(int))
sample_sds.append(sample_sd)
sample_mean = statistics.mean(np.random.normal(loc=100,scale=15,size=n).astype(int))
sample_mean_1.append(sample_mean)

# Add in sample mean and SD for N=1 at the beginning of the lists
# For N = 1, the sample SD is simply 0
averageSampleSds.insert(0,0)
averageSampleMeans.insert(0,statistics.mean(sample_mean_1))

# Collect simulated data in a dataframe, together with a vector from 1 to 10 representing N
df = pd.DataFrame(
{'N': range(1,11),
'SampleMeans': averageSampleMeans,
'SampleSDs': averageSampleSds
})

# Plot the data
fig, axes = plt.subplots(1, 2, figsize=(15, 5), sharey=False)
fig.suptitle('Simulated IQ Data')

# plot a histogram of the distribution of sample standard deviations, together with a dashed line indicating
# the population standard deviation
#fig, ax = plt.subplots()
ax=sns.histplot(sample_sds, binwidth=4)
plt.axvline(15, color = 'black', linestyle = "dashed")
# Format the figure
sns.lineplot(data=df, x='N', y='SampleMeans',ax=axes[0], linestyle = "dashdot")
sns.lineplot(data=df, x='N', y='SampleSDs',ax=axes[1], linestyle = "dashdot")
axes[0].set(ylim=(0,120))
axes[1].set(ylim=(0,17))
axes[0].axhline(100, color = 'black', linestyle = "dashed")
axes[1].axhline(15, color = 'black', linestyle = "dashed")
axes[0].set_title("Sample Means")
axes[1].set_title("Sample Standard Deviations")
axes[0].spines[['top', 'right']].set_visible(False)
axes[1].spines[['top', 'right']].set_visible(False)

ax.set(yticklabels=[])
ax.set(ylabel=None)
ax.set(xlabel='Sample Standard Deviation')
ax.set_title("Population Standard Deviation")
ax.tick_params(left=False)
ax.spines[['top', 'right']].set_visible(False)
ax.tick_params(axis='both',
which='both',
left=False,
right=False)
labels = ['A', 'B']
for s, ax in enumerate(axes):
axes[s].text(-0.1, 1, labels[s], transform=axes[s].transAxes,fontsize=16, fontweight='bold', va='top', ha='right')



Expand All @@ -54,14 +91,12 @@ ax.tick_params(axis='both',

---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[15], line 9
 7 sample_sds = []
 8 for i in range(1,10000):
----> 9 sample_sd = statistics.stdev(np.random.normal(loc=100,scale=15,size=n).astype(int))
 10 sample_sds.append(sample_sd)
 13 # plot a histogram of the distribution of sample standard deviations, together with dashed line indicating 
 14 # population standard deviation
 15 #fig, ax = plt.subplots()
Cell In[16], line 20
 18 sample_means = []
 19 for i in range(1,10000):
---> 20 sample_sd = statistics.stdev(np.random.normal(loc=100,scale=15,size=n).astype(int))
 21 sample_sds.append(sample_sd)
 22 sample_mean = statistics.mean(np.random.normal(loc=100,scale=15,size=n).astype(int))

File ~/opt/miniconda3/envs/pythonbook3/lib/python3.11/statistics.py:922, in stdev(data, xbar)
 920 if issubclass(T, Decimal):
Expand Down
2 changes: 1 addition & 1 deletion searchindex.js

Large diffs are not rendered by default.

0 comments on commit db11866

Please sign in to comment.