# Lecture 13: Estimation

In [None]:
#!pip install datascience

from datascience import *
import numpy as np

import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
%matplotlib inline

## Estimating enemy planes

In [None]:
# Build a one-column table holding the serial numbers 1 through N.
N = 300
serialno = Table().with_column('Serial number', np.arange(1, N + 1))
serialno

In [None]:
# Largest serial number observed in one random sample of 30 rows.
np.max(serialno.sample(30).column('Serial number'))

### Interlude:  appending to arrays

In [None]:
# Build a small example array to experiment with.
a = make_array(1, 2, 3, 4, 5)
a

In [None]:
# The value returned here is only displayed; it is not assigned back to `a`.
np.append(a, 42)

In [None]:
# Display `a` again; the previous cell did not assign anything to it.
a

In [None]:
# Assign the result of np.append back to `a`, then display it.
a = np.append(a, 42)
a

What is the result of this code?

```
x = make_array(100, 42, 7)
x = np.append(x, 0)
x = np.append(x, 1)
x
```

A. `array([100, 42, 7])`  
B. `array([1, 0, 100, 42, 7, 0, 1])`  
C. `array([100, 42, 7, 0, 1])`  
D. I don't know  
E. None of the above  

In [None]:
# Run the code from the question above to check the answer.
x = make_array(100, 42, 7)
x = np.append(x, 0)  # each result is assigned back to x
x = np.append(x, 1)
x

### Simulation of estimation with max

In [None]:
# Use 30 here so this cell matches the follow-up question ("10 instead of 30")
# and the sample size used by the other simulations in this notebook.
sample_size = 30
repetitions = 750

# Collect the largest serial number from each simulated sample.
maxes = make_array()

for _ in np.arange(repetitions):
    sampled_numbers = serialno.sample(sample_size).column(0)
    statistic = sampled_numbers.max()
    # np.append's result is assigned back so the array grows by one entry.
    maxes = np.append(maxes, statistic)

estimates = Table().with_column('Sample Max', maxes)
estimates

In [None]:
# Bins of width 10, starting at 1 and stopping before N + 100.
serial_bins = np.arange(1, N + 100, 10)
estimates.hist(bins=serial_bins, unit='serial number')

What would happen if we changed the sample size to 10 instead of 30?

A. The histogram would spread out, with more area to the left of 300.  
B. The histogram would spread out, with more area to the right of 300.  
C. The histogram would compress, with more area concentrated around 300.  
D. I don't know  
E. None of the above  

### Simulation of estimation with average

In [None]:
# Twice the mean of the integers 1..300 (300 is the value assigned to N above).
2 * np.arange(1, 300 + 1).mean()

In [None]:
sample_size = 30
repetitions = 750

# One "2 * sample mean" estimate is appended per simulated sample.
double_averages = make_array()

for _ in np.arange(repetitions):
    sampled_numbers = serialno.sample(sample_size).column('Serial number')
    double_averages = np.append(double_averages, 2 * np.mean(sampled_numbers))

estimates = Table().with_columns('2 * average', double_averages)

estimates

In [None]:
# Bins of width 10, starting at 1 and stopping before N + 100.
serial_bins = np.arange(1, N + 100, 10)
estimates.hist(bins=serial_bins, unit='serial number')

### Comparison of statistics

In [None]:
sample_size = 30
repetitions = 750

# Both statistics are computed from the same sample in every repetition.
maxes = make_array()
double_averages = make_array()

for _ in np.arange(repetitions):
    sampled_numbers = serialno.sample(sample_size).column('Serial number')
    largest = np.max(sampled_numbers)
    doubled_mean = 2 * np.mean(sampled_numbers)
    maxes = np.append(maxes, largest)
    double_averages = np.append(double_averages, doubled_mean)

estimates = Table().with_columns(
    'Max', maxes,
    '2 * average', double_averages,
)

estimates

In [None]:
# Bins of width 10, starting at 1 and stopping before N + 100.
serial_bins = np.arange(1, N + 100, 10)
estimates.hist(bins=serial_bins, unit='serial number')

**Discussion question:** Which estimate would be better for the Allies?