# [Python Reference Link](http://www.data8.org/sp20/python-reference.html)
*Run the cell below to set up our modules.*

# Importing our modules

In [None]:
import numpy as np
from datascience import *
import math as m

# These lines do some fancy plotting magic.
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import warnings
warnings.simplefilter('ignore', FutureWarning)

In [None]:
# Load the births data set, then remove the column at index 5 so that it is
# left out of everything below.
births = Table.read_table('baby.csv')
births = births.drop(5)
births

# Mean vs. Median

#### Calculating the mean and the median

In [None]:
# A tiny data set in which one large value (9) pulls the mean above the median.
sample_data = make_array(2, 3, 3, 9)
print('sample data:', sample_data)
print('mean/average:', sample_data.mean())
print('median:', np.median(sample_data))

#### Visualizing the concept of the mean acting as the 'Center of Gravity'

In [None]:
# Draw the sample as a histogram, then mark its mean with a red triangle just
# under the horizontal axis, like a fulcrum balancing the bars.
Table().with_columns('Sample Data', sample_data).hist(bins=np.arange(10))
# Extend the y-axis below zero so there is room for the marker.
plt.ylim(-0.04, 0.5)
# Grey "beam" along y = 0.
plt.plot([0, 10], [0, 0], color='grey', linewidth=2)
plt.scatter(sample_data.mean(), -0.015, marker='^', color='red', s=100)
plt.title('Average as a "Center of Gravity"')

#### A histogram of a Symmetric Distribution (more specifically, a Normal Distribution; Bell-Shaped Curve)

In [None]:
# Bin edges every 10 units: 50, 60, ..., 170 (np.arange stops before 180).
# NOTE(review): any weight above 170 would fall outside these bins and be left
# out of the histogram -- confirm against the range of the data.
weight_bins = np.arange(50, 180, step=10)
births.hist('Birth Weight', bins=weight_bins)

#### A Histogram of a Right-Skewed Distribution

In [None]:
# Evaluate 0.25 * e^(0.25 * x) for x from 0 up to (but not including) 15 in
# steps of 0.1.  The curve is flat at first and steep later, so most of the
# outputs are small and only a few are large.
input_values = np.arange(0, 15, 0.1)
exponential_curve = 0.25 * m.e ** (0.25 * input_values)

# Unit-wide bins with edges 0, 1, ..., 11.
r_skew_bins = np.arange(12)
Table().with_column('values', exponential_curve).hist(bins=r_skew_bins)

#### A Histogram of a Left-Skewed Distribution

In [None]:
# Mirror the right-skewed values to get a left-skewed data set: subtracting
# from 130 turns the many small values into the many large ones.
reverse_exponential = 130 - exponential_curve

# The mirrored values lie between 119 and 130, so the bin edges have to run
# all the way up to 130.  np.arange excludes its stop value, hence 131 here;
# stopping at 130 would make 129 the last edge and leave every value above
# 129 (the most crowded part of the data) out of the histogram.
l_skew_bins = np.arange(118, 131, 1)
Table().with_columns('others', reverse_exponential).hist(bins=l_skew_bins)

## Let's look at the relationship between the Mean and the Median across these different distribution types

In [None]:
# Redraw the birth-weight histogram and mark the mean (red) and the median
# (blue) on the horizontal axis.
births.hist('Birth Weight', bins=weight_bins)
for statistic, dot_color, legend_name in ((np.mean, 'red', 'Mean'),
                                          (np.median, 'blue', 'Median')):
    plt.scatter(statistic(births.column('Birth Weight')), 0,
                color=dot_color, zorder=2, s=50, label=legend_name)
plt.legend()
# The trailing semicolon keeps the notebook from echoing the return value.
plt.ylim(-0.001, 0.03);

In [None]:
# Redraw the right-skewed histogram and mark the mean (red) and the median
# (blue) on the horizontal axis.
Table().with_column('values', exponential_curve).hist(bins=r_skew_bins)
for statistic, dot_color, legend_name in ((np.mean, 'red', 'Mean'),
                                          (np.median, 'blue', 'Median')):
    plt.scatter(statistic(exponential_curve), 0,
                color=dot_color, zorder=2, s=50, label=legend_name)
plt.legend()
# The trailing semicolon keeps the notebook from echoing the return value.
plt.ylim(-0.005, 0.37);

In [None]:
# Redraw the left-skewed histogram and mark the mean (red) and the median
# (blue) on the horizontal axis.
Table().with_column('others', reverse_exponential).hist(bins=l_skew_bins)
for statistic, dot_color, legend_name in ((np.mean, 'red', 'Mean'),
                                          (np.median, 'blue', 'Median')):
    plt.scatter(statistic(reverse_exponential), 0,
                color=dot_color, zorder=2, s=50, label=legend_name)
plt.legend()
# The trailing semicolon keeps the notebook from echoing the return value.
plt.ylim(-0.005, 0.37);

# Quantifying Variability i.e. Standard Deviation

In [None]:
#since we are computing a formula, we will use some methods from the math module
import math

In [None]:
# Pull the birth weights out of the table as an array and show its mean
# alongside the raw values.
birth_weights = births.column('Birth Weight')
print('mean:', birth_weights.mean())
print('birth weights:', birth_weights)

### Root Mean Square of "***Deviations from Average***"
We first want to measure the deviations from average (i.e. look at how far away each data point is from the mean/average)

In [None]:
# Deviation from average: each birth weight minus the mean of all birth
# weights.  Positive means heavier than average, negative means lighter.
deviations = birth_weights - np.mean(birth_weights)

# Put the weights, the (constant) average, and the deviations side by side.
# np.ones(...) * mean repeats the average once per row so the column lengths match.
deviations_table = Table().with_columns('Birth Weights', birth_weights,
                                        'Average', np.ones(len(birth_weights)) * np.mean(birth_weights),
                                        'Deviations', deviations)
deviations_table.show()
print('Here is the array of deviations:')
deviations

### Root Mean "***Square***" of Deviations from Average
Squaring each deviation from average makes every value non-negative, so positive and negative deviations cannot cancel each other out.

In [None]:
# Square each deviation from average so that every entry is non-negative.
# The deviations are computed directly from birth_weights here, so this cell
# only needs birth_weights to be defined.
square_deviations = (birth_weights - np.mean(birth_weights)) ** 2
square_deviations

### Root "***Mean***" Square of Deviations from Average
Now we compute the mean/average of the square deviations from above

In [None]:
# Average of the squared deviations from average (this quantity is the
# variance).  Computed directly from birth_weights, so this cell only needs
# birth_weights to be defined.
mean_square_deviations = np.mean((birth_weights - np.mean(birth_weights)) ** 2)
mean_square_deviations

### "***Root***" Mean Square of Deviations from Average
Since we squared the deviations above, we are going to undo that by taking the square root. 

In [None]:
# Square root of the mean squared deviation from average: this is the
# standard deviation, back in the same units as the birth weights.
# Computed directly from birth_weights, so this cell only needs birth_weights
# (and the math module) to be available.
root_mean_square_deviations = math.sqrt(
    np.mean((birth_weights - np.mean(birth_weights)) ** 2)
)
root_mean_square_deviations

In [None]:
# NumPy's built-in standard deviation.  By default np.std divides by the
# number of values (not n - 1), i.e. it is the root mean square of the
# deviations from average.
np.std(birth_weights)

# Looking at the relationship between Standard Deviation and Histograms

In [None]:
# For every column of the table, draw its histogram and mark the mean (blue
# dot) and the points one standard deviation to either side of it (red
# triangles).
for each_label in births.labels:
    column_values = births.column(each_label)
    the_mean = np.mean(column_values)
    the_standard_deviation = np.std(column_values)
    title_string = f'Mean: ({the_mean}) & Standard Deviation: ({the_standard_deviation})'

    births.hist(each_label)

    # The markers are drawn at a y-value slightly below zero.
    plt.scatter(the_mean, -0.00015, color='blue', s=100)
    for one_sd_away in (the_mean - the_standard_deviation,
                        the_mean + the_standard_deviation):
        plt.scatter(one_sd_away, -0.00015, marker='^', color='red', s=100)
    plt.title(title_string)