In [1]:
import pandas as pd
import numpy as np
import random
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt  
import seaborn as sns  
%matplotlib inline

# Seed NumPy's global random number generator so that any random draws
# made through np.random are reproducible from run to run.
np.random.seed(42)

# **Applying Point Estimation**

An insurance company sampled files from its inherent-defects department and obtained the following damage amounts (in thousands of SAR) for 10 incidents of structural defects in buildings:

130, 55, 600, 100, 80, 150, 44, 580, 62, 123

What is the estimate of the average amount of damage caused by structural defects in buildings?

In [2]:
# Damage amounts (in thousands of SAR) for the 10 sampled structural-defect incidents.
inherent_defects_sample = np.array(
    [130, 55, 600, 100, 80, 150, 44, 580, 62, 123]
)

# The sample mean serves as the point estimate of the population mean.
x_bar = inherent_defects_sample.mean()
x_bar

192.4

**Insight** 

- The estimate of the average amount of damage caused by structural defects in buildings is **192,400 SAR** (the sample mean is 192.4 thousand SAR).

- Usually, the point estimate of an unknown population parameter is the corresponding sample statistic.

# **Applying Confidence Interval**

#### 1. **Let's see how the confidence interval is constructed for the population mean when the population standard deviation is known**

The amount of water (in liters) that an electric kettle can hold was examined using a random sample of 50 cups of water dispensed by a new kettle. The sample mean was found to be 1.5 L, and the standard deviation across all kettles from that manufacturer is known to be 0.3 L. We will construct a 95% confidence interval for μ, the mean amount of water per cup dispensed by the new kettle.

In [3]:
from scipy.stats import norm

# Sample mean and known population standard deviation, both in liters.
x_bar, sigma = 1.5, 0.3
n = 50  # sample size

# Freeze the sampling distribution of the mean, N(x_bar, sigma / sqrt(n)),
# and take its central 95% interval, rounded to one decimal place.
np.round(norm(loc=x_bar, scale=sigma / np.sqrt(n)).interval(0.95), 1)

array([1.4, 1.6])

**Insight**

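We are 95% confident that the true mean amount of water per cup dispensed by the new kettle lies between 1.4 L and 1.6 L. In other words, if we repeated the sampling many times, about 95% of the intervals constructed this way would contain the true mean.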

#### 2. **Let's see how the confidence interval is constructed for the population mean when the population standard deviation is unknown**

The amount of water (in liters) that an electric kettle can hold was examined using a random sample of 50 cups of water dispensed by a new kettle. The sample mean was found to be 1.5 L, and the sample standard deviation was 0.8 L (the population standard deviation is unknown, so the t-distribution is used). We will construct a 95% confidence interval for μ, the mean amount of water per cup dispensed by the new kettle.

In [4]:
from scipy.stats import t

# Sample mean and sample standard deviation, both in liters.
x_bar, s = 1.5, 0.8
n = 50  # sample size
k = n - 1  # degrees of freedom

# Freeze a Student's t distribution centred on x_bar with scale s / sqrt(n),
# and take its central 95% interval, rounded to one decimal place.
np.round(t(df=k, loc=x_bar, scale=s / np.sqrt(n)).interval(0.95), 1)

array([1.3, 1.7])

**Insight**

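We are 95% confident that the true mean amount of water per cup dispensed by the new kettle lies between 1.3 L and 1.7 L. In other words, if we repeated the sampling many times, about 95% of the intervals constructed this way would contain the true mean.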