## Port of Oakland
Ryan Campa

Neerja Doshi

Sooraj Subrahmannian

Jake Toffler

In [2]:
#import packages
import numpy as np
import pandas as pd
import math

from collections import Counter
from collections import OrderedDict

In [3]:
# Create a dictionary of the distribution
test_dict = {"<10": 0.08, "10-15": 0.27, "15-20": 0.10,
             "20-25":0.11,"25-30":0.15,"30-35":0.20,"35-37":0.07,
             "37+":0.02}
n = 4989

### Question 1

In [4]:
def trucksim(n,d):
    """
    This function takes a number, n, of trucks to simulate and a dictionary, d, of value-probability pairs and outputs
    a list of n different truck levels randomly simulated.  To do this, we generate a random number between 0 and 1
    then loop through each key-value pair, adding the probability to a counter.  Once the counter is greater than the 
    generated random number, we append the key (truck level) to the list to be output.
    """
    truck_levels = [] 
    for j in range(n): 
        cumulative_prob = 0
        random_num = np.random.uniform(0,1) 
        for (k,v) in d.items():
            cumulative_prob += v 
            if cumulative_prob > random_num: 
                truck_levels.append(k)
                break 
    return truck_levels

### Question 2

In [5]:
simlist = trucksim(1000,test_dict)

In [6]:
def truck1CI(alpha,keyname,n,simlist):
    """
    The function creates n bootstraps from the original simlist. The function calculates the percentage of each of 
    these n lists that are equal to the given keyname. It returns the mean, a/2 and 1-a/2 percentiles and the standard 
    deviation of the bootstrapped mean.
    """
    finallist = []
    deltalist = []
    for i in range(n):
        templist = np.random.choice(simlist,len(simlist))
        pos = Counter(templist)[keyname]
        finallist.append(1.0*pos/len(templist))
    samplemean = 1.0*Counter(simlist)[keyname]/len(simlist)
    stderror = np.std(finallist)/np.sqrt(n)    
    deltalist = [fill - samplemean for fill in finallist]
    deltalist = sorted(deltalist,reverse=False)
    ub = deltalist[int(alpha*n)]
    lb = deltalist[int((1-alpha)*n)]
    return pd.Series([samplemean,samplemean+lb,samplemean+ub,stderror],index = ['Est.Value','CI Lower Bound','CI Upper Bound','Standard Error'])


In [7]:
"""
Populate a table that shows the Confidence Level, Truck Fill Level, No: of Bootstraps, Estimated Value, 
CI Lower Bound, CI Upper Bound and Standard Error by calling the 'truck1CI' function. At 90% confidence, we compare
computed values for 30-35 and <10 truck fill levels by changing the number of bootstraps.
"""
df = pd.DataFrame([[1-0.1, '30-35', 100],[1-0.1, '30-35', 1000],[1-0.1, '30-35', 2500],
                   [1-0.1, '30-35', 5000],[1-0.1, '<10', 100],[1-0.1, '<10', 1000],
                   [1-0.1, '<10', 2500],[1-0.1, '<10', 5000]],
                  columns=['Confid.Level','Truck Fill Level','No of Bootstraps'])
df['Est.Value'],df['CI Lower Bound'],df['CI Upper Bound']= np.zeros(8),np.zeros(8),np.zeros(8)
df2= df.apply(lambda x:(truck1CI(x[0],x[1],x[2],simlist)),axis=1)
df['Est.Value'] = df2['Est.Value']
df['CI Lower Bound']= df2['CI Lower Bound']
df['CI Upper Bound']= df2['CI Upper Bound']
df['Standard Error']= df2['Standard Error']

In [8]:
df

Unnamed: 0,Confid.Level,Truck Fill Level,No of Bootstraps,Est.Value,CI Lower Bound,CI Upper Bound,Standard Error
0,0.9,30-35,100,0.208,0.194,0.226,0.001168
1,0.9,30-35,1000,0.208,0.191,0.224,0.0004
2,0.9,30-35,2500,0.208,0.192,0.225,0.00026
3,0.9,30-35,5000,0.208,0.192,0.225,0.000184
4,0.9,<10,100,0.077,0.065,0.089,0.000947
5,0.9,<10,1000,0.077,0.066,0.088,0.000266
6,0.9,<10,2500,0.077,0.066,0.088,0.000167
7,0.9,<10,5000,0.077,0.066,0.088,0.000118


### Question 3

In [9]:
simlist = trucksim(4989,test_dict)

In [10]:
"""
Populate a table to show the Confid.Level, Truck Fill Level, No: of Bootstraps, Estimated Value, CI Lower Bound,
CI Upper Bound and Standard Error by calling the 'truck1CI' function. This time around, we keep the number of 
bootstraps constant at 200 and compare the computed values for 35-37 and 37+  truck fill levels at 90% and 95% 
confidence.
"""
dfQ3 = pd.DataFrame([[1-0.05, '35-37', 200],[1-0.05, '37+', 200],[1-0.1, '35-37', 200],
                   [1-0.1, '37+', 200]],
                  columns=['Confid.Level','Truck Fill Level','No of Bootstraps'])
dfQ3['Est.Value'],dfQ3['CI Lower Bound'],dfQ3['CI Upper Bound']= np.zeros(4),np.zeros(4),np.zeros(4)
df2 = dfQ3.apply(lambda x:(truck1CI(x[0],x[1],x[2],simlist)),axis=1)
dfQ3['Est.Value'] = df2['Est.Value']
dfQ3['CI Lower Bound']= df2['CI Lower Bound']
dfQ3['CI Upper Bound']= df2['CI Upper Bound']
dfQ3['Standard Error']= df2['Standard Error']

In [11]:
dfQ3

Unnamed: 0,Confid.Level,Truck Fill Level,No of Bootstraps,Est.Value,CI Lower Bound,CI Upper Bound,Standard Error
0,0.95,35-37,200,0.067549,0.061936,0.073762,0.000251
1,0.95,37+,200,0.020044,0.017238,0.023652,0.000137
2,0.9,35-37,200,0.067549,0.063339,0.072159,0.000239
3,0.9,37+,200,0.020044,0.017639,0.022449,0.000136


### Question 4

In [12]:
"""
Create a simulated list of n (4,989) trucks and find the estimated probability that the number of trucks that
have between 35-37 feet filled is greater than or equal to 368
"""
pos = 0
for i in range(1000):
    result_list = trucksim(n, test_dict)
    if result_list.count('35-37') >= 368:
        pos = pos + 1
        
print('Estimated probability of 35-37ft >= 368:')
print(1.0*pos/1000)        

Estimated probability of 35-37ft >= 368:
0.16


In [13]:
"""
Create a simulated list of n (4,989) trucks and find the estimated probability that the number of trucks that
have 37+ feet filled is greater than or equal to 108.
"""
pos = 0
for i in range(1000):
    result_list = trucksim(n, test_dict)
    if result_list.count('37+') >= 108:
        pos = pos + 1
print('Estimated probability of 37+ >= 108:')
print(1.0*pos/1000)        

Estimated probability of 37+ >= 108:
0.201


### Question 5

In [14]:
"""
Perform a joint test of both hypothesis. Here we take a product (intersection) of 37+ truck fill level > 108 
and 35-37 truck fill level > 368.
"""
pos = 0
for i in range(1000):
    result_list = trucksim(n, test_dict)
    if result_list.count('37+') >= 108 and result_list.count('35-37') >= 368:
        pos = pos + 1

In [15]:
print('Estimated probability of 37+ >= 108 AND 35-37 >= 368:')
print(1.0*pos/1000)

Estimated probability of 37+ >= 108 AND 35-37 >= 368:
0.025
