In [5]:
import numpy as np
import math
import matplotlib.pyplot as plt

In [6]:
def distinct_values(n, N):
    """Return the expected number of distinct values among n uniform draws.

    Each draw is assumed to be uniform over N equally likely values. A given
    value is missed by a single draw with probability (1 - 1/N), hence by all
    n draws with probability (1 - 1/N)**n; summing the complementary
    probability over the N values gives the expectation.

    n : number of draws.
    N : number of possible values (must be non-zero, since 1/N is evaluated).
    """
    never_drawn = (1 - 1/N)**n  # probability that one specific value never shows up
    return N*(1 - never_drawn)

def approx(n, N):
    """Approximate the expected number of distinct values for N >> 1.

    For large N, (1 - 1/N)**n is close to exp(-n/N), so the exact expression
    N*(1 - (1 - 1/N)**n) can be rewritten as N*(1 - exp(-n/N)). The result is
    meant to be compared against the exact value for different N.

    n : number of draws.
    N : number of possible values (must be non-zero).
    """
    exponent = -n/N
    return N*(1 - np.exp(exponent))

def testing(n, N, k, show_results):
    """Compare simulated distinct-value counts with the analytical expectation.

    Runs ``k`` independent experiments. In each one, ``n`` random integers are
    drawn uniformly from 1..N (inclusive) and the number of distinct values
    and of repeated values is counted. Every experiment contributes one point
    (experiment index, number of distinct values) to the current matplotlib
    figure; the value returned by ``distinct_values(n, N)`` is drawn as a
    dashed horizontal line. The exact expectation, the ``approx(n, N)``
    estimate and their relative difference in percent are printed, and the
    figure is shown.

    Parameters
    ----------
    n : int
        Number of random integers drawn per experiment (>= 0).
    N : int
        Largest value that can be drawn; draws lie in [1, N] (N >= 1).
    k : int
        Number of times the experiment is run.
    show_results : bool
        If true, print the details of every experiment (the slot array, the
        number of distinct values and the number of repeats).

    Raises
    ------
    ValueError
        If ``n`` is negative or ``N`` is smaller than 1.
    """
    if n < 0:
        raise ValueError('n must be non-negative')
    if N < 1:
        raise ValueError('N must be at least 1')

    for experiment in range(k):
        distinct = 0  # number of unique values
        repeat = 0
        # Slot v-1 holds v once the value v has been drawn, and 0 before that.
        seen = np.zeros(N)

        for _ in range(n):
            # randint's upper bound is exclusive, hence N + 1.
            integer = np.random.randint(1, N + 1)
            # Draws are always >= 1, so a non-zero slot means "already seen".
            # Looking at the slot directly avoids scanning the whole array.
            if seen[integer - 1] != 0:
                repeat += 1
            else:
                seen[integer - 1] = integer
                distinct += 1
        if show_results:
            print('-------------------------------------------------------------------------')
            print('Index of experiment:', experiment)
            print('Generated integers:', seen)
            print('Number of distinct values:', distinct)
            print('Number of repeated values:', repeat)
        # One scatter call per experiment, so each point gets its own colour.
        plt.scatter(experiment, distinct)

    # Both formulas are closed-form in n, so a single evaluation is enough.
    nr_expected = distinct_values(n, N)
    nr_approx = approx(n, N)
    print('Expected number of distinct values:', nr_expected)
    # Expected number of distinct values based on the approx(n, N) function.
    print('Approx expected number of distinct values:', nr_approx)
    if nr_approx != 0:
        difference = (nr_expected - nr_approx)/nr_approx * 100
    else:
        # n == 0: both expectations are zero and the relative difference is undefined.
        difference = float('nan')
    print('Difference = ', difference, '%')

    plt.axhline(nr_expected, linestyle='dashed', label='Expected number of distinct values')

    # Leave headroom above the expected value; skip when it would give a
    # degenerate (0, 0) axis range.
    if nr_expected > 0:
        plt.ylim(0, nr_expected*1.5)
    plt.xlabel('Test index')
    plt.ylabel('Number of distinct values')
    plt.legend()
    plt.show()
    


   
    
    


In [None]:
# Smallest case: 2 draws from {1, 2}, repeated 10 times, no per-experiment printout.
testing(n=2, N=2, k=10, show_results=False)

In [None]:
# 2 draws from 1..4, repeated 25 times, no per-experiment printout.
testing(n=2, N=4, k=25, show_results=False)

In [None]:
# As many draws as possible values: 100 draws from 1..100, repeated 25 times.
testing(n=100, N=100, k=25, show_results=False)

In [None]:
# 100 draws from 1..365, repeated 25 times
# (N=365 presumably mirrors the days of a year, birthday-problem style).
testing(n=100, N=365, k=25, show_results=False)

In [None]:
# Large case where the N >> 1 approximation should be tight:
# 1000 draws from 1..1000, repeated 25 times.
testing(n=1000, N=1000, k=25, show_results=False)