In [1]:
## Suppose we have a population of size 1 million, and suppose 52% of them vote +1 and 48% of them vote −1.

In [2]:
# The get_population function below works as follows:

# Purpose: This function creates a simulated population based on the given proportions of votes.
# Parameters:
# total_population: The total number of individuals in the population.
# percent_plus_one: The fraction of the population voting +1, given as a number between 0 and 1 (e.g. 0.52 for 52%).
# Process:
# It first calculates the fraction of individuals voting -1 (percent_minus_one).
# Then, it calculates the actual number of individuals voting +1 (pop_plus_one) and -1 (pop_minus_one) based on the given percentages.
# Two lists are created: one filled with +1's (population_plus_one) and another with -1's (population_minus_one).
# These two lists are combined to form the complete population.

In [3]:
import random

def get_population(total_population, percent_plus_one):
    """Build a simulated electorate made of +1 and -1 votes.

    Parameters
    ----------
    total_population : int
        Number of voters to create. A whole-valued float such as 1e6 is
        accepted and converted with int().
    percent_plus_one : float
        Fraction of the voters, between 0 and 1 inclusive, that vote +1.

    Returns
    -------
    list of int
        Every +1 vote followed by every -1 vote. For a non-negative
        total_population the list holds exactly that many entries.

    Raises
    ------
    ValueError
        If percent_plus_one is below 0 or above 1.
    """
    total_population = int(total_population)
    percent_minus_one = 1 - percent_plus_one
    if percent_plus_one < 0 or percent_minus_one < 0:
        raise ValueError(
            "percent_plus_one must be a fraction between 0 and 1, got %r"
            % (percent_plus_one,)
        )

    # round() rather than int(): binary floats make products such as
    # 0.29 * 100 come out as 28.999999999999996, which int() would truncate
    # to 28 and silently drop a voter.
    pop_plus_one = int(round(percent_plus_one * total_population))
    # Take the remainder instead of rounding a second product, so the two
    # groups always add up to total_population.
    pop_minus_one = total_population - pop_plus_one

    population_plus_one = [+1] * pop_plus_one
    population_minus_one = [-1] * pop_minus_one
    return population_plus_one + population_minus_one

In [4]:
# The get_sample_pred function below works as follows:

# Purpose: To determine the probability of +1 being the majority in randomly picked samples of various sizes.
# Parameters:
# population: The list representing the population.
# sample_sizes: A list of different sample sizes to test.
# experiments: The number of times to repeat the sampling for each sample size.
# Process:
# For each sample size, it performs the specified number of experiments.
# In each experiment, it randomly selects a sample from the population and checks if +1 is the majority (i.e., the sum of the sample is positive).
# It counts the number of times +1 is in the majority and calculates the probability for each sample size.

In [5]:
def get_sample_pred(population, sample_sizes, experiments, seed=None):
    """Estimate, for each sample size, how often +1 wins a random sample.

    Parameters
    ----------
    population : sequence of int
        The votes to draw from (+1 / -1 entries).
    sample_sizes : iterable of int
        Sample sizes to test. Each must not exceed len(population), because
        sampling is done without replacement.
    experiments : int
        Number of independent samples drawn per sample size; at least 1.
    seed : optional
        When given, a private random.Random(seed) drives the sampling so the
        result is reproducible. When None (the default), the module-level
        random generator is used, exactly as before.

    Returns
    -------
    dict
        Maps each sample size to the fraction of experiments in which the
        sample summed to a strictly positive value. A tie (sum of 0) does not
        count as a +1 majority.

    Raises
    ------
    ValueError
        If experiments is smaller than 1.
    """
    if experiments < 1:
        raise ValueError("experiments must be at least 1, got %r" % (experiments,))

    # A dedicated generator keeps seeded runs repeatable without disturbing
    # the global random state that other cells may rely on.
    rng = random if seed is None else random.Random(seed)

    answer = {}
    for sample_size in sample_sizes:
        majority = sum(
            1
            for _ in range(experiments)
            if sum(rng.sample(population, sample_size)) > 0  # +1 is in the majority
        )
        answer[sample_size] = majority / experiments
    return answer

In [6]:
# The get_majority_threshold function below works as follows:

# Purpose: To find the smallest sample size for which the probability of +1 being in the majority exceeds a specified threshold.
# Parameters:
# answer: A dictionary where keys are sample sizes and values are the corresponding probabilities of +1 being in the majority.
# value: The threshold probability (default is 0.9).
# Process:
# It iterates through the answer dictionary in increasing order of sample size and returns the first sample size where the probability exceeds the given threshold. If no sample size exceeds the threshold, it returns None.

In [7]:
def get_majority_threshold(answer, value=0.9):
    """Return the smallest sample size whose probability is above `value`.

    Parameters
    ----------
    answer : dict
        Maps sample size -> estimated probability that +1 is the majority.
    value : float, optional
        Threshold the probability must strictly exceed (default 0.9).

    Returns
    -------
    The smallest qualifying key of `answer`, or None when no probability is
    strictly greater than `value`.
    """
    # Walk the sizes in ascending order; the first hit is the smallest one.
    qualifying_sizes = (size for size in sorted(answer) if answer[size] > value)
    return next(qualifying_sizes, None)

In [8]:
## Part A

In [9]:
# Seed the module-level generator so that Restart & Run All prints the same
# estimates every time instead of a fresh random draw.
random.seed(42)

# One million voters, 52% of whom vote +1.
population = get_population(1_000_000, 0.52)
sample_sizes = [20, 100, 400]
# 100 experiments per sample size; each value is the fraction of those
# experiments in which +1 was the majority of the sample.
answer_a = get_sample_pred(population, sample_sizes, 100)
print(answer_a)

{20: 0.5, 100: 0.6, 400: 0.82}


In [10]:
## Part B

In [11]:
# Seed the module-level generator so that Restart & Run All reports the same
# threshold every time instead of a fresh random draw.
random.seed(42)

# One million voters, 52% of whom vote +1.
population = get_population(1_000_000, 0.52)
sample_sizes = list(range(100, 2001, 100))  # 100, 200, ..., 2000
answer_b = get_sample_pred(population, sample_sizes, 100)
sample = get_majority_threshold(answer_b)
if sample is None:
    # None means no tested size crossed the threshold; say so rather than
    # printing "None" as if it were a sample size.
    print("No tested sample size reached probability 0.9")
else:
    print("The sample size for probability 0.9 is:", sample)

The sample size for probability 0.9 is: 1000
