# Simulation

Simulation allows you to generate more data, when you don't have enough.

| Feature | Uniform Distribution | Normal Distribution |
|---|---|---|
| Shape | Rectangular | Bell-shaped |
| Probability | Equal probability for all outcomes | Probability decreases as you move away from the mean |
| Real-world Examples | Rolling a dice, random number generator | Heights, weights, IQ scores |

In [12]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd
import statistics

## Random sampling from uniform distribution

In [2]:
# Generate a sample
sample_size = 20
sample = np.random.uniform(low=0, high=1, size=sample_size)
sample

array([0.79215183, 0.10762506, 0.32422101, 0.09219966, 0.28585869,
       0.26611665, 0.88797959, 0.12027102, 0.24911214, 0.11972559,
       0.75448402, 0.40031947, 0.3879805 , 0.22747054, 0.981742  ,
       0.97638389, 0.21285137, 0.7343763 , 0.23734868, 0.37760803])

## Random sampling from normal distribution

In [2]:
# Generate a sample
sample_size = 20
sample = np.random.normal(loc=0, scale=1, size=sample_size)
# parameters:
# loc - the center of the distribution; higher values skew to the right, lower to the left
# scale: the standard deviation, higher for spread distribution, lower for narrow distribution
# size: shape of the output array

sample

array([ 0.0880375 , -1.63893002,  0.38649392, -1.23972922,  0.61368346,
        0.08980626, -0.27048208,  0.15648425,  0.18821922, -0.10214723,
        2.10368225,  0.25969651, -1.213096  ,  0.28906961,  1.73145571,
        2.16653103,  0.23150815,  1.82565578,  1.27795573, -1.50479277])

## Example of simulation

<img src="../img/simulation.png" width="80%">



In [3]:
# Function to simulate scoring
def simulate_scoring(attempts, shooting_percentage):
    """
    Simulates scoring attempts based on shooting percentage.
    
    :param attempts: Number of attempts (int).
    :param shooting_percentage: Shooting percentage (0-100).
    :return: Number of successful attempts (int).
    """
    # Generate random numbers between 0 and 1 for each attempt
    random_draws = np.random.rand(attempts)
    # Count successes (if random draw < shooting percentage)
    successes = np.sum(random_draws < (shooting_percentage / 100))
    return successes

In [6]:
# Player parameters (free throw, 2-point, 3-point)
player1_shot_percentage = pd.Series(data=[80, 50, 35], index=['ft', 'two_pt', 'three_pt'])
player2_shot_percentage = pd.Series(data=[75, 45, 40], index=['ft', 'two_pt', 'three_pt'])

player1_attempts = pd.Series(data=[6, 10, 5], index=['ft', 'two_pt', 'three_pt'])
player2_attempts = pd.Series(data=[4, 12, 7], index=['ft', 'two_pt', 'three_pt'])

In [7]:
def player_score(player_shot_percentage, player_attempts):
    free_throws = simulate_scoring(player_attempts['ft'], player_shot_percentage['ft'])
    two_point = simulate_scoring(player_attempts['two_pt'], player_shot_percentage['two_pt'])
    three_point = simulate_scoring(player_attempts['three_pt'], player_shot_percentage['three_pt'])
    
    return 1 * free_throws + 2 * two_point + 3 * three_point # returns the score of the player

### Simulate games
Here we will run the simulation for player 1 and 2, and then decide who is better, and if there's a statistically significant difference in them.

In [8]:
games = 100
attempts_per_game = 20

player1_scores = []

for i in range(games):
    player1_scores.append(player_score(player1_shot_percentage, player1_attempts))
    
print(player1_scores)

[22, 10, 19, 23, 27, 18, 18, 26, 20, 19, 11, 18, 21, 24, 22, 19, 19, 24, 16, 12, 16, 22, 22, 27, 16, 21, 12, 28, 18, 29, 24, 13, 17, 20, 22, 20, 20, 25, 17, 15, 28, 21, 25, 12, 23, 24, 19, 12, 14, 17, 24, 19, 29, 20, 20, 22, 27, 26, 19, 23, 23, 28, 20, 25, 16, 23, 17, 20, 22, 24, 15, 18, 22, 15, 18, 19, 22, 9, 25, 16, 23, 20, 25, 16, 14, 21, 13, 16, 27, 15, 14, 20, 20, 20, 16, 21, 27, 16, 24, 24]


In [9]:
player2_scores = []

for i in range(games):
    player2_scores.append(player_score(player2_shot_percentage, player2_attempts))
    
print(player2_scores)

[17, 22, 30, 10, 9, 20, 15, 26, 23, 24, 19, 24, 23, 23, 10, 27, 20, 35, 21, 20, 29, 23, 21, 31, 19, 29, 22, 21, 15, 30, 33, 25, 15, 22, 27, 23, 20, 18, 16, 22, 22, 27, 20, 20, 26, 19, 21, 28, 17, 24, 20, 17, 21, 28, 22, 14, 30, 26, 11, 11, 19, 22, 24, 25, 19, 18, 26, 14, 24, 20, 13, 23, 35, 18, 23, 28, 17, 21, 25, 21, 18, 22, 24, 34, 21, 32, 16, 26, 27, 18, 12, 31, 25, 21, 24, 17, 20, 16, 24, 27]


In [13]:
# First let's check their average scores
print("Player 1 average =", statistics.mean(player1_scores)) # np.mean() doesn't work on a list so you're using statistics.mean
print("Player 2 average =", statistics.mean(player2_scores))

Player 1 average = 20
Player 2 average = 21


In [14]:
# Is player 2 better than player 1?
# we will check now with a t-tailed test:

player_comparison = stats.ttest_ind(player1_scores, player2_scores)
player_comparison

TtestResult(statistic=-2.5952057996612443, pvalue=0.010161174430278867, df=198.0)

In [15]:
alpha = 0.05
p_value = player_comparison[1]

if p_value < alpha:
    print(f"The null hypothesis is rejected with the confidence of {(1-alpha)*100}%.")
else:
    print("There is not enough evidence to reject the null hypothesis.")

The null hypothesis is rejected with the confidence of 95.0%.
