# Probability

## Example 1 

In [1]:
# import numpy
import numpy as np

### 1. Two fair coin flips produce exactly two heads

In [2]:
# Draw 1,000,000 trials of two fair flips; every entry is 0 or 1 with equal chance
# (the notebook reads 0 as heads and 1 as tails)
tests = np.random.randint(0, 2, size=(1_000_000, 2))

# Per-trial total of the two flips, i.e. the number of tails in that trial
test_sums = np.sum(tests, axis=1)

# Share of trials whose total is 0 (no tails, hence two heads)
np.mean(test_sums == 0)

0.250276

We test **test_sums == 0** because each flip is encoded as 0 for heads and 1 for tails, so the sum of a test counts its tails. A sum of 0 means zero tails, which is the same as **two heads**.

### 2. Three fair coin flips produce exactly one head

In [3]:
# simulate 1 million tests of three fair coin flips (0 = heads, 1 = tails)
tests = np.random.randint(2, size=(int(1e6), 3))

# sums of all tests: the number of tails in each test
test_sums = tests.sum(axis=1)

# proportion of tests that produced exactly one head (i.e. exactly two tails)
(test_sums == 2).mean()

0.374443

We test **test_sums == 2** because the sum of a test counts its tails (0 = heads, 1 = tails). A sum of 2 means two tails out of three flips, which is the same as exactly **one head**.

### 3. Three biased coin flips with P(H) = 0.6 produce exactly one head

In [4]:
# Draw 1,000,000 trials of three biased flips.
# Outcome 0 (heads) is drawn with probability 0.6, outcome 1 (tails) with 0.4.
tests = np.random.choice(
    [0, 1],
    size=(1_000_000, 3),
    p=[0.6, 0.4],
)

# Row totals: how many 1s (tails) each trial contains
test_sums = np.sum(tests, axis=1)

# Share of trials with two tails, which is the same as exactly one head
np.mean(test_sums == 2)

0.287665

### 4. A die rolls an even number

In [5]:
# Roll one six-sided die 1,000,000 times (faces 1 through 6)
tests = np.random.choice(np.arange(1, 7), size=1_000_000)

# Share of rolls that landed on an even face
np.mean(tests % 2 == 0)

0.500356

### 5. Two dice roll a double

In [6]:
# simulate the first million die rolls (faces numbered 1 through 6)
first = np.random.choice(np.arange(1, 7), size=int(1e6))

# simulate the second million die rolls (same six faces)
second = np.random.choice(np.arange(1, 7), size=int(1e6))

# proportion of tests where the 1st and 2nd die rolled the same number
(first == second).mean()

0.16637

## Example 2

In [7]:
import pandas as pd

# Read the admissions table from the CSV file in the working directory
df = pd.read_csv('admission_data.csv')

# Preview the first five rows
df.head()

Unnamed: 0,student_id,gender,major,admitted
0,35377,female,Chemistry,False
1,56105,male,Physics,True
2,31441,female,Chemistry,False
3,51765,male,Physics,True
4,53714,female,Physics,True


### Proportion and admission rate for each gender

In [8]:
# Proportion of students that are female.
# The share is looked up by its 'female' label instead of by position, so the
# result does not depend on which gender happens to be the most frequent.
df['gender'].value_counts(normalize=True)['female']

0.514

In [9]:
# Proportion of students that are male.
# The share is looked up by its 'male' label instead of by position, so the
# result does not depend on which gender happens to be the most frequent.
df['gender'].value_counts(normalize=True)['male']

0.486

In [10]:
# Admission rate for females.
# `admitted` holds True/False, so the mean of the selected rows is the fraction
# of admitted students; no position-based lookup into value_counts() is needed.
df.loc[df['gender'] == 'female', 'admitted'].mean()

0.28793774319066145

In [11]:
# Admission rate for males.
# `admitted` holds True/False, so the mean of the selected rows is the fraction
# of admitted students; no position-based lookup into value_counts() is needed.
df.loc[df['gender'] == 'male', 'admitted'].mean()

0.48559670781893005

### Proportion and admission rate for physics majors of each gender

In [12]:
# What proportion of female students are majoring in physics?
# The share is read by the 'Physics' label, so it stays correct regardless of
# which major is the most common among female students.
df.loc[df['gender'] == 'female', 'major'].value_counts(normalize=True)['Physics']

0.12062256809338522

In [13]:
# What proportion of male students are majoring in physics?
# The share is read by the 'Physics' label, so it stays correct regardless of
# which major is the most common among male students.
df.loc[df['gender'] == 'male', 'major'].value_counts(normalize=True)['Physics']

0.9259259259259259

In [14]:
# Admission rate for female physics majors.
# Select the matching rows once, then average the True/False `admitted` column
# to get the fraction admitted.
is_female_physics = (df['gender'] == 'female') & (df['major'] == 'Physics')
df.loc[is_female_physics, 'admitted'].mean()

0.7419354838709677

In [15]:
# Admission rate for male physics majors.
# Select the matching rows once, then average the True/False `admitted` column
# to get the fraction admitted.
is_male_physics = (df['gender'] == 'male') & (df['major'] == 'Physics')
df.loc[is_male_physics, 'admitted'].mean()

0.5155555555555555

### Proportion and admission rate for chemistry majors of each gender

In [16]:
# What proportion of female students are majoring in chemistry?
# The share is read by the 'Chemistry' label, so it stays correct regardless of
# which major is the most common among female students.
df.loc[df['gender'] == 'female', 'major'].value_counts(normalize=True)['Chemistry']

0.8793774319066148

In [17]:
# What proportion of male students are majoring in chemistry?
# The share is read by the 'Chemistry' label, so it stays correct regardless of
# which major is the most common among male students.
df.loc[df['gender'] == 'male', 'major'].value_counts(normalize=True)['Chemistry']

0.07407407407407407

In [18]:
# Admission rate for female chemistry majors.
# Select the matching rows once, then average the True/False `admitted` column
# to get the fraction admitted.
is_female_chemistry = (df['gender'] == 'female') & (df['major'] == 'Chemistry')
df.loc[is_female_chemistry, 'admitted'].mean()

0.22566371681415928

In [19]:
# Admission rate for male chemistry majors.
# Select the matching rows once, then average the True/False `admitted` column
# to get the fraction admitted.
is_male_chemistry = (df['gender'] == 'male') & (df['major'] == 'Chemistry')
df.loc[is_male_chemistry, 'admitted'].mean()

0.1111111111111111

### Admission rate for each major

In [20]:
# Admission rate for physics majors.
# `admitted` holds True/False, so the mean of the selected rows is the fraction
# of admitted students; no position-based lookup into value_counts() is needed.
df.loc[df['major'] == 'Physics', 'admitted'].mean()

0.54296875

In [21]:
# Admission rate for chemistry majors.
# `admitted` holds True/False, so the mean of the selected rows is the fraction
# of admitted students; no position-based lookup into value_counts() is needed.
df.loc[df['major'] == 'Chemistry', 'admitted'].mean()

0.21721311475409835