In [1]:
import numpy as np
import pandas as pd
import statistics as st
from scipy import stats
import random
import math

In [3]:
# Mount Google Drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive/')
# Make the data folder the working directory; the CSV files are read by relative path further down.
# NOTE(review): this absolute Drive path only exists for the notebook owner — adjust before re-running elsewhere.
%cd '/content/drive/MyDrive/統計學/data'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/統計學/data


# Recent Changes in the Number of Master's Students

## functions

### if n >= 30 -> perform 1-sided, 2-sample z-test

In [135]:
def z_test (data, year1, year2, alpha=0.05):
  """Run a left-tailed, two-sample z-test on the mean of `gradTotal` for two years.

  Prints the summary statistics, the hypotheses
  (H0: µ_year1 - µ_year2 >= 0, Ha: µ_year1 - µ_year2 < 0), the test statistic
  and the decision. Nothing is returned.

  Args:
    data: DataFrame with 'Year', 'School' and 'gradTotal' columns.
    year1: value of 'Year' selecting the first sample.
    year2: value of 'Year' selecting the second sample.
    alpha: significance level of the test (default 0.05).

  Raises:
    ValueError: if either year has fewer than two schools in `data`.
  """
  df1 = data[data['Year'] == year1]
  df2 = data[data['Year'] == year2]
  # NOTE(review): the school count is used as the sample size, which assumes
  # one row per school per year — confirm against the data set.
  df_n1 = len(df1['School'].unique())
  df_n2 = len(df2['School'].unique())

  # Without this guard a mistyped year yields NaN statistics and the function
  # would silently report "H0 is not rejected".
  if df_n1 < 2 or df_n2 < 2:
    raise ValueError(
      f"z_test needs at least two schools in each year; "
      f"found {df_n1} for year {year1} and {df_n2} for year {year2}"
    )

  print("1. Analyze Statistical Measures")
  print(f"Calculate the average total number of master's graduates per school for the {year1}th and {year2}th academic years, µ_{year1} & µ_{year2}")

  df1_mean = df1['gradTotal'].mean()
  df2_mean = df2['gradTotal'].mean()

  df1_std = df1['gradTotal'].std()
  df2_std = df2['gradTotal'].std()

  print(f"number of schools in year {year1}:", df_n1)
  print(f"number of schools in year {year2}:", df_n2)
  print(f'mean number of master\'s graduates in year {year1}: ', df1_mean)
  print(f'mean number of master\'s graduates in year {year2}: ', df2_mean)
  print(f'std of master\'s graduates in year {year1}: ', df1_std)
  print(f'std of master\'s graduates in year {year2}: ', df2_std)


  print("\n2. Hypothesis")
  print(f"H0: µ{year1} - µ{year2} ≧ 0")
  print(f"Ha: µ{year1} - µ{year2} < 0")


  print("\n3. Perform 1-sided, 2-sample z-test")
  # Standard error of the difference of two independent sample means.
  z = (df1_mean - df2_mean) / math.sqrt(df1_std**2 / df_n1 + df2_std**2 / df_n2)
  print("the test statistic is ", z)

  # Lower-tail critical value of the standard normal (about -1.645 for alpha = 0.05).
  z_alpha = stats.norm.ppf(alpha)
  if z < z_alpha:
    print(f"Since the observed value of the test statistic falls in the rejection region (z < {z_alpha:.3f}), H0 is rejected.")
    print(f"There is sufficient evidence to indicate that the mean number of master's graduates in year {year1} is lower than in year {year2} at alpha = {alpha}.")
  else:
    print(f"Since the observed value of the test statistic does not fall in the rejection region (z >= {z_alpha:.3f}), H0 is not rejected.")
    print(f"There is insufficient evidence to indicate that the mean number of master's graduates in year {year1} is lower than in year {year2} at alpha = {alpha}.")

### if n < 30, variance unknown -> perform 1-sided, 2-sample t-test

1. Use an F-test to check whether the two sample variances are equal

In [136]:
def F_test(std1, std2, n1, n2):
  """Two-tailed F-test for the equality of two variances.

  Args:
    std1: sample standard deviation of the first sample.
    std2: sample standard deviation of the second sample.
    n1: size of the first sample.
    n2: size of the second sample.

  Returns:
    The smaller of the two tail probabilities of F = std1**2 / std2**2 under
    the F(n1-1, n2-1) distribution. Compare it with alpha/2 (0.025 for
    alpha = 0.05) to carry out the two-sided test.
  """
  print("H0: the variances are the same, Ha: the variances are different")
  F = std1**2 / std2**2
  lower_tail = stats.f.cdf(F, n1-1, n2-1)
  upper_tail = stats.f.sf(F, n1-1, n2-1)
  # Ha is two-sided: a ratio far above 1 is as much evidence against H0 as a
  # ratio far below 1, so report whichever tail the observed ratio falls in.
  p_value = min(lower_tail, upper_tail)

  return p_value

2. Perform the 1-sided, 2-sample t-test

In [137]:
def t_test (data, year1, year2, alpha=0.05):
  """Run a left-tailed, two-sample t-test on the mean of `gradTotal` for two years.

  An F-test on the two sample variances chooses between the pooled-variance
  t-test and Welch's t-test. Summary statistics, hypotheses
  (H0: µ_year1 - µ_year2 >= 0, Ha: µ_year1 - µ_year2 < 0), the test statistic,
  the p-value and the decision are printed. Nothing is returned.

  Args:
    data: DataFrame with 'Year', 'School' and 'gradTotal' columns.
    year1: value of 'Year' selecting the first sample.
    year2: value of 'Year' selecting the second sample.
    alpha: significance level of the t-test (default 0.05). The variance
      check always uses 0.025 per tail.

  Raises:
    ValueError: if either year has fewer than two schools in `data`.
  """
  df1 = data[data['Year'] == year1]
  df2 = data[data['Year'] == year2]
  # NOTE(review): the school count is used as the sample size, which assumes
  # one row per school per year — confirm against the data set.
  df_n1 = len(df1['School'].unique())
  df_n2 = len(df2['School'].unique())

  # Without this guard a mistyped year yields NaN statistics and the function
  # would silently report "H0 is not rejected".
  if df_n1 < 2 or df_n2 < 2:
    raise ValueError(
      f"t_test needs at least two schools in each year; "
      f"found {df_n1} for year {year1} and {df_n2} for year {year2}"
    )

  print("1. Analyze Statistical Measures")
  print(f"Calculate the average total number of master's graduates per school for the {year1}th and {year2}th academic years, µ_{year1} & µ_{year2}")

  df1_mean = df1['gradTotal'].mean()
  df2_mean = df2['gradTotal'].mean()

  df1_std = df1['gradTotal'].std()
  df2_std = df2['gradTotal'].std()

  print(f"number of schools in year {year1}:", df_n1)
  print(f"number of schools in year {year2}:", df_n2)
  print(f'mean number of master\'s graduates in year {year1}: ', df1_mean)
  print(f'mean number of master\'s graduates in year {year2}: ', df2_mean)
  print(f'std of master\'s graduates in year {year1}: ', df1_std)
  print(f'std of master\'s graduates in year {year2}: ', df2_std)


  print("\n2. Hypothesis")
  print(f"H0: µ{year1} - µ{year2} ≧ 0")
  print(f"Ha: µ{year1} - µ{year2} < 0")


  print("\n3. Perform F test to check if the sample variance are the same")
  f_pval = F_test(df1_std, df2_std, df_n1, df_n2)
  # Ha of the F-test is two-sided, so a variance ratio in either tail must
  # reject H0. Folding the reported probability onto the nearer tail lets the
  # 0.025 cut-off catch an unusually large ratio as well as a small one.
  f_pval = min(f_pval, 1 - f_pval)
  if f_pval < 0.025:
    print(f"Since the p-value({f_pval}) is less than 0.025, H0 is rejected.")
    print(f"There is sufficient evidence to indicate the master\'s graduates variances for year {year1} and year {year2} differ at alpha = 0.05")
    # Unequal variances: Welch's t-test.
    t, p_value = stats.ttest_ind(df1['gradTotal'], df2['gradTotal'], equal_var=False, alternative="less")
  else:
    print(f"Since the p-value({f_pval}) is greater than 0.025, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate the master\'s graduates variances for year {year1} and year {year2} differ at alpha = 0.05")
    # Variances treated as equal: pooled-variance t-test.
    t, p_value = stats.ttest_ind(df1['gradTotal'], df2['gradTotal'], equal_var=True, alternative="less")


  print("\n4. Perform 1-sided, 2-sample t-test")
  print(f"the test statistic is {t}, p-value is {p_value}")

  if p_value < alpha:
    print(f"Since the p-value is less than {alpha}, H0 is rejected.")
    print(f"There is sufficient evidence to indicate that the mean number of master's graduates in year {year1} is lower than in year {year2} at alpha = {alpha}.")
  else:
    print(f"Since the p-value is greater than {alpha}, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate that the mean number of master's graduates in year {year1} is lower than in year {year2} at alpha = {alpha}.")

## statistical test (6 years)

load dataset

In [138]:
# Alternative source (disabled): copy of the data hosted on GitHub.
# NOTE(review): the URL names graduates.csv while the file read below is graduates(6).csv — confirm they hold the same data before switching.
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/graduates.csv"

# df = pd.read_csv(url)
# Read the local CSV; the path is relative to the current working directory.
df = pd.read_csv('graduates(6).csv')

# Preview the first rows to check the loaded columns.
df.head()

Unnamed: 0,Year,School,gradTotal,Type,Ownership
0,107,世新大學,202,General,Private
1,107,中信金融管理學院,14,General,Private
2,107,中原大學,610,General,Private
3,107,中國文化大學,298,General,Private
4,107,中國科技大學,47,Tech,Private


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [139]:
# Partition the schools into the four (Type, Ownership) groups tested below.
general_public, general_private, tech_public, tech_private = (
  df[df['Type'].eq(kind) & df['Ownership'].eq(owner)]
  for kind, owner in (
    ('General', 'Public'),
    ('General', 'Private'),
    ('Tech', 'Public'),
    ('Tech', 'Private'),
  )
)

### test results for each category

1. general, public

In [140]:
# z-test: the output below reports 31 schools in each year (n >= 30).
z_test(general_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools in year 107: 31
number of schools in year 112: 31
mean number of master's graduates in year 107:  703.5806451612904
mean number of master's graduates in year 112:  720.1290322580645
std of master's graduates in year 107:  826.32571359376
std of master's graduates in year 112:  864.0040023802159

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  -0.07706766896695796
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 107 and year 112 at alpha = 0.05.


2. general, private

In [141]:
# z-test: the output below reports 37 and 35 schools (n >= 30).
z_test(general_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools in year 107: 37
number of schools in year 112: 35
mean number of master's graduates in year 107:  216.0
mean number of master's graduates in year 112:  201.45714285714286
std of master's graduates in year 107:  181.5986111057999
std of master's graduates in year 112:  172.1883959431616

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  0.3487978544046838
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 107 and year 112 at alpha = 0.05.


3. tech, public

In [142]:
# t-test: the output below reports only 12 schools in each year (n < 30).
t_test(tech_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools in year 107: 12
number of schools in year 112: 12
mean number of master's graduates in year 107:  474.0833333333333
mean number of master's graduates in year 112:  511.6666666666667
std of master's graduates in year 107:  471.479963583402
std of master's graduates in year 112:  516.6362063357249

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.3834861411965651) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the master's graduates variances for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -0.1861400759576133, p-value is 0.42702029127984
Since the p-value is greater than 

4. tech, private

In [143]:
# z-test: the output below reports 54 and 48 schools (n >= 30).
z_test(tech_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools in year 107: 54
number of schools in year 112: 48
mean number of master's graduates in year 107:  55.96296296296296
mean number of master's graduates in year 112:  51.395833333333336
std of master's graduates in year 107:  59.123140750363405
std of master's graduates in year 112:  51.973290842440484

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  0.41518020825496166
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 107 and year 112 at alpha = 0.05.


## statistical test (10 years)

load dataset

In [144]:
# Alternative source (disabled): copy of the data hosted on GitHub.
# NOTE(review): the URL names graduates.csv while the file read below is graduates(10).csv — confirm they hold the same data before switching.
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/graduates.csv"

# df = pd.read_csv(url)
# Read the local CSV; the path is relative to the current working directory.
# NOTE(review): this rebinds `df`, replacing the frame loaded earlier in the notebook.
df = pd.read_csv('graduates(10).csv')

# Preview the first rows to check the loaded columns.
df.head()

Unnamed: 0,Year,School,gradTotal,Type,Ownership
0,103,世新大學,267,General,Private
1,103,中原大學,706,General,Private
2,103,中國文化大學,509,General,Private
3,103,中國科技大學,48,Tech,Private
4,103,中國醫藥大學,214,General,Private


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [145]:
# Partition the schools into the four (Type, Ownership) groups tested below.
general_public, general_private, tech_public, tech_private = (
  df[df['Type'].eq(kind) & df['Ownership'].eq(owner)]
  for kind, owner in (
    ('General', 'Public'),
    ('General', 'Private'),
    ('Tech', 'Public'),
    ('Tech', 'Private'),
  )
)

### test results for each category

1. general, public

In [146]:
# z-test: the output below reports 31 schools in each year (n >= 30).
z_test(general_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools in year 103: 31
number of schools in year 112: 31
mean number of master's graduates in year 103:  741.4193548387096
mean number of master's graduates in year 112:  720.1290322580645
std of master's graduates in year 103:  820.7034695590341
std of master's graduates in year 112:  864.0040023802159

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  0.0994741380578472
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 103 and year 112 at alpha = 0.05.


2. general, private

In [147]:
# z-test: the output below reports 34 schools in each year (n >= 30).
z_test(general_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools in year 103: 34
number of schools in year 112: 34
mean number of master's graduates in year 103:  279.94117647058823
mean number of master's graduates in year 112:  207.3235294117647
std of master's graduates in year 103:  209.82343821131232
std of master's graduates in year 112:  171.19078858471528

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  1.5636314206034825
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 103 and year 112 at alpha = 0.05.


3. tech, public

In [148]:
# t-test: the output below reports only 12 schools in each year (n < 30).
t_test(tech_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools in year 103: 12
number of schools in year 112: 12
mean number of master's graduates in year 103:  526.75
mean number of master's graduates in year 112:  511.6666666666667
std of master's graduates in year 103:  473.25509650043534
std of master's graduates in year 112:  516.6362063357249

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.38813723211780116) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the master's graduates variances for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is 0.07457595066518273, p-value is 0.5293869647180232
Since the p-value is greater than 0.05, 

4. tech, private

In [149]:
# z-test: the output below reports 48 schools in each year (n >= 30).
z_test(tech_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools in year 103: 48
number of schools in year 112: 48
mean number of master's graduates in year 103:  68.33333333333333
mean number of master's graduates in year 112:  51.395833333333336
std of master's graduates in year 103:  78.06961795876133
std of master's graduates in year 112:  51.973290842440484

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  1.2511953997290648
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 103 and year 112 at alpha = 0.05.


# Recent Changes in the Proportion of Female Students in Computer Science

## function

### if n < 30, variance unknown -> perform 1-sided, 2-sample t-test

1. Use an F-test to check whether the two sample variances are equal

In [178]:
# NOTE(review): this cell redefines the F_test that already exists earlier in
# the notebook; keep the two definitions in sync or drop one of them.
def F_test(std1, std2, n1, n2):
  """Two-tailed F-test for the equality of two variances.

  Args:
    std1: sample standard deviation of the first sample.
    std2: sample standard deviation of the second sample.
    n1: size of the first sample.
    n2: size of the second sample.

  Returns:
    The smaller of the two tail probabilities of F = std1**2 / std2**2 under
    the F(n1-1, n2-1) distribution. Compare it with alpha/2 (0.025 for
    alpha = 0.05) to carry out the two-sided test.
  """
  print("H0: the variances are the same, Ha: the variances are different")
  F = std1**2 / std2**2
  lower_tail = stats.f.cdf(F, n1-1, n2-1)
  upper_tail = stats.f.sf(F, n1-1, n2-1)
  # Ha is two-sided: a ratio far above 1 is as much evidence against H0 as a
  # ratio far below 1, so report whichever tail the observed ratio falls in.
  p_value = min(lower_tail, upper_tail)

  return p_value

2. Perform the 1-sided, 2-sample t-test

In [179]:
def t_test (data, year1, year2, alpha=0.05):
  """Run a left-tailed, two-sample t-test on the per-school female ratio for two years.

  The ratio is 'Female' / 'Total' for each row. An F-test on the two sample
  variances chooses between the pooled-variance t-test and Welch's t-test.
  Summary statistics, hypotheses (H0: µ_year1 - µ_year2 >= 0,
  Ha: µ_year1 - µ_year2 < 0), the test statistic, the p-value and the
  decision are printed. Nothing is returned.

  NOTE(review): this definition replaces the earlier `t_test` in this notebook
  (the one that reads 'gradTotal') for every cell executed after it.

  Args:
    data: DataFrame with 'Year', 'School', 'Female' and 'Total' columns.
    year1: value of 'Year' selecting the first sample.
    year2: value of 'Year' selecting the second sample.
    alpha: significance level of the t-test (default 0.05). The variance
      check always uses 0.025 per tail.

  Raises:
    ValueError: if either year has fewer than two schools in `data`.
  """
  df1 = data[data['Year'] == year1]
  df2 = data[data['Year'] == year2]
  # NOTE(review): the school count is used as the sample size, which assumes
  # one row per school per year — confirm against the data set.
  df_n1 = len(df1['School'].unique())
  df_n2 = len(df2['School'].unique())

  # Without this guard a mistyped year yields NaN statistics and the function
  # would silently report "H0 is not rejected".
  if df_n1 < 2 or df_n2 < 2:
    raise ValueError(
      f"t_test needs at least two schools in each year; "
      f"found {df_n1} for year {year1} and {df_n2} for year {year2}"
    )

  print("1. Analyze Statistical Measures")
  print(f"Calculate the average female student ratio (µ{year1} & µ{year2}) for each school in the academic years {year1} and {year2}")

  df1_proportion = df1['Female'] / df1['Total']
  df2_proportion = df2['Female']  / df2['Total']

  df1_mean = df1_proportion.mean()
  df2_mean = df2_proportion.mean()
  df1_std = df1_proportion.std()
  df2_std = df2_proportion.std()

  print(f"number of schools in year {year1}:", df_n1)
  print(f"number of schools in year {year2}:", df_n2)
  print(f'mean number of female student ratio in year {year1}: ', df1_mean)
  print(f'mean number of female student ratio in year {year2}: ', df2_mean)
  print(f'std of female student ratio in year {year1}: ', df1_std)
  print(f'std of female student ratio in year {year2}: ', df2_std)


  print("\n2. Hypothesis")
  print(f"H0: µ{year1} - µ{year2} ≧ 0")
  print(f"Ha: µ{year1} - µ{year2} < 0")


  print("\n3. Perform F test to check if the sample variance are the same")
  f_pval = F_test(df1_std, df2_std, df_n1, df_n2)
  # Ha of the F-test is two-sided, so a variance ratio in either tail must
  # reject H0. Folding the reported probability onto the nearer tail lets the
  # 0.025 cut-off catch an unusually large ratio as well as a small one.
  f_pval = min(f_pval, 1 - f_pval)
  if f_pval < 0.025:
    print(f"Since the p-value({f_pval}) is less than 0.025, H0 is rejected.")
    print(f"There is sufficient evidence to indicate the variances of female student ratio for year {year1} and year {year2} differ at alpha = 0.05")
    # Unequal variances: Welch's t-test.
    t, p_value = stats.ttest_ind(df1_proportion, df2_proportion, equal_var=False, alternative="less")
  else:
    print(f"Since the p-value({f_pval}) is greater than 0.025, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate the variances of female student ratio for year {year1} and year {year2} differ at alpha = 0.05")
    # Variances treated as equal: pooled-variance t-test.
    t, p_value = stats.ttest_ind(df1_proportion, df2_proportion, equal_var=True, alternative="less")


  print("\n4. Perform 1-sided, 2-sample t-test")
  print(f"the test statistic is {t}, p-value is {p_value}")

  if p_value < alpha:
    print(f"Since the p-value is less than {alpha}, H0 is rejected.")
    print(f"There is sufficient evidence to indicate that the mean female student ratio in year {year1} is lower than in year {year2} at alpha = {alpha}.")
  else:
    print(f"Since the p-value is greater than {alpha}, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate that the mean female student ratio in year {year1} is lower than in year {year2} at alpha = {alpha}.")

## statistical test (6 years)

load data

In [180]:
# Alternative source (disabled): copy of the data hosted on GitHub.
# NOTE(review): the URL names cs_students.csv while the file read below is cs_students(6).csv — confirm they hold the same data before switching.
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/cs_students.csv"

# df = pd.read_csv(url)
# Read the local CSV; the path is relative to the current working directory.
# NOTE(review): this rebinds `df`, replacing the frame loaded earlier in the notebook.
df = pd.read_csv('cs_students(6).csv')
# Preview the first rows to check the loaded columns.
df.head()

Unnamed: 0,Year,School,Total,Male,Female,Type,Ownership
0,107,國立清華大學,584,445,139,General,Public
1,107,國立臺灣大學,554,485,69,General,Public
2,107,國立臺灣師範大學,201,161,40,General,Public
3,107,國立成功大學,478,364,114,General,Public
4,107,國立交通大學,781,629,152,General,Public


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [181]:
# Partition the schools into the four (Type, Ownership) groups tested below.
general_public, general_private, tech_public, tech_private = (
  df[df['Type'].eq(kind) & df['Ownership'].eq(owner)]
  for kind, owner in (
    ('General', 'Public'),
    ('General', 'Private'),
    ('Tech', 'Public'),
    ('Tech', 'Private'),
  )
)

### statistical testing of each category

1. general, public

In [182]:
# t-test: the output below reports 22 and 23 schools (n < 30).
t_test(general_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools in year 107: 22
number of schools in year 112: 23
mean number of female student ratio in year 107:  0.18806324212459577
mean number of female student ratio in year 112:  0.22024074892477066
std of female student ratio in year 107:  0.03854424961559331
std of female student ratio in year 112:  0.04599962261965814

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.21078989990021388) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -2.5374950328185006, p-value is 0.0074360930692068125
Since the p-value is 

2. general, private

In [183]:
# t-test: the output below reports 18 schools in each year (n < 30).
t_test(general_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools in year 107: 18
number of schools in year 112: 18
mean number of female student ratio in year 107:  0.16819386337942302
mean number of female student ratio in year 112:  0.21379035340886318
std of female student ratio in year 107:  0.05409858482773544
std of female student ratio in year 112:  0.06390164683957127

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.24980573357970332) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -2.3105015274622955, p-value is 0.013529012051065376
Since the p-value is l

3. tech, public

In [184]:
# t-test: the output below reports 7 and 8 schools (n < 30).
t_test(tech_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools in year 107: 7
number of schools in year 112: 8
mean number of female student ratio in year 107:  0.09430542486580698
mean number of female student ratio in year 112:  0.11693152361531407
std of female student ratio in year 107:  0.026743150704289505
std of female student ratio in year 112:  0.032488648970467

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.32570540602339026) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -1.4585211329537202, p-value is 0.08421585626957422
Since the p-value is great

4. tech, private

In [185]:
# t-test: the output below reports 20 and 16 schools (n < 30).
t_test(tech_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools in year 107: 20
number of schools in year 112: 16
mean number of female student ratio in year 107:  0.06025355802108652
mean number of female student ratio in year 112:  0.091619651103692
std of female student ratio in year 107:  0.032292814336185714
std of female student ratio in year 112:  0.0348183301509321

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.37302515976738504) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -2.797311176258539, p-value is 0.004210928439592767
Since the p-value is less

## statistical test (10 years)

load data

In [186]:
# Alternative source (disabled): copy of the data hosted on GitHub.
# NOTE(review): the URL names cs_students.csv while the file read below is cs_students(10).csv — confirm they hold the same data before switching.
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/cs_students.csv"

# df = pd.read_csv(url)
# Read the local CSV; the path is relative to the current working directory.
# NOTE(review): this rebinds `df`, replacing the frame loaded earlier in the notebook.
df = pd.read_csv('cs_students(10).csv')
# Preview the first rows to check the loaded columns.
df.head()

Unnamed: 0,Year,School,Total,Male,Female,Type,Ownership
0,103,中原大學,442,372,70,General,Private
1,103,中國文化大學,459,396,63,General,Private
2,103,中國科技大學,178,170,8,Tech,Private
3,103,中華大學,393,341,52,General,Private
4,103,中華科技大學,348,316,32,Tech,Private


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [187]:
# Partition the schools into the four (Type, Ownership) groups tested below.
general_public, general_private, tech_public, tech_private = (
  df[df['Type'].eq(kind) & df['Ownership'].eq(owner)]
  for kind, owner in (
    ('General', 'Public'),
    ('General', 'Private'),
    ('Tech', 'Public'),
    ('Tech', 'Private'),
  )
)

### statistical testing of each category

1. general, public

In [188]:
# t-test: the output below reports 21 and 23 schools (n < 30).
t_test(general_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools in year 103: 21
number of schools in year 112: 23
mean number of female student ratio in year 103:  0.18067314038554508
mean number of female student ratio in year 112:  0.22024074892477066
std of female student ratio in year 103:  0.035683693457220736
std of female student ratio in year 112:  0.04599962261965814

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.1289987678733906) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -3.1658612917888393, p-value is 0.0014384258904201
Since the p-value is les

2. general, private

In [189]:
# t-test: the output below reports 14 and 18 schools (n < 30).
t_test(general_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools in year 103: 14
number of schools in year 112: 18
mean number of female student ratio in year 103:  0.14294559661289377
mean number of female student ratio in year 112:  0.21379035340886318
std of female student ratio in year 103:  0.03812152613146123
std of female student ratio in year 112:  0.06390164683957125

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.032399547911030904) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -3.6642717743103295, p-value is 0.0004759779248657224
Since the p-value is

3. tech, public

In [190]:
# t-test: the output below reports 8 schools in each year (n < 30).
t_test(tech_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools in year 103: 8
number of schools in year 112: 8
mean number of female student ratio in year 103:  0.08770382183660903
mean number of female student ratio in year 112:  0.11693152361531407
std of female student ratio in year 103:  0.01822330432814595
std of female student ratio in year 112:  0.032488648970467

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.0750191383282493) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -2.2192556698084567, p-value is 0.021749834872001203
Since the p-value is less t

4. tech, private

In [191]:
# t-test: the output below reports 22 and 16 schools (n < 30).
t_test(tech_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools in year 103: 22
number of schools in year 112: 16
mean number of female student ratio in year 103:  0.05355201189140669
mean number of female student ratio in year 112:  0.091619651103692
std of female student ratio in year 103:  0.02603823378793482
std of female student ratio in year 112:  0.03481833015093211

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.10813755348800919) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -3.860675683109847, p-value is 0.00022589971597581604
Since the p-value is le