In [1]:
import numpy as np
import pandas as pd
import statistics as st
from scipy import stats
import random
import math

In [3]:
from google.colab import drive
drive.mount('/content/drive/')
%cd '/content/drive/MyDrive/統計學/data'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/統計學/data


# Recent Changes in the Number of Master's Students

## functions

### if n >= 30 -> perform 1-sided, 2-sample z-test

In [80]:
def z_test (data, year1, year2):
  df1 = data[data['Year'] == year1]
  df2 = data[data['Year'] == year2]
  df_n = len(data['School'].unique())

  print("1. Analyze Statistical Measures")
  print(f"Calculate the average total number of master's graduates per school for the {year1}th and {year2}th academic years, µ_{year1} & µ_{year2}")

  df1_mean = df1['gradTotal'].mean()
  df2_mean = df2['gradTotal'].mean()

  df1_std = df1['gradTotal'].std()
  df2_std = df2['gradTotal'].std()

  print("number of schools:", df_n)
  print(f'mean number of master\'s graduates in year {year1}: ', df1_mean)
  print(f'mean number of master\'s graduates in year {year2}: ', df2_mean)
  print(f'std of master\'s graduates in year {year1}: ', df1_std)
  print(f'std of master\'s graduates in year {year2}: ', df2_std)


  print("\n2. Hypothesis")
  print(f"H0: µ{year1} - µ{year2} ≧ 0")
  print(f"Ha: µ{year1} - µ{year2} < 0")


  print("\n3. Perform 1-sided, 2-sample z-test")
  z = (df1_mean - df2_mean) / math.sqrt(df1_std**2 / df_n + df2_std**2 / df_n)
  print("the test statistic is ", z)

  z_alpha = -1.65
  if z < z_alpha:
    print(f"Since the observed value of the test statistic falls in the rejection region (z < -1.65), H0 is rejected.")
    print(f"There is sufficient evidence to indicate that there is a difference in the mean number of students in year {year1} and year {year2} at alpha = 0.05.")
  else:
    print(f"Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.")
    print(f"There is insufficient evidence to indicate that there is a difference in the mean number of students in year {year1} and year {year2} at alpha = 0.05.")

### if n < 30, variance unknown -> perform 1-sided, 2-sample t-test

1. use F-test to check if the sample variance are the same

In [81]:
def F_test(std1, std2, n):
  print("H0: the variances are the same, Ha: the variances are different")
  F = std1**2 / std2**2
  p_value = stats.f.cdf(F, n-1, n-1)

  return p_value

2. perform perform 1-sided, 2-sample

In [82]:
def t_test (data, year1, year2):
  df1 = data[data['Year'] == year1]
  df2 = data[data['Year'] == year2]
  df_n = len(data['School'].unique())

  print("1. Analyze Statistical Measures")
  print(f"Calculate the average total number of master's graduates per school for the {year1}th and {year2}th academic years, µ_{year1} & µ_{year2}")

  df1_mean = df1['gradTotal'].mean()
  df2_mean = df2['gradTotal'].mean()

  df1_std = df1['gradTotal'].std()
  df2_std = df2['gradTotal'].std()

  print("number of schools:", df_n)
  print(f'mean number of master\'s graduates in year {year1}: ', df1_mean)
  print(f'mean number of master\'s graduates in year {year2}: ', df2_mean)
  print(f'std of master\'s graduates in year {year1}: ', df1_std)
  print(f'std of master\'s graduates in year {year2}: ', df2_std)


  print("\n2. Hypothesis")
  print(f"H0: µ{year1} - µ{year2} ≧ 0")
  print(f"Ha: µ{year1} - µ{year2} < 0")


  print("\n3. Perform F test to check if the sample variance are the same")
  f_pval = F_test(df1_std, df2_std, df_n)
  if f_pval < 0.025:
    print(f"Since the p-value({f_pval}) is less than 0.025, H0 is rejected.")
    print(f"There is sufficient evidence to indicate the master\'s graduates variances for year {year1} and year {year2} differ at alpha = 0.05")
    t, p_value = stats.ttest_ind(df1['gradTotal'], df2['gradTotal'], equal_var=False, alternative="less")
  else:
    print(f"Since the p-value({f_pval}) is greater than 0.025, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate the master\'s graduates variances for year {year1} and year {year2} differ at alpha = 0.05")
    t, p_value = stats.ttest_ind(df1['gradTotal'], df2['gradTotal'], equal_var=True, alternative="less")


  print("\n4. Perform 1-sided, 2-sample t-test")
  print(f"the test statistic is {t}, p-value is {p_value}")

  alpha = 0.05
  if p_value < alpha:
    print(f"Since the p-value is less than 0.05, H0 is rejected.")
    print(f"There is sufficient evidence to indicate that there is a difference in the mean number of master\'s graduates in year {year1} and year {year2} at alpha = 0.05.")
  else:
    print(f"Since the p-value is greater than 0.05, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate that there is a difference in the mean number of master\'s graduates in year {year1} and year {year2} at alpha = 0.05.")

## statistical test (6 years)

load dataset

In [83]:
# GitHub filepath
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/graduates.csv"

# df = pd.read_csv(url)
df = pd.read_csv('graduates(6).csv')

df.head()

Unnamed: 0,Year,School,gradTotal,Type,Ownership
0,107,世新大學,202,General,Private
1,107,中信金融管理學院,14,General,Private
2,107,中原大學,610,General,Private
3,107,中國文化大學,298,General,Private
4,107,中國科技大學,47,Tech,Private


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [84]:
general_public = df[(df['Type'] == 'General') & (df['Ownership'] == 'Public')]
general_private = df[(df['Type'] == 'General') & (df['Ownership'] == 'Private')]
tech_public = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Public')]
tech_private = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Private')]

print("number of schools in each category")
print("general, public：", len(general_public['School'].unique()))
print("general, private：", len(general_private['School'].unique()))
print("tech, public：", len(tech_public['School'].unique()))
print("tech, private：", len(tech_private['School'].unique()))

number of schools in each category
general, public： 31
general, private： 37
tech, public： 12
tech, private： 55


### test results for each category

1. general, public

In [85]:
z_test(general_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools: 31
mean number of master's graduates in year 107:  703.5806451612904
mean number of master's graduates in year 112:  720.1290322580645
std of master's graduates in year 107:  826.32571359376
std of master's graduates in year 112:  864.0040023802159

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  -0.07706766896695796
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 107 and year 112 at alpha = 0.05.


2. general, private

In [86]:
z_test(general_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools: 37
mean number of master's graduates in year 107:  216.0
mean number of master's graduates in year 112:  201.45714285714286
std of master's graduates in year 107:  181.5986111057999
std of master's graduates in year 112:  172.1883959431616

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  0.3534843128552399
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 107 and year 112 at alpha = 0.05.


3. tech, public

In [87]:
t_test(tech_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools: 12
mean number of master's graduates in year 107:  474.0833333333333
mean number of master's graduates in year 112:  511.6666666666667
std of master's graduates in year 107:  471.479963583402
std of master's graduates in year 112:  516.6362063357249

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.3834861411965651) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the master's graduates variances for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -0.1861400759576133, p-value is 0.42702029127984
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficien

4. tech, private

In [88]:
z_test(tech_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 107th and 112th academic years, µ_107 & µ_112
number of schools: 55
mean number of master's graduates in year 107:  55.96296296296296
mean number of master's graduates in year 112:  51.395833333333336
std of master's graduates in year 107:  59.123140750363405
std of master's graduates in year 112:  51.973290842440484

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  0.4302709634657829
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 107 and year 112 at alpha = 0.05.


## statistical test (10 years)

load dataset

In [89]:
# GitHub filepath
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/graduates.csv"

# df = pd.read_csv(url)
df = pd.read_csv('graduates(10).csv')

df.head()

Unnamed: 0,Year,School,gradTotal,Type,Ownership
0,103,世新大學,267,General,Private
1,103,中原大學,706,General,Private
2,103,中國文化大學,509,General,Private
3,103,中國科技大學,48,Tech,Private
4,103,中國醫藥大學,214,General,Private


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [90]:
general_public = df[(df['Type'] == 'General') & (df['Ownership'] == 'Public')]
general_private = df[(df['Type'] == 'General') & (df['Ownership'] == 'Private')]
tech_public = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Public')]
tech_private = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Private')]

print("number of schools in each category")
print("general, public：", len(general_public['School'].unique()))
print("general, private：", len(general_private['School'].unique()))
print("tech, public：", len(tech_public['School'].unique()))
print("tech, private：", len(tech_private['School'].unique()))

number of schools in each category
general, public： 31
general, private： 36
tech, public： 12
tech, private： 56


### test results for each category

1. general, public

In [91]:
z_test(general_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools: 31
mean number of master's graduates in year 103:  741.4193548387096
mean number of master's graduates in year 112:  720.1290322580645
std of master's graduates in year 103:  820.7034695590341
std of master's graduates in year 112:  864.0040023802159

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  0.0994741380578472
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 103 and year 112 at alpha = 0.05.


2. general, private

In [92]:
z_test(general_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools: 36
mean number of master's graduates in year 103:  279.94117647058823
mean number of master's graduates in year 112:  207.3235294117647
std of master's graduates in year 103:  209.82343821131232
std of master's graduates in year 112:  171.19078858471528

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  1.6089634579072103
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 103 and year 112 at alpha = 0.05.


3. tech, public

In [93]:
t_test(tech_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools: 12
mean number of master's graduates in year 103:  526.75
mean number of master's graduates in year 112:  511.6666666666667
std of master's graduates in year 103:  473.25509650043534
std of master's graduates in year 112:  516.6362063357249

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.38813723211780116) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the master's graduates variances for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is 0.07457595066518273, p-value is 0.5293869647180232
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficient evid

4. tech, private

In [94]:
z_test(tech_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average total number of master's graduates per school for the 103th and 112th academic years, µ_103 & µ_112
number of schools: 56
mean number of master's graduates in year 103:  68.33333333333333
mean number of master's graduates in year 112:  51.395833333333336
std of master's graduates in year 103:  78.06961795876133
std of master's graduates in year 112:  51.973290842440484

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform 1-sided, 2-sample z-test
the test statistic is  1.3514454914474736
Since the observed value of the test statistic does not fall in the rejection region (z >= -1.65), H0 is not rejected.
There is insufficient evidence to indicate that there is a difference in the mean number of students in year 103 and year 112 at alpha = 0.05.


# Recent Changes in the Proportion of Female Students in Computer Science

## function

### if n < 30, variance unknown -> perform 1-sided, 2-sample t-test

1. use F-test to check if the sample variance are the same

In [66]:
def F_test(std1, std2, n):
  print("H0: the variances are the same, Ha: the variances are different")
  F = std1**2 / std2**2
  p_value = stats.f.cdf(F, n-1, n-1)

  return p_value

2. perform perform 1-sided, 2-sample

In [67]:
def t_test (data, year1, year2):
  df1 = data[data['Year'] == year1]
  df2 = data[data['Year'] == year2]
  df_n = len(data['School'].unique())

  print("1. Analyze Statistical Measures")
  print(f"Calculate the average female student ratio (µ{year1} & µ{year2}) for each school in the academic years {year1} and {year2}")

  df1_mean = df1['Total'].mean()
  df2_mean = df2['Total'].mean()

  df1_std = df1['Total'].std()
  df2_std = df2['Total'].std()

  print("number of schools:", df_n)
  print(f'mean number of female student ratio in year {year1}: ', df1_mean)
  print(f'mean number of female student ratio in year {year2}: ', df2_mean)
  print(f'std of female student ratio in year {year1}: ', df1_std)
  print(f'std of female student ratio in year {year2}: ', df2_std)


  print("\n2. Hypothesis")
  print(f"H0: µ{year1} - µ{year2} ≧ 0")
  print(f"Ha: µ{year1} - µ{year2} < 0")


  print("\n3. Perform F test to check if the sample variance are the same")
  f_pval = F_test(df1_std, df2_std, df_n)
  if f_pval < 0.025:
    print(f"Since the p-value({f_pval}) is less than 0.025, H0 is rejected.")
    print(f"There is sufficient evidence to indicate the variances of female student ratio for year {year1} and year {year2} differ at alpha = 0.05")
    t, p_value = stats.ttest_ind(df1['Total'], df2['Total'], equal_var=False, alternative="less")
  else:
    print(f"Since the p-value({f_pval}) is greater than 0.025, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate the variances of female student ratio for year {year1} and year {year2} differ at alpha = 0.05")
    t, p_value = stats.ttest_ind(df1['Total'], df2['Total'], equal_var=True, alternative="less")


  print("\n4. Perform 1-sided, 2-sample t-test")
  print(f"the test statistic is {t}, p-value is {p_value}")

  alpha = 0.05
  if p_value < alpha:
    print(f"Since the p-value is less than 0.05, H0 is rejected.")
    print(f"There is sufficient evidence to indicate that there is a difference in the mean number of female student ratio in year {year1} and year {year2} at alpha = 0.05.")
  else:
    print(f"Since the p-value is greater than 0.05, H0 is not rejected.")
    print(f"There is insufficient evidence to indicate that there is a difference in the mean number of female student ratio in year {year1} and year {year2} at alpha = 0.05.")

## statistical test (6 years)

load data

In [68]:
# GitHub filepath
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/cs_students.csv"

# df = pd.read_csv(url)
df = pd.read_csv('cs_students(6).csv')
df.head()

Unnamed: 0,Year,School,Total,Male,Female,Type,Ownership
0,107,國立清華大學,584,445,139,General,Public
1,107,國立臺灣大學,554,485,69,General,Public
2,107,國立臺灣師範大學,201,161,40,General,Public
3,107,國立成功大學,478,364,114,General,Public
4,107,國立交通大學,781,629,152,General,Public


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [69]:
general_public = df[(df['Type'] == 'General') & (df['Ownership'] == 'Public')]
general_private = df[(df['Type'] == 'General') & (df['Ownership'] == 'Private')]
tech_public = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Public')]
tech_private = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Private')]

print("number of schools in each category")
print("general, public：", len(general_public['School'].unique()))
print("general, private：", len(general_private['School'].unique()))
print("tech, public：", len(tech_public['School'].unique()))
print("tech, private：", len(tech_private['School'].unique()))

number of schools in each category
general, public： 23
general, private： 18
tech, public： 8
tech, private： 20


### statistical testing of each category

1. general, public

In [70]:
t_test(general_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools: 23
mean number of female student ratio in year 107:  313.72727272727275
mean number of female student ratio in year 112:  344.5652173913044
std of female student ratio in year 107:  172.73105486859424
std of female student ratio in year 112:  181.08180624612646

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.4133610278641862) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -0.5840523289455661, p-value is 0.2811183693684919
Since the p-value is greater than 0.05, H0 is not rejected.
There is insuffi

2. general, private

In [71]:
t_test(general_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools: 18
mean number of female student ratio in year 107:  435.3888888888889
mean number of female student ratio in year 112:  476.0
std of female student ratio in year 107:  188.92268536560567
std of female student ratio in year 112:  222.99986810863945

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.250708774250005) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -0.5895213050029624, p-value is 0.27970449333992153
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficient evidenc

3. tech, public

In [72]:
t_test(tech_public, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools: 8
mean number of female student ratio in year 107:  319.0
mean number of female student ratio in year 112:  367.0
std of female student ratio in year 107:  133.21411336641475
std of female student ratio in year 112:  127.93636811209815

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.5410978978115627) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -0.7112397782512805, p-value is 0.24475079549365297
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficient evidence to indicat

4. tech, private

In [73]:
t_test(tech_private, 107, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ107 & µ112) for each school in the academic years 107 and 112
number of schools: 20
mean number of female student ratio in year 107:  259.15
mean number of female student ratio in year 112:  223.3125
std of female student ratio in year 107:  156.76743586398428
std of female student ratio in year 112:  161.39587716749975

2. Hypothesis
H0: µ107 - µ112 ≧ 0
Ha: µ107 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.45019678231304794) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 107 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is 0.6727284335979001, p-value is 0.7471665575728981
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficient evidence to ind

## statistical test (10 years)

load data

In [74]:
# GitHub filepath
# url = "https://raw.githubusercontent.com/YiHsiu7893/Statistics_Final/refs/heads/main/data/cs_students.csv"

# df = pd.read_csv(url)
df = pd.read_csv('cs_students(10).csv')
df.head()

Unnamed: 0,Year,School,Total,Male,Female,Type,Ownership
0,103,中原大學,442,372,70,General,Private
1,103,中國文化大學,459,396,63,General,Private
2,103,中國科技大學,178,170,8,Tech,Private
3,103,中華大學,393,341,52,General,Private
4,103,中華科技大學,348,316,32,Tech,Private


split into four categories

(general, public), (general, private), (tech, public), (tech, private)

In [75]:
general_public = df[(df['Type'] == 'General') & (df['Ownership'] == 'Public')]
general_private = df[(df['Type'] == 'General') & (df['Ownership'] == 'Private')]
tech_public = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Public')]
tech_private = df[(df['Type'] == 'Tech') & (df['Ownership'] == 'Private')]

print("number of schools in each category")
print("general, public：", len(general_public['School'].unique()))
print("general, private：", len(general_private['School'].unique()))
print("tech, public：", len(tech_public['School'].unique()))
print("tech, private：", len(tech_private['School'].unique()))

number of schools in each category
general, public： 23
general, private： 18
tech, public： 8
tech, private： 23


### statistical testing of each category

1. general, public

In [76]:
t_test(general_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools: 23
mean number of female student ratio in year 103:  304.7142857142857
mean number of female student ratio in year 112:  344.5652173913044
std of female student ratio in year 103:  167.43002802876873
std of female student ratio in year 112:  181.08180624612646

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.35818511258764923) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -0.7557147182462142, p-value is 0.22701897458344067
Since the p-value is greater than 0.05, H0 is not rejected.
There is insuff

2. general, private

In [77]:
t_test(general_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools: 18
mean number of female student ratio in year 103:  436.85714285714283
mean number of female student ratio in year 112:  476.0
std of female student ratio in year 103:  147.47978069649315
std of female student ratio in year 112:  222.99986810863945

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.04873542541519645) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -0.5664425020461749, p-value is 0.2876532766231015
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficient evide

3. tech, public

In [78]:
t_test(tech_public, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools: 8
mean number of female student ratio in year 103:  272.625
mean number of female student ratio in year 112:  367.0
std of female student ratio in year 103:  102.98534916606897
std of female student ratio in year 112:  127.93636811209815

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.29052708434301333) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is -1.6252932263930289, p-value is 0.06319594146185231
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficient evidence to indi

4. tech, private

In [79]:
t_test(tech_private, 103, 112)

1. Analyze Statistical Measures
Calculate the average female student ratio (µ103 & µ112) for each school in the academic years 103 and 112
number of schools: 23
mean number of female student ratio in year 103:  341.72727272727275
mean number of female student ratio in year 112:  223.3125
std of female student ratio in year 103:  187.55803719387052
std of female student ratio in year 112:  161.39587716749975

2. Hypothesis
H0: µ103 - µ112 ≧ 0
Ha: µ103 - µ112 < 0

3. Perform F test to check if the sample variance are the same
H0: the variances are the same, Ha: the variances are different
Since the p-value(0.7565928747560772) is greater than 0.025, H0 is not rejected.
There is insufficient evidence to indicate the variances of female student ratio for year 103 and year 112 differ at alpha = 0.05

4. Perform 1-sided, 2-sample t-test
the test statistic is 2.0346969885583186, p-value is 0.975351351020854
Since the p-value is greater than 0.05, H0 is not rejected.
There is insufficient evide