In [1]:
# Importing all Required Packages

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind as ttest
import os
%matplotlib inline

In [2]:
# Load the score sheet from the CSV file (relative path, resolved against
# the kernel's working directory) and preview its first five rows.

data = pd.read_csv(filepath_or_buffer='StatisticalTest_Data.csv')
data.head(n=5)

Unnamed: 0,Name,Maths,Physics,Chemistry,Biology,ComputerScience,Bengali,English,History,Geography,Statistics,Gender
0,Aniket,37,34,24,15,48,45,15,45,45,45,M
1,Aarav,32,34,48,25,14,45,14,12,12,12,M
2,Aashi,32,34,45,40,15,23,45,46,13,46,F
3,Aayush,35,31,25,41,25,12,12,48,46,12,M
4,Aditi,26,33,45,39,48,47,14,23,48,48,F


In [3]:
# Partition `data` on the Gender column into two frames:
#   datamale   -> rows whose Gender is 'M'
#   datafemale -> rows whose Gender is 'F'

datamale = data.loc[data['Gender'].eq('M')]
datafemale = data.loc[data['Gender'].eq('F')]

### Checking the datamale data frame

In [4]:
# Preview the leading five rows of the male subset
datamale.head(n=5)

Unnamed: 0,Name,Maths,Physics,Chemistry,Biology,ComputerScience,Bengali,English,History,Geography,Statistics,Gender
0,Aniket,37,34,24,15,48,45,15,45,45,45,M
1,Aarav,32,34,48,25,14,45,14,12,12,12,M
3,Aayush,35,31,25,41,25,12,12,48,46,12,M
5,Advait,30,27,47,5,47,14,25,28,25,6,M
6,Akshay,33,28,33,12,15,21,45,23,21,34,M


In [5]:
# Dimensions of datamale: column count first, then row count
print('Number of Columns:', len(datamale.columns))
print('Number of Rows:', len(datamale))

Number of Columns: 12
Number of Rows: 30


### Checking the datafemale data frame

In [6]:
# Preview the leading five rows of the female subset
datafemale.head(n=5)

Unnamed: 0,Name,Maths,Physics,Chemistry,Biology,ComputerScience,Bengali,English,History,Geography,Statistics,Gender
2,Aashi,32,34,45,40,15,23,45,46,13,46,F
4,Aditi,26,33,45,39,48,47,14,23,48,48,F
7,Alisha,36,31,31,45,26,5,12,29,36,36,F
9,Amrita,40,36,5,20,25,31,32,12,45,39,F
10,Ananya,35,38,43,23,45,23,24,18,6,12,F


In [7]:
# Dimensions of datafemale: column count first, then row count
print('Number of Columns:', len(datafemale.columns))
print('Number of Rows:', len(datafemale))

Number of Columns: 12
Number of Rows: 30


### Extracting Marks of Different Subjects for Different data frames

In [8]:
# Pull the five subject columns compared below out of each gender subset.
# Each name on the left receives the column listed at the same position.

# Male candidates
bngmale, engmale, hismale, geomale, stmale = (
    datamale[subject]
    for subject in ('Bengali', 'English', 'History', 'Geography', 'Statistics')
)

# Female candidates
bngfemale, engfemale, hisfemale, geofemale, stfemale = (
    datafemale[subject]
    for subject in ('Bengali', 'English', 'History', 'Geography', 'Statistics')
)

## Applying T test on the two groups

* Group-1: Male Candidates
* Group-2: Female Candidates

### T Test and Its Elements
* Null Hypothesis: There is no significant difference between the two group means
* Alternative Hypothesis: There is a significant difference between the two group means

#### We reject the Null Hypothesis if the P-value is less than or equal to the significance level, which is 0.05

#### Applying T Test on the Bengali Score

In [9]:
# Independent two-sample t-test: male vs. female Bengali marks
t_statistic, p_value = ttest(bngmale, bngfemale)
print("t-statistic: " + str(t_statistic))
print("p-value: " + str(p_value))

# Verdict at the 0.05 significance level
print(
    'There is Significant Difference between Two Group Means'
    if p_value <= 0.05
    else 'There is no Significant Difference Between the Two group Means'
)

t-statistic: 0.7959729188297807
p-value: 0.42929362403688887
There is no Significant Difference Between the Two group Means


#### Applying T Test on the English Score

In [10]:
# Independent two-sample t-test: male vs. female English marks
t_statistic, p_value = ttest(engmale, engfemale)
print("t-statistic: " + str(t_statistic))
print("p-value: " + str(p_value))

# Verdict at the 0.05 significance level
print(
    'There is Significant Difference between Two Group Means'
    if p_value <= 0.05
    else 'There is no Significant Difference Between the Two group Means'
)

t-statistic: -0.23527177336368102
p-value: 0.8148265157795027
There is no Significant Difference Between the Two group Means


#### Applying T test on the History Score

In [11]:
# Independent two-sample t-test: male vs. female History marks
t_statistic, p_value = ttest(hismale, hisfemale)
print("t-statistic: " + str(t_statistic))
print("p-value: " + str(p_value))

# Verdict at the 0.05 significance level
print(
    'There is Significant Difference between Two Group Means'
    if p_value <= 0.05
    else 'There is no Significant Difference Between the Two group Means'
)

t-statistic: 0.42107547045565946
p-value: 0.6752562862400109
There is no Significant Difference Between the Two group Means


#### Applying T test on Geography Score

In [12]:
# Independent two-sample t-test: male vs. female Geography marks
t_statistic, p_value = ttest(geomale, geofemale)
print("t-statistic: " + str(t_statistic))
print("p-value: " + str(p_value))

# Verdict at the 0.05 significance level
print(
    'There is Significant Difference between Two Group Means'
    if p_value <= 0.05
    else 'There is no Significant Difference Between the Two group Means'
)

t-statistic: 0.49360655179379115
p-value: 0.623448104884603
There is no Significant Difference Between the Two group Means


#### Applying T test on Statistics Score

In [13]:
# Independent two-sample t-test: male vs. female Statistics marks
t_statistic, p_value = ttest(stmale, stfemale)
print("t-statistic: " + str(t_statistic))
print("p-value: " + str(p_value))

# Verdict at the 0.05 significance level
print(
    'There is Significant Difference between Two Group Means'
    if p_value <= 0.05
    else 'There is no Significant Difference Between the Two group Means'
)

t-statistic: -0.34899965795793925
p-value: 0.7283534105746869
There is no Significant Difference Between the Two group Means


### Tabulating Result for Each Subject

In [14]:
# For Bengali Score
#
# One-row summary of the male-vs-female comparison for Bengali.
# The p-value and the verdict are derived from the t-test itself rather than
# typed in by hand, so the table cannot drift from the test if the data changes.

significance_level = 0.05

# Re-run the test in this cell (element 1 of the result is the p-value) so the
# report does not depend on which t-test cell happened to be executed last.
bng_p_value = ttest(bngmale, bngfemale)[1]

reportbng = pd.DataFrame(
    {
        'Gr1': ['Male'],
        'Gr2': ['Female'],
        'Gr1_Mean': [np.mean(bngmale)],
        'Gr2_Mean': [np.mean(bngfemale)],
        # Holds the significance level the p-value is compared against
        'Critical_Value': [significance_level],
        # Rounded for display only; the verdict uses the unrounded p-value
        'P_value': [round(bng_p_value, 3)],
        'Status': ['Difference in Means' if bng_p_value <= significance_level
                   else 'No Difference in Means'],
    },
    index=['Bengali'],
)
reportbng

Unnamed: 0,Gr1,Gr2,Gr1_Mean,Gr2_Mean,Critical_Value,P_value,Status
Bengali,Male,Female,30.4,27.866667,0.05,0.429,No Difference in Means


In [15]:
# For English Score

reporteng = {'Gr1':['Male'], 'Gr2':['Female'], 'Gr1_Mean':[np.mean(engmale)], 'Gr2_Mean':[np.mean(engfemale)],
             'Critical_Value':[0.05], 'P_value':[0.814], 'Status':['No Difference in Means']}
reporteng = pd.DataFrame(reporteng)
reporteng.index = ['English']
reporteng

Unnamed: 0,Gr1,Gr2,Gr1_Mean,Gr2_Mean,Critical_Value,P_value,Status
English,Male,Female,29.833333,30.666667,0.05,0.814,No Difference in Means


In [16]:
# For History Score

reporthis = {'Gr1':['Male'], 'Gr2':['Female'], 'Gr1_Mean':[np.mean(hismale)], 'Gr2_Mean':[np.mean(hisfemale)],
             'Critical_Value':[0.05], 'P_value':[0.675], 'Status':['No Difference in Means']}
reporthis = pd.DataFrame(reporthis)
reporthis.index = ['History']
reporthis

Unnamed: 0,Gr1,Gr2,Gr1_Mean,Gr2_Mean,Critical_Value,P_value,Status
History,Male,Female,29.633333,28.266667,0.05,0.675,No Difference in Means


In [17]:
# For Geography Score

reportgeo = {'Gr1':['Male'], 'Gr2':['Female'], 'Gr1_Mean':[np.mean(geomale)], 'Gr2_Mean':[np.mean(geofemale)],
             'Critical_Value':[0.05], 'P_value':[0.623], 'Status':['No Difference in Means']}
reportgeo = pd.DataFrame(reportgeo)
reportgeo.index = ['Geography']
reportgeo

Unnamed: 0,Gr1,Gr2,Gr1_Mean,Gr2_Mean,Critical_Value,P_value,Status
Geography,Male,Female,29.1,27.533333,0.05,0.623,No Difference in Means


In [18]:
# For Statistics Score

reportstat = {'Gr1':['Male'], 'Gr2':['Female'], 'Gr1_Mean':[np.mean(stmale)], 'Gr2_Mean':[np.mean(stfemale)],
             'Critical_Value':[0.05], 'P_value':[0.728], 'Status':['No Difference in Means']}
reportstat = pd.DataFrame(reportstat)
reportstat.index = ['Statistics']
reportstat

Unnamed: 0,Gr1,Gr2,Gr1_Mean,Gr2_Mean,Critical_Value,P_value,Status
Statistics,Male,Female,30.466667,31.533333,0.05,0.728,No Difference in Means


### Final Result of this Notebook

In [19]:
# Stack the five single-row subject reports row-wise into one summary table
report2 = pd.concat(
    [reportbng, reporteng, reporthis, reportgeo, reportstat],
    axis='index',
)
report2

Unnamed: 0,Gr1,Gr2,Gr1_Mean,Gr2_Mean,Critical_Value,P_value,Status
Bengali,Male,Female,30.4,27.866667,0.05,0.429,No Difference in Means
English,Male,Female,29.833333,30.666667,0.05,0.814,No Difference in Means
History,Male,Female,29.633333,28.266667,0.05,0.675,No Difference in Means
Geography,Male,Female,29.1,27.533333,0.05,0.623,No Difference in Means
Statistics,Male,Female,30.466667,31.533333,0.05,0.728,No Difference in Means


### Exporting Report2

In [20]:
# Write the combined report to CSV; the index (subject names) is kept as the
# first column of the file.
file_path = 'Ttest_report2.csv'
report2.to_csv(path_or_buf=file_path, index=True)