In [None]:
import pandas as pd

# Read the survey responses from disk

sur=pd.read_csv('responses.csv')

# Keep only the four columns analysed in this notebook. The explicit .copy()
# makes `sur` an independent frame rather than a selection of the full table,
# so later in-place operations on it do not trigger pandas'
# SettingWithCopyWarning.

sur= sur[['Finances','Gender','Village - town','Healthy eating']].copy()
print(sur)

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
sur.info()

#Finances column contains 3 missing values
#Gender column contains 6 missing values
#Village - town column contains 4 missing values
#Healthy eating column contains 3 missing values

In [None]:
# Dropping every row that has at least one missing value (dropna's default).
# The result is re-bound to `sur` instead of mutating in place: this avoids
# pandas' SettingWithCopyWarning on a frame obtained by column selection and
# keeps the cell safe to re-run.

sur = sur.dropna()
sur.shape

In [None]:
# Outlier detection with the 1.5 * IQR rule.
# The quartiles are computed here so the cell runs on a fresh kernel (q1, q3
# and IQR were not defined anywhere before this point). Only numeric columns
# take part: quantiles and ordering comparisons are not meaningful for text
# columns.
numeric = sur.select_dtypes(include='number')
q1 = numeric.quantile(0.25)
q3 = numeric.quantile(0.75)
IQR = q3 - q1

# Outliers to the left of the distribution:
out1=(numeric < (q1 - 1.5 * IQR))
print(out1)
# We see a few outliers in Healthy eating column in the left of the distribution


# Outliers to the right of the distribution:
out2=(numeric > (q3 + 1.5 * IQR))
print(out2)
# We don't see any outliers in the right side of the distribution

# Since these outliers are present in categorical ordinal data, we are not removing them.

# Assessing the hypotheses using Chi Square test for the given problems

In [None]:
# The H0 (Null Hypothesis): Saving money is not gender dependent
# The H1 (Alternative Hypothesis): Saving money is gender dependent

from scipy.stats import chi2_contingency
from scipy.stats import chi2

# Significance level shared by the critical value and the decision below
alpha = 0.05

# Creating crosstab table (observed counts of Finances answers per Gender)
ct = pd.crosstab(sur.Gender, sur.Finances)

print('Observed set of values:\n\n ',ct)


# Running the Chi square test of independence on the contingency table.
# The statistic is stored as chi2_stat so that the imported chi2 distribution
# is not overwritten and stays available for the critical value.
chi2_stat, p, dof, expected = chi2_contingency(ct)

# Critical value derived from this table's degrees of freedom; a fixed 9.488
# is only correct when dof == 4.
critical_value = round(chi2.ppf(1 - alpha, dof), 3)

print('\nExpected set of values:\n\n ',expected)
print ('\nChi- square statistic:   ',chi2_stat)
print ('P Value:\t\t ',p)
print('\nCritical Value is : ',critical_value)

if p > alpha:
    print('\nFailing to Reject Null Hypothesis: Saving money is not gender dependent')
else:
    print('\nRejecting Null Hypothesis: Saving money is gender dependent')

In [None]:
# The H0 (Null Hypothesis): No difference in saving money between city and village
# The H1 (Alternative Hypothesis): There is a difference in saving money between city and village

from scipy.stats import chi2_contingency
from scipy.stats import chi2

# Significance level shared by the critical value and the decision below
alpha = 0.05

# Creating crosstab table (observed counts of Finances answers per residence type)
ct1 = pd.crosstab (sur['Village - town'],sur['Finances'])
print('Observed set of values:\n\n ',ct1)


# Running the Chi square test of independence on the contingency table.
# The statistic is stored as chi2_stat so that the imported chi2 distribution
# is not overwritten and stays available for the critical value.
chi2_stat, p, dof, expected = chi2_contingency(ct1)

# Critical value derived from this table's degrees of freedom; a fixed 9.488
# is only correct when dof == 4.
critical_value = round(chi2.ppf(1 - alpha, dof), 3)

print('\nExpected set of values:\n\n ',expected)
print ('\nChi- square statistic:   ',chi2_stat)
print ('P Value:\t\t ',p)
print('\nCritical Value is : ',critical_value)

if p > alpha:
    print('\nFailing to Reject Null Hypothesis: No difference in saving money betweeen city and village')
else:
    print('\nRejecting Null Hypothesis: There is a difference in saving money betweeen city and village')

In [None]:
# The H0 (Null Hypothesis): Women and men have the same lifestyle
# The H1 (Alternative Hypothesis): Women have a healthier lifestyle than men

from scipy.stats import chi2_contingency
from scipy.stats import chi2

# Significance level shared by the critical value and the decision below
alpha = 0.05

# Creating crosstab table (observed counts of Healthy eating answers per Gender)
ct2 = pd.crosstab (sur.Gender,sur['Healthy eating'])
print('Observed set of values:\n\n ',ct2)


# Running the Chi square test of independence on the contingency table.
# The statistic is stored as chi2_stat so that the imported chi2 distribution
# is not overwritten and stays available for the critical value.
chi2_stat, p, dof, expected = chi2_contingency(ct2)

# Critical value derived from this table's degrees of freedom; a fixed 9.488
# is only correct when dof == 4.
critical_value = round(chi2.ppf(1 - alpha, dof), 3)

print('\nExpected set of values:\n\n ',expected)
print ('\nChi- square statistic:   ',chi2_stat)
print ('P Value:\t\t ',p)
print('\nCritical Value is : ',critical_value)

# NOTE(review): the Chi square test is non-directional. A rejection only shows
# that Healthy eating answers and Gender are associated; compare the observed
# and expected tables printed above before claiming which group is healthier.
if p > alpha:
    print('\nFailing to Reject Null Hypothesis: Women and men have the same lifestyle')
else:
    print('\nRejecting Null Hypothesis: Women have a healthier lfiestyle than men')

In [None]:
# The H0 (Null Hypothesis): Village people are not healthier than city people
# The H1 (Alternative Hypothesis): Village people are healthier than city people

from scipy.stats import chi2_contingency
from scipy.stats import chi2

# Significance level shared by the critical value and the decision below
alpha = 0.05

# Creating crosstab table (observed counts of Healthy eating answers per residence type)
ct3 = pd.crosstab (sur['Village - town'],sur['Healthy eating'])
print('Observed set of values:\n\n ',ct3)


# Running the Chi square test of independence on the contingency table.
# The statistic is stored as chi2_stat so that the imported chi2 distribution
# is not overwritten and stays available for the critical value.
chi2_stat, p, dof, expected = chi2_contingency(ct3)

# Critical value derived from this table's degrees of freedom; a fixed 9.488
# is only correct when dof == 4.
critical_value = round(chi2.ppf(1 - alpha, dof), 3)

print('\nExpected set of values:\n\n ',expected)
print ('\nChi- square statistic:   ',chi2_stat)
print ('P Value:\t\t ',p)
print('\nCritical Value is : ',critical_value)

# NOTE(review): the Chi square test is non-directional. A rejection only shows
# that Healthy eating answers and residence type are associated; compare the
# observed and expected tables printed above before claiming which group is
# healthier.
if p > alpha:
    print('\nFailing to Reject Null Hypothesis: Village people are not healthier than city people')
else:
    print('\nRejecting Null Hypothesis: Village people are healthier than city people')

# Assessing the hypotheses using Mann-Whitney test for the above problems

In [None]:
# The H0 (Null Hypothesis): Saving money is not gender dependent
# The H1 (Alternative Hypothesis): Saving money is gender dependent

import numpy as np
import scipy.stats as stats

# Create two groups of data: the Finances answers of each gender

male_fin=sur.loc[sur['Gender'] == 'male', 'Finances']
female_fin=sur.loc[sur['Gender'] == 'female', 'Finances']

# Calculate u and probability of a difference.
# H1 is non-directional, so the two-sided alternative is requested explicitly:
# without it the p-value depends on the installed SciPy version (releases
# before 1.7 return a one-sided p-value by default).

u_statistic, p = stats.mannwhitneyu(male_fin,female_fin,alternative='two-sided')

# Print results
print ('P value:\n',p)

if p > 0.05:
    print('\nFailing to Reject Null Hypothesis: Saving money is not gender dependent')
else:
    print('\nRejecting Null Hypothesis: Saving money is gender dependent')

In [None]:
# The H0 (Null Hypothesis): No difference in saving money between city and village
# The H1 (Alternative Hypothesis): There is a difference in saving money between city and village

import numpy as np
import scipy.stats as stats

# Create two groups of data: the Finances answers of each residence type

city_fin=sur.loc[sur['Village - town'] == 'city', 'Finances']
village_fin=sur.loc[sur['Village - town'] == 'village', 'Finances']

# Calculate u and probability of a difference.
# H1 is non-directional, so the two-sided alternative is requested explicitly:
# without it the p-value depends on the installed SciPy version (releases
# before 1.7 return a one-sided p-value by default).

u_statistic, p = stats.mannwhitneyu(city_fin,village_fin,alternative='two-sided')

# Print results
print ('P value:\n',p)

if p > 0.05:
    print('\nFailing to Reject Null Hypothesis: No difference in saving money betweeen city and village')
else:
    print('\nRejecting Null Hypothesis: There is a difference in saving money betweeen city and village')

In [None]:
# The H0 (Null Hypothesis): Women and men have the same lifestyle
# The H1 (Alternative Hypothesis): Women have a healthier lifestyle than men

import numpy as np
import scipy.stats as stats

# Create two groups of data: the Healthy eating answers of each gender

male_life=sur.loc[sur['Gender'] == 'male', 'Healthy eating']
female_life=sur.loc[sur['Gender'] == 'female', 'Healthy eating']

# Calculate u and probability of a difference.
# H1 is directional, so a one-sided test is used with the group named in H1
# (women) as the first sample: alternative='greater' tests whether the first
# sample tends to take larger values than the second. With the default
# alternative, a rejection could not support the directional conclusion
# printed below.
# NOTE(review): assumes a higher 'Healthy eating' score means a healthier
# lifestyle - confirm against the survey codebook; if the scale is reversed,
# use alternative='less'.

u_statistic, p = stats.mannwhitneyu(female_life,male_life,alternative='greater')

# Print results
print ('P value:\n',p)

if p > 0.05:
    print('\nFailing to Reject Null Hypothesis: Women and men have the same lifestyle')
else:
    print('\nRejecting Null Hypothesis: Women have a healthier lfiestyle than men')

In [None]:
# The H0 (Null Hypothesis): Village people are not healthier than city people
# The H1 (Alternative Hypothesis): Village people are healthier than city people

import numpy as np
import scipy.stats as stats

# Create two groups of data: the Healthy eating answers of each residence type

city_life=sur.loc[sur['Village - town'] == 'city', 'Healthy eating']
village_life=sur.loc[sur['Village - town'] == 'village', 'Healthy eating']

# Calculate u and probability of a difference.
# H1 is directional, so a one-sided test is used with the group named in H1
# (village) as the first sample: alternative='greater' tests whether the first
# sample tends to take larger values than the second. With the default
# alternative, a rejection could not support the directional conclusion
# printed below.
# NOTE(review): assumes a higher 'Healthy eating' score means a healthier
# lifestyle - confirm against the survey codebook; if the scale is reversed,
# use alternative='less'.

u_statistic, p = stats.mannwhitneyu(village_life,city_life,alternative='greater')

# Print results
print ('P value:\n',p)

if p > 0.05:
    print('\nFailing to Reject Null Hypothesis: Village people are not healthier than city people')
else:
    print('\nRejecting Null Hypothesis: Village people are healthier than city people')