In [17]:
import pandas as pd
from scipy.stats import f_oneway
from scipy.stats import chi2_contingency


# Load the data from the CSV file
df = pd.read_csv('FowlerModule05-2.csv')

print(df.head())

  Marital Status   Length of Work (months) Support Level  Education (years)  \
0          Single                        4           NaN                  9   
1          Single                        4           NaN                  9   
2          Single                       14        Direct                 11   
3          Single                       10        Direct                  9   
4          Single                       12           NaN                 12   

   Age  Self Esteem  
0   52            2  
1   52            2  
2   40            3  
3   46            3  
4   40            3  


In [18]:
df['Support Level'].fillna('None', inplace=True)
print(df.head())

  Marital Status   Length of Work (months) Support Level  Education (years)  \
0          Single                        4          None                  9   
1          Single                        4          None                  9   
2          Single                       14        Direct                 11   
3          Single                       10        Direct                  9   
4          Single                       12          None                 12   

   Age  Self Esteem  
0   52            2  
1   52            2  
2   40            3  
3   46            3  
4   40            3  


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Support Level'].fillna('None', inplace=True)


In [22]:
# Strip any leading or trailing spaces from the column names
df.columns = df.columns.str.strip()

# Perform ANOVA
single_scores = df['Self Esteem'][df['Marital Status'] == 'Single'].values
married_scores = df['Self Esteem'][df['Marital Status'] == 'Married'].values
separated_scores = df['Self Esteem'][df['Marital Status'] == 'Separated'].values
divorced_scores = df['Self Esteem'][df['Marital Status'] == 'Divorced'].values

f_statistic, p_value = f_oneway(single_scores, married_scores, separated_scores, divorced_scores)

# Calculate the sum of squares between groups
ss_between = sum(len(group) * (group.mean() - df['Self Esteem'].mean())**2 for group in [single_scores, married_scores, separated_scores, divorced_scores])

# Calculate the sum of squares within groups
ss_within = sum(((group - group.mean())**2).sum() for group in [single_scores, married_scores, separated_scores, divorced_scores])

# Print the results
print(single_scores)
print(married_scores)
print(separated_scores)
print(divorced_scores)
print("Sum of Squares Between Groups:", ss_between)
print("Sum of Squares Within Groups:", ss_within)
print("F-Statistic:", f_statistic)
print("P-Value:", p_value)

# Compare p-value to significance level
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis. Self-esteem is not the same for all marital status levels.")
else:
    print("Fail to reject the null hypothesis. Self-esteem is the same for all marital status levels.")

[2 2 3 3 3 3 3 3 4 4 4 4 4 4 4 5 5 5 5 5 5 6 6]
[3 3 3 4 5 5 5 6]
[3 3 3 3 3 3 3 3 3 4 4 4 4 5 6]
[3 3 3 3 3 3 3 3 3 3 4 4 4 5]
Sum of Squares Between Groups: 5.869047619047618
Sum of Squares Within Groups: 54.31428571428572
F-Statistic: 2.0170670407387927
P-Value: 0.12193938980853504
Fail to reject the null hypothesis. Self-esteem is the same for all marital status levels.


In [20]:
# Strip any leading or trailing spaces from the column names
df.columns = df.columns.str.strip()

# Create a contingency table of marital status and support level
contingency_table = pd.crosstab(df['Marital Status'], df['Support Level'])

# Perform the chi-square test
chi2, p_value, dof, expected = chi2_contingency(contingency_table)

# Print the results
print(contingency_table)
print("Chi-square statistic:", chi2)
print("P-value:", p_value)
print("Degrees of freedom:", dof)
print("Expected frequencies:", expected)

# Check if the result is statistically significant
alpha = 0.05
if p_value < alpha:
    print("Marital status and support level are dependent (reject H0)")
else:
    print("Marital status and support level are independent (fail to reject H0)")

Support Level   Direct  None
Marital Status              
Divorced             2    12
Married              5     3
Separated            7     8
Single              16     7
Chi-square statistic: 11.231262939958594
P-value: 0.010538875055530628
Degrees of freedom: 3
Expected frequencies: [[ 7.   7. ]
 [ 4.   4. ]
 [ 7.5  7.5]
 [11.5 11.5]]
Marital status and support level are dependent (reject H0)
