In [2]:
import pandas as pd
from scipy.stats import chi2_contingency
import statsmodels.api as sm

def _convert_a68_2(value):
    """Map one raw ``A68_2`` cell to a number, or ``None`` if unusable.

    The literal answer '没有' ("none") is counted as 0. Anything that
    ``float()`` cannot parse yields ``None`` so the row can be dropped.
    """
    try:
        if value == '没有':
            return 0
        return float(value)
    except (TypeError, ValueError):
        # TypeError covers cells that are neither text nor numbers
        # (e.g. None or pd.NA), which float() / the comparison reject.
        return None


def perform_analysis(file_path, alpha=0.05):
    """Run a chi-square independence test of ``D36`` against ``A68_2``.

    Reads the CSV at *file_path*, cleans the two columns, prints the
    chi-square statistic and p-value, and, only when the p-value is below
    *alpha*, fits and prints an OLS regression of ``D36`` on ``A68_2``.
    Judging by the printed messages, ``D36`` is presumably a happiness
    score and ``A68_2`` a number of daughters.

    Args:
        file_path: Path of a CSV file containing ``D36`` and ``A68_2``
            columns.
        alpha: Significance level the p-value is compared against.

    Returns:
        None. All results and all errors are reported with ``print``;
        no exception propagates to the caller.
    """
    try:
        data = pd.read_csv(file_path)

        # Coerce BOTH columns to real numbers before tabulating. Merely
        # filtering D36 for "looks numeric" leaves text cells in place, so
        # '1' and '1.0' would become separate crosstab rows and distort the
        # chi-square statistic and its degrees of freedom.
        data = data.assign(
            A68_2=pd.to_numeric(
                data['A68_2'].apply(_convert_a68_2), errors='coerce'
            ),
            D36=pd.to_numeric(data['D36'], errors='coerce'),
        ).dropna(subset=['A68_2', 'D36'])

        contingency_table = pd.crosstab(data['D36'], data['A68_2'])
        chi2, p, _, _ = chi2_contingency(contingency_table)
        print(f"卡方值:{chi2},p值:{p}")

        if p < alpha:
            print("幸福感与女儿数量之间存在显著相关性")
            X = sm.add_constant(data['A68_2'].astype(float))
            y = data['D36'].astype(float)
            model = sm.OLS(y, X).fit()
            print(model.summary())
        else:
            print("幸福感与女儿数量之间不存在显著相关性，无需进行回归分析")

    except FileNotFoundError:
        print("文件未找到")
    except Exception as e:
        # Top-level boundary of the script: report the problem (missing
        # column, empty table, ...) instead of raising out of the cell.
        print(f"发生错误:{e}")

# CSV input for the analysis; the path is relative to the working directory.
file_path = 'D36-A68_2.csv'
perform_analysis(file_path)

卡方值:167.3616370860219,p值:4.449452392719556e-12
幸福感与女儿数量之间存在显著相关性
                            OLS Regression Results                            
Dep. Variable:                    D36   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.692
Date:                Mon, 23 Dec 2024   Prob (F-statistic):              0.193
Time:                        23:10:26   Log-Likelihood:                -5549.9
No. Observations:                2640   AIC:                         1.110e+04
Df Residuals:                    2638   BIC:                         1.112e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------