In [None]:
import os

import chardet
import pandas as pd
import statsmodels.api as sm
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression

In [None]:
# Absolute path to the input CSV dataset.
# NOTE(review): hard-coded, machine-specific Windows path — edit this before
# running the notebook on another machine.
data_path = r"C:\Users\tosin\Desktop\Project\Data_folder\dataset.csv"

In [None]:
# Sniff the character encoding of the CSV from its raw bytes.
# The whole file is read into memory and handed to chardet in one call.
with open(data_path, 'rb') as f:
    raw_bytes = f.read()

result = chardet.detect(raw_bytes)
print(f"Detected encoding: {result['encoding']}")

In [None]:
# Load the dataset with the encoding chardet reported in `result`.
# If chardet could not determine an encoding (it reports None), fall back to
# UTF-8, which is what pandas uses when no encoding is given.
detected_encoding = result.get('encoding') or 'utf-8'
df = pd.read_csv(data_path, encoding=detected_encoding)

print("Dataset loaded successfully!")

In [None]:
# View column names exactly as read from the CSV. Later cells index some
# columns with leading/trailing spaces (e.g. " Mother's Age", "male gender "),
# so compare against the exact strings printed here.
print(df.columns)

In [None]:
# Inspect the dtypes of the two columns that are screened for missing values
# in the next step.
columns_of_interest = ['OFC', 'Parity']
print(df.loc[:, columns_of_interest].dtypes)

In [None]:
# Keep only the rows in which both OFC and Parity are present.
required_columns = ['OFC', 'Parity']
df = df.dropna(subset=required_columns)

In [None]:
# Pearson correlations with OFC.
#
# Each correlation is computed on the rows where BOTH variables are present.
# The earlier dropna only screens 'OFC' and 'Parity', so " Mother's Age" may
# still contain missing values, and a NaN passed to pearsonr would spoil the
# coefficient. (`pearsonr` is imported in the first cell.)

# Correlation between " Mother's Age" and 'OFC'
age_ofc = df[[" Mother's Age", "OFC"]].dropna()
corr_mothers_age_ofc, _ = pearsonr(age_ofc[" Mother's Age"], age_ofc["OFC"])

# Correlation between 'Parity' and 'OFC'
parity_ofc = df[["Parity", "OFC"]].dropna()
corr_parity_ofc, _ = pearsonr(parity_ofc["Parity"], parity_ofc["OFC"])

print(f"Correlation (Mother's Age vs. OFC): {corr_mothers_age_ofc}")
print(f"Correlation (Parity vs. OFC): {corr_parity_ofc}")


In [None]:
# Multiple linear regression: OFC on gestational age, mother's age, parity,
# sex and socioeconomic status. (`sm` is imported in the first cell.)

# Independent variables, plus a constant column so the model has an intercept
X_ofc = df[["Gest. Age (weeks)", " Mother's Age", "Parity", "male gender ", "Socioeconomic Status"]]
X_ofc = sm.add_constant(X_ofc)

# Dependent variable
y_ofc = df["OFC"]

# Fit the regression model. missing="drop" excludes any row with a missing
# value in the response or a predictor; only OFC and Parity were screened for
# NaNs earlier, and OLS does not accept NaNs under its default setting.
model_ofc = sm.OLS(y_ofc, X_ofc, missing="drop").fit()

# Summary of the regression model
print(model_ofc.summary())


In [None]:
# Multiple linear regression: birth weight on gestational age, sex, mother's
# age, parity and socioeconomic status.

# Independent variables, plus a constant column so the model has an intercept
X_bw = df[["Gest. Age (weeks)", "male gender ", " Mother's Age", "Parity", "Socioeconomic Status"]]
X_bw = sm.add_constant(X_bw)

# Dependent variable
y_bw = df["Birth Weight (g)"]

# Fit the regression model. missing="drop" excludes any row with a missing
# value in the response or a predictor; neither "Birth Weight (g)" nor most
# predictors were screened for NaNs earlier.
model_bw = sm.OLS(y_bw, X_bw, missing="drop").fit()

# Summary of the regression model
print(model_bw.summary())


In [None]:
# Multiple linear regression: length on gestational age, sex, mother's age,
# parity and socioeconomic status.

# Independent variables, plus a constant column so the model has an intercept
X_length = df[["Gest. Age (weeks)", "male gender ", " Mother's Age", "Parity", "Socioeconomic Status"]]
X_length = sm.add_constant(X_length)

# Dependent variable
y_length = df["Length (cm)"]

# Fit the regression model. missing="drop" excludes any row with a missing
# value in the response or a predictor; neither "Length (cm)" nor most
# predictors were screened for NaNs earlier.
model_length = sm.OLS(y_length, X_length, missing="drop").fit()

# Summary of the regression model
print(model_length.summary())


In [None]:
# Export cell: recompute every statistic from `df` and write them all to a
# single CSV file (one row per result, keyed by a descriptive label).

def _fit_ols(frame, target, predictors):
    """Fit an OLS regression of `target` on `predictors` plus an intercept.

    Rows with a missing value in the response or any predictor are excluded
    by the model (missing="drop").

    Returns the design matrix (with constant), the response series and the
    fitted results object, in that order.
    """
    exog = sm.add_constant(frame[predictors])
    endog = frame[target]
    return exog, endog, sm.OLS(endog, exog, missing="drop").fit()


# Path to save the results (the directory is created if it does not exist).
# NOTE(review): this absolute path sits under a different user profile than
# `data_path` — confirm it is correct for the machine running the notebook.
results_path = r"C:\Users\Smartie\Desktop\Project_Oshinowo\Results"
os.makedirs(results_path, exist_ok=True)

# Results dictionary to store all outputs
results = {}

# Pearson correlations, each on the rows where both variables are present
age_ofc = df[[" Mother's Age", "OFC"]].dropna()
corr_mothers_age_ofc, _ = pearsonr(age_ofc[" Mother's Age"], age_ofc["OFC"])
parity_ofc = df[["Parity", "OFC"]].dropna()
corr_parity_ofc, _ = pearsonr(parity_ofc["Parity"], parity_ofc["OFC"])
results["Correlation (Mother's Age vs. OFC)"] = corr_mothers_age_ofc
results["Correlation (Parity vs. OFC)"] = corr_parity_ofc

# Multilinear Regression: OFC
X_ofc, y_ofc, model_ofc = _fit_ols(
    df,
    "OFC",
    ["Gest. Age (weeks)", " Mother's Age", "Parity", "male gender ", "Socioeconomic Status"],
)
results["Regression Summary (OFC)"] = model_ofc.summary().as_text()

# Multilinear Regression: Birth Weight (g)
X_bw, y_bw, model_bw = _fit_ols(
    df,
    "Birth Weight (g)",
    ["Gest. Age (weeks)", "male gender ", " Mother's Age", "Parity", "Socioeconomic Status"],
)
results["Regression Summary (Birth Weight (g))"] = model_bw.summary().as_text()

# Multilinear Regression: Length (cm)
X_length, y_length, model_length = _fit_ols(
    df,
    "Length (cm)",
    ["Gest. Age (weeks)", "male gender ", " Mother's Age", "Parity", "Socioeconomic Status"],
)
results["Regression Summary (Length (cm))"] = model_length.summary().as_text()

# Write the results to a CSV file: labels form the index, values go in the
# single "Results" column (regression summaries are stored as multi-line text).
results_df = pd.DataFrame.from_dict(results, orient='index', columns=["Results"])
results_csv_path = os.path.join(results_path, "results.csv")
results_df.to_csv(results_csv_path, index=True)

print(f"Results saved successfully to {results_csv_path}")