In [69]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import norm
from tabulate import tabulate
from tkinter import Tk, filedialog, simpledialog, messagebox
import statsmodels.api as sm
import BiQ_Common_Fxns as BiQ
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

In [70]:
import numpy as np
from tkinter import Tk, filedialog
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import statsmodels.api as sm
from tabulate import tabulate

def read_excel_file(file_path):
    """Load an Excel workbook into a DataFrame, discarding all-blank columns.

    Args:
        file_path: Location of the workbook, handed straight to
            ``pd.read_excel`` with its default settings.

    Returns:
        The loaded DataFrame with every column that contains only missing
        values removed, or ``None`` when loading fails for any reason (the
        error is reported on stdout rather than raised).
    """
    loaded = None
    try:
        # Read and prune in one expression; both steps stay inside the guard.
        loaded = pd.read_excel(file_path).dropna(axis=1, how='all')
    except Exception as e:
        # Best-effort loader: report the problem and signal failure via None.
        print(f"Error reading Excel file: {e}")
    return loaded

def get_user_column_selection(df, title):
    """Prompt on the console until the user names one of ``df``'s columns.

    The prompt is issued through the built-in ``input`` (no dialog box is
    shown) and repeats until the typed text matches a column.

    Args:
        df: DataFrame whose column labels are offered for selection.
        title: Human-readable description of the role being selected; it is
            embedded in the prompt text.

    Returns:
        The matching column label exactly as stored in ``df.columns`` (so a
        numeric header is returned as a number, not as the typed string), or
        ``None`` when ``df`` has no columns and nothing could ever match.
    """
    # Get column names from the DataFrame
    columns = df.columns.tolist()
    if not columns:
        # Without this guard the loop below could never terminate.
        return None

    # Map the text a user would type to the real label. Labels are not
    # guaranteed to be strings (e.g. numeric spreadsheet headers), so compare
    # on their string form; setdefault keeps the first label if two collide.
    labels_by_text = {}
    for label in columns:
        labels_by_text.setdefault(str(label), label)
    available = ', '.join(str(label) for label in columns)

    while True:
        col_name = input(f"Enter the column name for {title}:\nAvailable columns: {available}\n")
        # Prefer an exact match; fall back to ignoring surrounding whitespace.
        for candidate in (col_name, col_name.strip()):
            if candidate in labels_by_text:
                return labels_by_text[candidate]
        print(f"Column '{col_name}' not found in the DataFrame.")

def fit_linear_regression(df, x_col, y_col):
    """Fit a scikit-learn ``LinearRegression`` of ``y_col`` on ``x_col``.

    Args:
        df: DataFrame holding both columns.
        x_col: Label of the single predictor column.
        y_col: Label of the response column.

    Returns:
        The fitted ``LinearRegression`` estimator.
    """
    # Double brackets keep the predictor as a one-column DataFrame.
    predictors = df[[x_col]]
    response = df[y_col]
    return LinearRegression().fit(predictors, response)

def fit_polynomial_regression(df, x_col, y_col, degree):
    """Fit a linear model on polynomial terms of a single predictor.

    Args:
        df: DataFrame holding both columns.
        x_col: Label of the predictor column to expand.
        y_col: Label of the response column.
        degree: Polynomial degree passed to ``PolynomialFeatures``.

    Returns:
        A ``(model, polynomial_features)`` pair: the fitted
        ``LinearRegression`` and the fitted ``PolynomialFeatures`` transformer
        that produced its inputs, so callers can expand new data identically.
    """
    predictors = df[[x_col]]
    response = df[y_col]

    # Expand the predictor into its polynomial terms, then regress on them.
    expander = PolynomialFeatures(degree=degree)
    expanded = expander.fit_transform(predictors)

    fitted = LinearRegression().fit(expanded, response)
    return fitted, expander

def calculate_regression_stats(model, X, y):
    """Fit a statsmodels OLS regression of ``y`` on ``X`` and return its results.

    Args:
        model: Accepted for call-site compatibility; its value is never read
            by this function.
        X: Predictor data; it is passed through ``sm.add_constant`` before
            fitting so the regression has an intercept term.
        y: Response values.

    Returns:
        The fitted statsmodels OLS results object.
    """
    design_matrix = sm.add_constant(X)
    return sm.OLS(y, design_matrix).fit()

def main():
    """Interactive workflow: pick a workbook, pick X and Y, print coefficient tables.

    Steps:
        1. Ask for an Excel file through a Tk file dialog.
        2. Load it with ``read_excel_file`` and ask on the console which
           columns are the independent and dependent variables.
        3. Fit a linear model and polynomial models of order 2 and 3.
        4. Re-fit each design with ``calculate_regression_stats`` and print a
           grid table of estimate, standard error, t value and p-value per
           coefficient, each rounded to 3 decimals.

    Returns:
        None. Every failure path prints a message and returns early.
    """
    root = Tk()
    root.withdraw()  # Hide the main window
    try:
        file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx"), ("All files", "*.*")])
    finally:
        # The root exists only to host the dialog; destroy it so no hidden
        # window or Tcl interpreter lingers in a long-lived kernel.
        root.destroy()
    if not file_path:
        print("No file selected.")
        return

    # Read Excel file into DataFrame
    df = read_excel_file(file_path)
    if df is None:
        print("Error reading Excel file.")
        return

    x_col = get_user_column_selection(df, "Independent Variable (X)")
    if x_col is None:
        print("Error: Invalid input for independent variable.")
        return

    y_col = get_user_column_selection(df, "Dependent Variable (Y)")
    if y_col is None:
        print("Error: Invalid input for dependent variable.")
        return

    # Keep only rows where both chosen columns are present: scikit-learn's
    # LinearRegression rejects NaN input, so blank cells would abort the run.
    selected_columns = list(dict.fromkeys([x_col, y_col]))
    complete_rows = df.dropna(subset=selected_columns)
    dropped_count = len(df) - len(complete_rows)
    if dropped_count:
        print(f"Dropped {dropped_count} row(s) with missing values in the selected columns.")
    if complete_rows.empty:
        print("Error: No complete rows available for regression.")
        return
    df = complete_rows

    linear_model = fit_linear_regression(df, x_col, y_col)
    poly2_model, poly2_features = fit_polynomial_regression(df, x_col, y_col, degree=2)
    poly3_model, poly3_features = fit_polynomial_regression(df, x_col, y_col, degree=3)

    models = {
        'Linear Regression': (linear_model, None),  # No features for linear regression
        'Polynomial Regression (Order 2)': (poly2_model, poly2_features),
        'Polynomial Regression (Order 3)': (poly3_model, poly3_features),
    }

    # One (model name, coefficient rows) entry per fitted model.
    coef_data = []

    for model_name, (model, features) in models.items():
        # Polynomial models reuse their fitted transformer to rebuild the design.
        X = df[[x_col]] if features is None else features.transform(df[[x_col]])
        y = df[y_col]
        stats_model = calculate_regression_stats(model, X, y)

        coef_results = [
            [coef_name, coef_value.round(3), std_err.round(3), t_value.round(3), p_value.round(3)]
            for coef_name, coef_value, std_err, t_value, p_value in zip(
                stats_model.params.index, stats_model.params.values, stats_model.bse.values,
                stats_model.tvalues.values, stats_model.pvalues.values
            )
        ]
        coef_data.append((model_name, coef_results))

    # Print Coefficients Table
    for model_name, coef_results in coef_data:
        print(f"\n{model_name} Coefficients:")
        print(tabulate(coef_results, headers=["Coefficient", "Estimate", "Std. Error",
                                              "t value", "P>|t|"],
                       tablefmt="grid"))

# Start the interactive workflow only when this code runs as the top-level
# program (``__name__`` is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()



Linear Regression Coefficients:
+---------------+------------+--------------+-----------+---------+
| Coefficient   |   Estimate |   Std. Error |   t value |   P>|t| |
| const         |      6.003 |        0.036 |   164.492 |       0 |
+---------------+------------+--------------+-----------+---------+
| Ind           |      2     |        0     | 31663.8   |       0 |
+---------------+------------+--------------+-----------+---------+

Polynomial Regression (Order 2) Coefficients:
+---------------+------------+--------------+-----------+---------+
| Coefficient   |   Estimate |   Std. Error |   t value |   P>|t| |
| const         |      5.918 |        0.055 |   108.156 |   0     |
+---------------+------------+--------------+-----------+---------+
| x1            |      2.001 |        0     |  7923.66  |   0     |
+---------------+------------+--------------+-----------+---------+
| x2            |     -0     |        0     |    -2.077 |   0.038 |
+---------------+------------+------