In [76]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels.panel import PanelOLS
from io import StringIO

In [77]:
def parse_game_text(raw_text):
    """
    Convert copied table text into DataFrame.
    Assumes tab or multiple-space separated columns.
    Adjust delimiter if needed.
    """
    df = pd.read_csv(StringIO(raw_text), sep="\t")

    # ===== clean numeric columns =====
    df["Price"] = (
        df["Price"]
        .str.replace("$", "", regex=False)
        .astype(float)
    )

    df["Sales volume"] = (
        df["Sales volume"]
        .str.replace(",", "", regex=False)
        .astype(int)
    )

    df["Market share"] = (
        df["Market share"]
        .str.replace("%", "", regex=False)
        .astype(float) / 100
    )

    print("\nData types:")
    print(df.dtypes)
    df.head()
    return df

In [78]:
def prepare_features(df, round_number):
    df = df.copy()


    df["Round"] = round_number


    # Log transforms (adjust column names to your actual game table)
    df["log_price"] = np.log(df["Price"])
    df["log_quality"] = np.log(df["Product quality"])
    df["log_marketing"] = np.log(df["Product image"] + 1)


    # Dependent variable (example: market share ratio)
    df["log_share"] = np.log(df["Market share"])


    return df

In [92]:
def interpret_results(params, pvalues):
    print("\n===== INTERPRETATION (Plain Language) =====")
    for var in params.index:
        if var == "const":
            continue
        coef = params[var]
        pval = pvalues[var]


        direction = "increase" if coef > 0 else "decrease"
        significance = (
        "Highly significant" if pval < 0.01 else
        "Significant" if pval < 0.05 else
        "Weak evidence" if pval < 0.1 else
        "Not statistically significant"
        )


        elasticity = round(coef, 3)


        print(f"- {var}: If this variable increases by 1%, market share is expected to {direction} by approximately {abs(elasticity)}%.")
        print(f" Statistical strength: {significance} (p = {round(pval,4)})\n")


def run_cross_section(df_round):
    X = df_round[["log_price", "log_quality", "log_marketing"]]
    X = sm.add_constant(X)
    y = df_round["log_share"]


    model = sm.OLS(y, X).fit()


    print("===== CROSS-SECTION OLS =====")
    print("N:", int(model.nobs))
    print("R-squared:", round(model.rsquared, 4))
    print("Adj R-squared:", round(model.rsquared_adj, 4))
    print("Coefficients:")
    table = pd.DataFrame({
    "coef (elasticity)": model.params,
    "t_stat": model.tvalues,
    "p_value": model.pvalues
    }).round(4)
    print(table)


    interpret_results(model.params, model.pvalues)


    return model

In [80]:
def run_pooled_ols(df_all):
    X = df_all[["log_price", "log_quality", "log_marketing"]]
    X = sm.add_constant(X)
    y = df_all["log_share"]


    model = sm.OLS(y, X).fit()
    return model

In [81]:
def run_fixed_effects(df_all):
    df_panel = df_all.set_index(["Firm", "Round"])


    exog = df_panel[["log_price", "log_quality", "log_marketing"]]
    endog = df_panel["log_share"]


    model = PanelOLS(endog, exog, entity_effects=True)
    result = model.fit()


    return result

In [82]:
def reestimate_all(round_dfs):
    """
    round_dfs: list of prepared round DataFrames
    """
    df_all = pd.concat(round_dfs, ignore_index=True)


    print("===== POOLED OLS =====")
    pooled = run_pooled_ols(df_all)
    print(pooled.summary())


    if df_all["Round"].nunique() >= 2:
        print("===== FIXED EFFECTS =====")
        fe = run_fixed_effects(df_all)
        print(fe.summary())


    return df_all

In [83]:

# USAGE EXAMPLE (each round):


# Step 1: Paste raw copied text
raw_data = """
Company	Product quality	Product image	Price	Sales volume	Market share
Test224	0.48	0.52	$7.29	199,015	3%
Test223	0.94	0.66	$8.19	200,000	3%
Test222	0.65	0.46	$7.50	200,000	3%
Test221	0.75	0.54	$7.50	217,509	4%
Test220	0.17	0.23	$7.50	70,011	1%
Test219	0.21	0.30	$6.70	107,863	2%
Test218	0.22	0.19	$6.50	102,637	2%
Test217	0.61	0.41	$6.00	101,253	2%
Test216	1.07	0.55	$6.80	186,524	3%
Test215	0.27	0.38	$6.50	124,590	2%
Test214	0.27	0.30	$7.50	99,918	2%
Test213	0.23	0.18	$7.00	82,925	1%
Test212	0.52	0.47	$7.25	130,000	2%
Test211	0.22	0.50	$5.60	160,000	3%
Test210	0.70	0.49	$8.00	140,000	2%
Test209	0.19	0.40	$7.00	120,310	2%
Test208	0.47	0.65	$7.99	167,780	3%
Test207	0.47	0.25	$8.79	69,577	1%
Test206	0.27	0.49	$6.20	173,679	3%
Test205	0.84	0.75	$7.00	250,000	4%
Test204	0.30	0.40	$8.00	111,473	2%
Test203	0.76	0.60	$7.50	226,345	4%
Test202	0.50	1.34	$6.50	160,589	3%
Test201	0.22	0.51	$7.60	123,029	2%
Test200	0.71	0.82	$7.20	227,261	4%
Test199	1.09	0.60	$7.50	230,254	4%
Test198	0.75	0.60	$7.80	150,070	3%
Test197	0.30	0.41	$9.00	75,448	1%
Test196	0.92	1.14	$7.30	250,000	4%
Test195	0.24	0.35	$9.00	76,766	1%
Test194	0.40	0.41	$7.50	125,856	2%
Test193	0.75	0.47	$8.50	150,000	3%
Test192	0.58	0.46	$7.70	130,839	2%
Test191	0.27	0.22	$7.70	83,605	1%
Test190	0.29	0.30	$7.10	114,617	2%
Test189	0.25	0.62	$7.30	79,276	1%
Test188	0.17	0.26	$7.50	78,167	1%
Test187	0.59	0.60	$7.00	221,599	4%
Test186	0.71	0.46	$6.89	230,000	4%
Test185	0.82	1.09	$7.50	235,050	4%

"""
""""

# Step 2: Parse
round1 = parse_game_text(raw_text)


# Step 3: Prepare
round1 = prepare_features(round1, round_number=1)


# Step 4: Run cross-section
model_cs = run_cross_section(round1)
print(model_cs.summary())


# Step 5: When multiple rounds available
# df_all = pd.concat([round1, round2, round3])


# pooled_model = run_pooled_ols(df_all)
# print(pooled_model.summary())


# fe_model = run_fixed_effects(df_all)
# print(fe_model.summary())
"""

'"\n\n# Step 2: Parse\nround1 = parse_game_text(raw_text)\n\n\n# Step 3: Prepare\nround1 = prepare_features(round1, round_number=1)\n\n\n# Step 4: Run cross-section\nmodel_cs = run_cross_section(round1)\nprint(model_cs.summary())\n\n\n# Step 5: When multiple rounds available\n# df_all = pd.concat([round1, round2, round3])\n\n\n# pooled_model = run_pooled_ols(df_all)\n# print(pooled_model.summary())\n\n\n# fe_model = run_fixed_effects(df_all)\n# print(fe_model.summary())\n'

In [93]:

# Step 2: Parse
round1 = parse_game_text(raw_data)


# Step 3: Prepare
round1 = prepare_features(round1, round_number=1)


# Step 4: Run cross-section
model_cs = run_cross_section(round1)
# model_cs.summary()


# # Step 5: When multiple rounds available
# df_all = pd.concat([round1, round2, round3])


# # pooled_model = run_pooled_ols(df_all)
# print(pooled_model.summary())


# # fe_model = run_fixed_effects(df_all)
# print(fe_model.summary())
# """


Data types:
Company             object
Product quality    float64
Product image      float64
Price              float64
Sales volume         int64
Market share       float64
dtype: object
===== CROSS-SECTION OLS =====
N: 40
R-squared: 0.7134
Adj R-squared: 0.6896
Coefficients:
               coef (elasticity)  t_stat  p_value
const                    -0.1915 -0.1987   0.8436
log_price                -1.7632 -3.9653   0.0003
log_quality               0.5087  5.1398   0.0000
log_marketing             0.8454  2.2703   0.0293

===== INTERPRETATION (Plain Language) =====
- log_price: If this variable increases by 1%, market share is expected to decrease by approximately 1.763%.
 Statistical strength: Highly significant (p = 0.0003)

- log_quality: If this variable increases by 1%, market share is expected to increase by approximately 0.509%.
 Statistical strength: Highly significant (p = 0.0)

- log_marketing: If this variable increases by 1%, market share is expected to increase by approx

In [85]:

# my_company = "Test200"

# # ถ้ายังไม่มี Period ให้สร้างก่อน
# df["Period"] = 1

# # เรียงข้อมูล
# df = df.sort_values(["Period", "Company"])

# # เลือก baseline = บริษัทแรกในแต่ละ Period
# baseline = df[df["Company"] == my_company]

# baseline = baseline.rename(columns={
#     "Market share": "share_base",
#     "Price": "price_base",
#     "Product quality": "quality_base",
#     "Product image": "image_base"
# })

# # merge baseline กลับเข้า df
# df = df.merge(
#     baseline[["Period", "share_base", "price_base", "quality_base", "image_base"]],
#     on="Period"
# )


# # สร้าง log share ratio
# df["log_share_ratio"] = np.log(df["Market share"] / df["share_base"])


# # สร้าง difference variables
# df["d_price"] = df["Price"] - df["price_base"]
# df["d_quality"] = df["Product quality"] - df["quality_base"]
# df["d_image"] = df["Product image"] - df["image_base"]

# model = smf.ols(
#     formula="log_share_ratio ~ d_quality + d_image + d_price",
#     data=df
# ).fit()

# model.summary()


In [86]:
# df.head()

In [87]:
# print(df.shape)
# print(df.groupby(level=0).size())
# print(df.isna().sum())