In [25]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.feature_selection import SelectFromModel
from sklearn.impute import SimpleImputer

file_path = "../feature_engineered_data.csv"
TARGET_COL = "thai_price"

df = pd.read_csv(file_path)

drop_cols = [TARGET_COL]
for c in ["year_month", "date", "Year", "Month","world_price_thb_kg"]:
    if c in df.columns:
        drop_cols.append(c)

df = df.dropna(subset=[TARGET_COL])

y = df[TARGET_COL]

X_raw = df.drop(columns=drop_cols).select_dtypes(include=["number"])

print("Before imputation:")
print("X_raw shape:", X_raw.shape)
print("NaN per column (top 10):")
print(X_raw.isna().sum().sort_values(ascending=False).head(10))



Before imputation:
X_raw shape: (222, 104)
NaN per column (top 10):
production_growth          1
thai_production            0
world_price_thb_kg_lag6    0
thai_production_lag1       0
china_pmi_lag12            0
china_pmi_lag6             0
china_pmi_lag3             0
china_pmi_lag1             0
oil_price_thb_lag12        0
oil_price_thb_lag6         0
dtype: int64


In [26]:
imputer = SimpleImputer(strategy="median")
X_imputed_array = imputer.fit_transform(X_raw)

X = pd.DataFrame(X_imputed_array, columns=X_raw.columns, index=X_raw.index)

print("\nAfter imputation:")
print("Any NaN left?", X.isna().any().any())


After imputation:
Any NaN left? False


In [27]:
ridge = Ridge(alpha=1.0, random_state=42)
ridge.fit(X, y)

In [28]:
selector = SelectFromModel(
    ridge,
    threshold="median",
    prefit=True
)

selected_mask = selector.get_support()
selected_features = X.columns[selected_mask]

In [29]:
print("\nSelected Ridge Regression features:\n")
for f in selected_features:
    print(f"'{f}',")

print(f"\nTotal selected: {len(selected_features)} / {X.shape[1]}")


Selected Ridge Regression features:

'exchange_rate_usd',
'month',
'year',
'is_season',
'thai_price_ma3',
'thai_price_std3',
'thai_price_ma6',
'thai_price_std6',
'thai_price_ma12',
'thai_price_std12',
'world_price_thb_kg_ma3',
'world_price_thb_kg_std3',
'world_price_thb_kg_ma6',
'world_price_thb_kg_std6',
'world_price_thb_kg_ma12',
'world_price_thb_kg_std12',
'china_pmi_ma3',
'china_pmi_std3',
'china_pmi_ma6',
'china_pmi_std6',
'china_pmi_ma12',
'china_pmi_std12',
'thai_price_pct1',
'thai_price_pct3',
'thai_price_pct6',
'thai_price_pct12',
'world_price_thb_kg_pct1',
'world_price_thb_kg_pct3',
'world_price_thb_kg_pct6',
'world_price_thb_kg_pct12',
'exchange_rate_usd_pct1',
'exchange_rate_usd_pct3',
'exchange_rate_usd_pct6',
'exchange_rate_usd_pct12',
'thai_production_pct1',
'production_growth',
'thai_price_lag1',
'thai_price_lag3',
'thai_price_lag6',
'thai_price_lag12',
'world_price_thb_kg_lag1',
'world_price_thb_kg_lag3',
'world_price_thb_kg_lag6',
'world_price_thb_kg_lag12',
'china_p