# Endogenous variables

## NO - EA Inflation Core

In [28]:
import pandas as pd
import numpy as np
import io, gzip, re, requests


URL = "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/prc_hicp_midx?format=TSV&compressed=true"

r = requests.get(URL, timeout=120)
r.raise_for_status()
with gzip.open(io.BytesIO(r.content), 'rt') as f:
    df_raw = pd.read_csv(f, sep='\t')

# Split the dimension column: 'freq,unit,coicop,geo\\TIME_PERIOD'
dimcol = df_raw.columns[0]
df_raw[['freq','unit','coicop','geo']] = df_raw[dimcol].str.split(',', expand=True)
df_raw = df_raw.drop(columns=[dimcol])

# Identify time columns (YYYY-MM with optional trailing space)
time_cols = [c for c in df_raw.columns if re.match(r'^\d{4}-\d{2}\s*$', c)]
df_long = df_raw.melt(
    id_vars=['freq','unit','coicop','geo'],
    value_vars=time_cols,
    var_name='time',
    value_name='value'
)

# Clean values: remove ':' and keep numeric values only
df_long['value'] = df_long['value'].astype(str).str.strip()
df_long = df_long[(df_long['value'] != ':') & (df_long['value'] != '')]
num = df_long['value'].str.extract(r'^\s*([-]?\d+(?:\.\d+)?)')
df_long = df_long[~num[0].isna()].copy()
df_long['value'] = num[0].astype(float)

# Standardize time format
df_long['time'] = df_long['time'].str.strip()
df_long['time'] = pd.to_datetime(df_long['time'], format="%Y-%m", errors='coerce')
df_long = df_long.dropna(subset=['time'])

# Helper functions
def best_unit(df, prefer=('I15','I05','I96')):
    units = df['unit'].dropna().unique().tolist()
    for u in prefer:
        if u in units:
            return u
    return units[0] if units else None

def get_series(df, geo, coicop, start_year=1999, end_year=2025):
    sub = df.query("geo == @geo and coicop == @coicop").copy()
    if sub.empty:
        raise ValueError(f"No rows found for geo={geo}, coicop={coicop}")
    u = best_unit(sub)
    sub = sub[sub['unit'] == u].copy()
    sub = sub[(sub['time'].dt.year >= start_year) & (sub['time'].dt.year <= end_year)]
    sub = sub.sort_values('time').reset_index(drop=True)
    return sub[['time','value']].assign(unit=u)

# Extract HICP series
no_core = get_series(df_long, geo='NO', coicop='TOT_X_NRG_FOOD', start_year=1999, end_year=2025)
print(f"Norway core-HICP (TOT_X_NRG_FOOD) unit: {no_core['unit'].iloc[0]}")

ea_all = get_series(df_long, geo='EA', coicop='CP00', start_year=1999, end_year=2025)
print(f"Euro area HICP total (CP00) unit: {ea_all['unit'].iloc[0]}")

# Build complete monthly range and forward-fill
full_index = pd.date_range(start="1999-12-01", end="2025-12-01", freq="MS")

df = full_index.to_frame(name="time")
df = df.merge(no_core[['time','value']].rename(columns={'value':'hicp_no_core'}), on='time', how='left')
df = df.merge(ea_all[['time','value']].rename(columns={'value':'hicp_ea_all'}), on='time', how='left')
df[['hicp_no_core','hicp_ea_all']] = df[['hicp_no_core','hicp_ea_all']].ffill()

# Log transforms
df['p_no_core_log'] = np.log(df['hicp_no_core'])
df['p_ea_all_log']  = np.log(df['hicp_ea_all'])

# Final monthly dataset
out = df[['time', 'hicp_no_core', 'hicp_ea_all', 'p_no_core_log', 'p_ea_all_log']].copy()
out = out.set_index('time')
out.index.freq = 'MS'

print(out.head(5))
print(out.tail(5))
print(f"Number of months: {len(out)}  | Period: {out.index.min().date()} → {out.index.max().date()}")

Norway core-HICP (TOT_X_NRG_FOOD) unit: I15
Euro area HICP total (CP00) unit: I15
            hicp_no_core  hicp_ea_all  p_no_core_log  p_ea_all_log
time                                                              
1999-12-01          79.9        75.09       4.380776      4.318687
2000-01-01          79.7        75.13       4.378270      4.319220
2000-02-01          80.1        75.37       4.383276      4.322409
2000-03-01          80.3        75.60       4.385770      4.325456
2000-04-01          80.8        75.67       4.391977      4.326382
            hicp_no_core  hicp_ea_all  p_no_core_log  p_ea_all_log
time                                                              
2025-08-01         133.6       129.31       4.894850      4.862213
2025-09-01         134.1       129.43       4.898586      4.863140
2025-10-01         134.1       129.70       4.898586      4.865224
2025-11-01         134.1       129.70       4.898586      4.865224
2025-12-01         134.1       129.70       4.8

## EU NOK

In [29]:
import pandas as pd

# 1) Download data from Norges Bank (2000–2025) 
url = ("https://data.norges-bank.no/api/data/EXR/B.EUR.NOK.SP"
       "?format=csv&bom=include&apisrc=nbi"
       "&startPeriod=2000-01-01&endPeriod=2025-12-31&locale=no")

# Read CSV (semicolon separator, comma as decimal)
df = pd.read_csv(url, sep=';', encoding='utf-8-sig', decimal=',')

# 2) Select date and exchange rate columns 
# Handles potential column name variations (e.g. OBS_VALUE_N)
value_col = 'OBS_VALUE' if 'OBS_VALUE' in df.columns else 'OBS_VALUE_N'
df = (
    df[['TIME_PERIOD', value_col]]
      .rename(columns={'TIME_PERIOD': 'DATE', value_col: 'EUR_NOK'})
)

df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce')
df = df.dropna(subset=['DATE','EUR_NOK']).sort_values('DATE').set_index('DATE')

# 3) Create full daily index and forward-fill missing values 
full_idx = pd.date_range(df.index.min(), df.index.max(), freq='D')
df_daily = df.reindex(full_idx)

df_daily['EUR_NOK'] = df_daily['EUR_NOK'].ffill()
df_daily.index.name = 'DATE'
df_daily = df_daily.asfreq('D')

# 4) Inspect output 
print(df_daily.head(10))
print(df_daily.tail(10))
print(f"\nNumber of days: {len(df_daily)}  | Period: {df_daily.index.min().date()} → {df_daily.index.max().date()}")


            EUR_NOK
DATE               
2000-01-03   8.0620
2000-01-04   8.1500
2000-01-05   8.2060
2000-01-06   8.2030
2000-01-07   8.1945
2000-01-08   8.1945
2000-01-09   8.1945
2000-01-10   8.1900
2000-01-11   8.2075
2000-01-12   8.2160
            EUR_NOK
DATE               
2025-10-27  11.6320
2025-10-28  11.6335
2025-10-29  11.6385
2025-10-30  11.6648
2025-10-31  11.6485
2025-11-01  11.6485
2025-11-02  11.6485
2025-11-03  11.6480
2025-11-04  11.7265
2025-11-05  11.7490

Number of days: 9439  | Period: 2000-01-03 → 2025-11-05


## Merge EU/NOK price with Inflation

In [30]:
# 1) Convert HICP (out) to daily frequency using forward-fill 
# Start and end dates are defined from 'out'
daily_idx = pd.date_range(start=out.index.min(), end=out.index.max(), freq='D')

out_daily = (
    out.reindex(daily_idx)     # add all daily timestamps
       .ffill()                # keep the last monthly value until a new month begins
)

out_daily.index.name = 'DATE'

# 2) Merge with Norges Bank daily data (df_daily) 
merged = (
    df_daily[['EUR_NOK']]
    .merge(out_daily, left_index=True, right_index=True, how='left')
)

# 3) Add log of the exchange rate 
merged['s_eurnok_log'] = np.log(merged['EUR_NOK'])

# 4) Inspect output 
print(merged.head(10))
print(merged.tail(10))
print(f"\nNumber of days: {len(merged)} | Period: {merged.index.min().date()} → {merged.index.max().date()}")



            EUR_NOK  hicp_no_core  hicp_ea_all  p_no_core_log  p_ea_all_log  \
DATE                                                                          
2000-01-03   8.0620          79.7        75.13        4.37827       4.31922   
2000-01-04   8.1500          79.7        75.13        4.37827       4.31922   
2000-01-05   8.2060          79.7        75.13        4.37827       4.31922   
2000-01-06   8.2030          79.7        75.13        4.37827       4.31922   
2000-01-07   8.1945          79.7        75.13        4.37827       4.31922   
2000-01-08   8.1945          79.7        75.13        4.37827       4.31922   
2000-01-09   8.1945          79.7        75.13        4.37827       4.31922   
2000-01-10   8.1900          79.7        75.13        4.37827       4.31922   
2000-01-11   8.2075          79.7        75.13        4.37827       4.31922   
2000-01-12   8.2160          79.7        75.13        4.37827       4.31922   

            s_eurnok_log  
DATE                    

## Reell valutakurs (q) & Inflasjonsdifferanse (dπ)

In [31]:
import pandas as pd
import numpy as np

# 1) Convert HICP (out) to daily frequency using forward-fill 
daily_idx = pd.date_range(start=out.index.min(), end=out.index.max(), freq='D')
out_daily = (
    out.reindex(daily_idx)      # add all daily dates
       .ffill()                 # keep the last monthly value until next month
)
out_daily.index.name = 'DATE'

# 2) Merge with Norges Bank daily data (df_daily) 
merged = (
    df_daily[['EUR_NOK']]
    .merge(out_daily, left_index=True, right_index=True, how='left')
)

# 3) Daily log levels and real exchange rate 
merged['st'] = np.log(merged['EUR_NOK'])                # log(EUR/NOK)
merged['pt'] = np.log(merged['hicp_no_core'])           # Norway price level
merged['pt_star'] = np.log(merged['hicp_ea_all'])       # Euro area price level
merged['qt'] = merged['st'] - (merged['pt'] - merged['pt_star'])  # real exchange rate

# 4) Month-to-month inflation (Δlog) at month-end, mapped to daily within same month 
no_me = merged['hicp_no_core'].resample('M').last()
ea_me = merged['hicp_ea_all'].resample('M').last()

p_no_me = np.log(no_me)
p_ea_me = np.log(ea_me)
pi_no_m = p_no_me.diff(1)          # π_t  (Norway)
pi_ea_m = p_ea_me.diff(1)          # π_t* (Euro area)
dpi_m   = pi_no_m - pi_ea_m        # inflation differential

# Map monthly values to daily so that the same value holds for the entire month
didx   = merged.index
end_me = didx.max() + pd.offsets.MonthEnd(0)
drng   = pd.date_range(pi_no_m.index.min(), end_me, freq='D')

def to_daily_same_month(s, daily_index):
    daily_full = s.reindex(drng).bfill()  # backfill to fill same month
    return daily_full.reindex(daily_index)

merged['pi_t']      = to_daily_same_month(pi_no_m, didx)
merged['pi_t_star'] = to_daily_same_month(pi_ea_m, didx)
merged['dpi_t']     = to_daily_same_month(dpi_m, didx)

# 5) Clean for model use 
needed_cols = [
    'EUR_NOK', 'hicp_no_core', 'hicp_ea_all',
    'st', 'pt', 'pt_star', 'qt',
    'pi_t', 'pi_t_star', 'dpi_t'
]
final_df = merged.dropna(subset=needed_cols).copy()

# 6) Display summary 
print(final_df[needed_cols].head(12))
print(final_df[needed_cols].tail(12))
print(f"\nNumber of days (original): {len(merged)} | Period: {merged.index.min().date()} → {merged.index.max().date()}")
print(f"Number of days (after NaN removal): {len(final_df)}")

            EUR_NOK  hicp_no_core  hicp_ea_all        st        pt   pt_star  \
DATE                                                                           
2000-01-31   8.0825          79.7        75.13  2.089701  4.378270  4.319220   
2000-02-01   8.0730          80.1        75.37  2.088525  4.383276  4.322409   
2000-02-02   8.0175          80.1        75.37  2.081627  4.383276  4.322409   
2000-02-03   8.0475          80.1        75.37  2.085361  4.383276  4.322409   
2000-02-04   8.0830          80.1        75.37  2.089763  4.383276  4.322409   
2000-02-05   8.0830          80.1        75.37  2.089763  4.383276  4.322409   
2000-02-06   8.0830          80.1        75.37  2.089763  4.383276  4.322409   
2000-02-07   8.0590          80.1        75.37  2.086789  4.383276  4.322409   
2000-02-08   8.0720          80.1        75.37  2.088401  4.383276  4.322409   
2000-02-09   8.0825          80.1        75.37  2.089701  4.383276  4.322409   
2000-02-10   8.0695          80.1       

In [32]:
# EUR/NOK, Q, and d_pi only 
import numpy as np
import pandas as pd

# 1) Q = real exchange rate = s_t - (p_t - p_t*)
merged['st']      = np.log(merged['EUR_NOK'])          # s_t
merged['pt']      = np.log(merged['hicp_no_core'])     # p_t
merged['pt_star'] = np.log(merged['hicp_ea_all'])      # p_t*
merged['Q']       = merged['st'] - (merged['pt'] - merged['pt_star'])

# 2) d_pi = inflation differential = π_t - π_t*
#    (Δlog of end-of-month levels, mapped to daily within the same month)
no_me = merged['hicp_no_core'].resample('M').last()
ea_me = merged['hicp_ea_all'].resample('M').last()

pi_no_m = np.log(no_me).diff(1)
pi_ea_m = np.log(ea_me).diff(1)
d_pi_m  = pi_no_m - pi_ea_m

# Map to daily frequency: backfill from month-end so value holds for the same month
didx   = merged.index
end_me = didx.max() + pd.offsets.MonthEnd(0)
drng   = pd.date_range(d_pi_m.index.min(), end_me, freq='D')

d_pi_daily = d_pi_m.reindex(drng).bfill().reindex(didx)
merged['d_pi'] = d_pi_daily

# 3) Final dataset for model
final_small = merged[['EUR_NOK', 'Q', 'd_pi']].dropna().copy()


## Rentedifferanse (dI_t)


In [33]:
import pandas as pd, numpy as np, re, requests
import io, requests
from bs4 import BeautifulSoup

# 1) NORWAY POLICY RATE (Norges Bank)

url_nb = (
    "https://data.norges-bank.no/api/data/IR/B.KPRA.SD.R"
    "?apisrc=qb&format=csv&startPeriod=1996-01-01&endPeriod=2025-09-26&locale=no&bom=include"
)

df_rate_nb = pd.read_csv(url_nb, sep=";", encoding="utf-8-sig", engine="python")
df_rate_nb.columns = [c.strip() for c in df_rate_nb.columns]

time_candidates  = ["TIME_PERIOD", "Tid", "TIME", "Date", "PERIOD"]
value_candidates = ["OBS_VALUE", "Observasjonsverdi", "Value", "VALUE"]
time_col  = next((c for c in time_candidates  if c in df_rate_nb.columns), None)
value_col = next((c for c in value_candidates if c in df_rate_nb.columns), None)

rate_nb = (
    df_rate_nb[[time_col, value_col]]
    .rename(columns={time_col: "DATE", value_col: "policy_rate"})
)
rate_nb["policy_rate"] = pd.to_numeric(rate_nb["policy_rate"].astype(str).str.replace(",", "."), errors="coerce")
rate_nb["DATE"] = pd.to_datetime(rate_nb["DATE"], errors="coerce")
rate_nb = rate_nb.dropna(subset=["DATE", "policy_rate"]).sort_values("DATE").set_index("DATE")

# Daily reindex full range
policy_rate_daily = rate_nb.reindex(pd.date_range("2000-01-01", "2025-12-31", freq="D")).ffill().bfill()
policy_rate_daily.index.name = "DATE"

print("Norges Bank policy rate:")
print(policy_rate_daily.head(3))
print(policy_rate_daily.tail(3))
print(f"Period: {policy_rate_daily.index.min().date()} → {policy_rate_daily.index.max().date()}\n")

# 2) ECB POLICY RATE (Deposit Facility) – SDW API

URL = "https://www.ecb.europa.eu/stats/policy_and_exchange_rates/key_ecb_interest_rates/html/index.en.html"
html = requests.get(URL, timeout=30).text
soup = BeautifulSoup(html, "lxml")

table = None
for tbl in soup.find_all("table"):
    if tbl.find(string=re.compile("Date \\(with effect from\\)", re.I)):
        table = tbl
        break
if table is None:
    raise RuntimeError("ECB table not found")

rows, year = [], None
for tr in table.find_all("tr"):
    tds = [td.get_text(" ", strip=True) for td in tr.find_all(["td","th"])]
    if not tds or "Date" in tds[0]:
        continue
    if re.fullmatch(r"\d{4}", tds[0]):
        year = tds[0]
        date_str, dep = tds[1], tds[2]
    else:
        date_str, dep = tds[0], tds[1] if len(tds)>1 else "-"
    m = re.search(r"(\d{1,2})\s+([A-Za-z]{3})", date_str)
    if not m:
        continue
    d = f"{m.group(1)} {m.group(2)} {year}"
    d_iso = pd.to_datetime(d, format="%d %b %Y", errors="coerce")
    val = pd.to_numeric(dep.replace(",", "."), errors="coerce")
    if pd.notna(d_iso) and pd.notna(val):
        rows.append({"DATE": d_iso, "eu_policy_rate": val})

ecb_rate_daily = (
    pd.DataFrame(rows)
      .dropna(subset=["DATE"])
      .set_index("DATE")
      .sort_index()
      .reindex(pd.date_range("2000-01-01", "2025-12-31", freq="D"))
      .ffill().bfill()
)
ecb_rate_daily.index.name = "DATE"


# 3) INTEREST RATE DIFFERENTIAL (Norway – Euro area)

common_idx = pd.date_range(
    start=max(policy_rate_daily.index.min(), ecb_rate_daily.index.min()),
    end=min(policy_rate_daily.index.max(), ecb_rate_daily.index.max()),
    freq="D"
)

rates = pd.DataFrame({
    "NO_rate": policy_rate_daily.reindex(common_idx).ffill().bfill()["policy_rate"],
    "EA_rate": ecb_rate_daily.reindex(common_idx).ffill().bfill()["eu_policy_rate"],
})
rates["dI_t"] = rates["NO_rate"] - rates["EA_rate"]
rates.index.name = "DATE"

print("Interest rate differential sample:")
print(rates.head(5))
print(rates.tail(5))
print(f"Coverage: {rates.index.min().date()} → {rates.index.max().date()} | NaNs: {rates['dI_t'].isna().sum()}\n")


Norges Bank policy rate:
            policy_rate
DATE                   
2000-01-01          5.5
2000-01-02          5.5
2000-01-03          5.5
            policy_rate
DATE                   
2025-12-29          4.0
2025-12-30          4.0
2025-12-31          4.0
Period: 2000-01-01 → 2025-12-31

Interest rate differential sample:
            NO_rate  EA_rate  dI_t
DATE                              
2000-01-01      5.5     3.75  1.75
2000-01-02      5.5     3.75  1.75
2000-01-03      5.5     3.75  1.75
2000-01-04      5.5     3.75  1.75
2000-01-05      5.5     3.75  1.75
            NO_rate  EA_rate  dI_t
DATE                              
2025-12-27      4.0      2.0   2.0
2025-12-28      4.0      2.0   2.0
2025-12-29      4.0      2.0   2.0
2025-12-30      4.0      2.0   2.0
2025-12-31      4.0      2.0   2.0
Coverage: 2000-01-01 → 2025-12-31 | NaNs: 0



## Daily Endegenous 

In [34]:
final_with_rates = (
    final_small
    .merge(rates[["dI_t"]], left_index=True, right_index=True, how="left")
    .dropna(subset=["dI_t"])
)
final_with_rates = final_with_rates[["EUR_NOK", "Q", "d_pi", "dI_t"]]

print("Final daily dataset:")
print(final_with_rates.head(10))
print(final_with_rates.tail(10))
print(f"\nFinal dataset: {len(final_with_rates)} obs | "
      f"Period: {final_with_rates.index.min().date()} → {final_with_rates.index.max().date()}")

# Save to CSV 
final_with_rates.index.name = "DATE"
final_with_rates.to_csv("variables_daily_end.csv", index=True, float_format="%.6f")
print("\nSaved successfully as 'variables_daily_end.csv'")


Final daily dataset:
            EUR_NOK         Q      d_pi  dI_t
DATE                                         
2000-01-31   8.0825  2.030652  0.001817  1.75
2000-02-01   8.0730  2.027659  0.001817  1.75
2000-02-02   8.0175  2.020760  0.001817  1.75
2000-02-03   8.0475  2.024495  0.001817  1.75
2000-02-04   8.0830  2.028897  0.001817  1.75
2000-02-05   8.0830  2.028897  0.001817  1.75
2000-02-06   8.0830  2.028897  0.001817  1.75
2000-02-07   8.0590  2.025923  0.001817  1.75
2000-02-08   8.0720  2.027535  0.001817  1.75
2000-02-09   8.0825  2.028835  0.001817  1.75
            EUR_NOK         Q      d_pi  dI_t
DATE                                         
2025-10-27  11.6320  2.420398 -0.002084   2.0
2025-10-28  11.6335  2.420527 -0.002084   2.0
2025-10-29  11.6385  2.420957 -0.002084   2.0
2025-10-30  11.6648  2.423214 -0.002084   2.0
2025-10-31  11.6485  2.421816 -0.002084   2.0
2025-11-01  11.6485  2.421816  0.000000   2.0
2025-11-02  11.6485  2.421816  0.000000   2.0
2025-11-03  1

## Monthly Endogenous

In [35]:
# 1) Ensure index is datetime and sorted
final_with_rates = final_with_rates.sort_index()
final_with_rates.index = pd.to_datetime(final_with_rates.index)

# 2) Aggregate to month-end ('M') using LAST observation
final_monthly = pd.DataFrame({
    "EUR_NOK": final_with_rates["EUR_NOK"].resample("M").last(),   # nominal exchange rate (level)
    "Q":       final_with_rates["Q"].resample("M").last(),         # real exchange rate (level)
    "d_pi":    final_with_rates["d_pi"].resample("M").last(),      # inflation differential (monthly)
    "dI_t":    final_with_rates["dI_t"].resample("M").last(),      # interest rate differential (level)
})

# 3) Drop potential NaN rows (alignment)
final_monthly = final_monthly.dropna(how="any")

# 4) Assign proper monthly frequency
final_monthly.index.name = "DATE"
final_monthly = final_monthly.asfreq("M")

# 5) Inspect results
print("Monthly endogenous dataset:")
print(final_monthly.head(12))
print(final_monthly.tail(12))
print(f"\nTotal rows (monthly): {len(final_monthly)} | "
      f"Period: {final_monthly.index.min().date()} → {final_monthly.index.max().date()}")


Monthly endogenous dataset:
            EUR_NOK         Q      d_pi  dI_t
DATE                                         
2000-01-31   8.0825  2.030652  0.001817  1.75
2000-02-29   8.0805  2.028587  0.001817  1.75
2000-03-31   8.0885  2.030130 -0.000553  1.75
2000-04-30   8.1475  2.032116  0.005282  2.00
2000-05-31   8.3050  2.052451 -0.001189  2.00
2000-06-30   8.1850  2.037880  0.000017  2.50
2000-07-31   8.1990  2.044609 -0.005020  2.50
2000-08-31   8.0745  2.030095 -0.000788  3.00
2000-09-30   8.0255  2.020539  0.003469  3.25
2000-10-31   7.8735  2.001680 -0.000261  3.25
2000-11-30   8.0525  2.023794  0.000365  3.25
2000-12-31   8.2335  2.050894 -0.004871  3.25
            EUR_NOK         Q      d_pi  dI_t
DATE                                         
2024-12-31  11.7950  2.431128 -0.000508  1.50
2025-01-31  11.7373  2.430318 -0.004094  1.50
2025-02-28  11.7245  2.423597  0.005630  1.75
2025-03-31  11.4130  2.399005 -0.002335  2.00
2025-04-30  11.8090  2.429050  0.004064  2.25
2025-0

# Exogenous Variables 

## VIX

In [36]:
import pandas as pd

# 1) Load VIX (CBOE) 
url = "https://cdn.cboe.com/api/global/us_indices/daily_prices/VIX_History.csv"
vix = pd.read_csv(url)

# 2) Standardize columns and pick the correct "close" column 
vix.columns = [c.strip().upper() for c in vix.columns]

# candidates seen: CLOSE, VIX CLOSE, Close
close_candidates = ["CLOSE", "VIX CLOSE", "VIX_CLOSE"]
close_col = next((c for c in close_candidates if c in vix.columns), None)
if close_col is None:
    raise KeyError(f"Could not find VIX close column. Available: {list(vix.columns)}")

# Build a clean VIX dataframe
vix = vix.rename(columns={"DATE": "DATE"}).copy()
vix["DATE"] = pd.to_datetime(vix["DATE"], errors="coerce")
vix["VIX"] = pd.to_numeric(vix[close_col], errors="coerce")
vix = vix.dropna(subset=["DATE", "VIX"]).sort_values("DATE").set_index("DATE")

# 3) Reindex to your daily calendar and fill gaps 
target_idx = pd.date_range(start=merged.index.min(), end=merged.index.max(), freq="D")
vix_daily = vix.reindex(target_idx).ffill().bfill()
vix_daily.index.name = "DATE"

# 4) Add to main dataset safely (overwrite if exists) 
merged = merged.assign(VIX=vix_daily["VIX"])

# 5) Inspect 
print(merged[["VIX"]].head(10))
print(merged[["VIX"]].tail(10))
print(f"\nVIX daily: {merged.index.min().date()} → {merged.index.max().date()} | NaN: {merged['VIX'].isna().sum()}")


              VIX
DATE             
2000-01-03  24.21
2000-01-04  27.01
2000-01-05  26.41
2000-01-06  25.73
2000-01-07  21.72
2000-01-08  21.72
2000-01-09  21.72
2000-01-10  21.71
2000-01-11  22.50
2000-01-12  22.84
              VIX
DATE             
2025-10-27  15.79
2025-10-28  16.42
2025-10-29  16.92
2025-10-30  16.91
2025-10-31  17.44
2025-11-01  17.44
2025-11-02  17.44
2025-11-03  17.17
2025-11-04  19.00
2025-11-05  18.01

VIX daily: 2000-01-03 → 2025-11-05 | NaN: 0


## Brent Oil


In [37]:
!pip -q install pandas requests

# Packages 
import io
from datetime import datetime, timezone
import pandas as pd
import requests

# 1) Define API endpoint and authentication 
APP_TOKEN = "laCqAPM9Wo1SggEqlGFBAdssN"  # X-App-Token (public)
CSV_ENDPOINT = "https://agtransport.usda.gov/api/v3/views/b3w8-gxpm/query.csv"

# 2) Define time range (from 1999 to current UTC date) 
date_from = "1999-01-01T00:00:00.000"
date_to   = datetime.now(timezone.utc).strftime("%Y-%m-%dT23:59:59.999")

# 3) Request parameters 
params = {
    "select": "date, brent",                   # only these fields
    "where": f"date between '{date_from}' and '{date_to}'",
    "order": "date ASC",
}
headers = {"X-App-Token": APP_TOKEN}

# 4) Download CSV 
resp = requests.get(CSV_ENDPOINT, headers=headers, params=params, timeout=60)
resp.raise_for_status()

# 5) Load into pandas 
df = pd.read_csv(io.BytesIO(resp.content))

# 6) Clean data types 
df["date"]  = pd.to_datetime(df["date"], errors="coerce", utc=True).dt.tz_convert(None)
df["brent"] = pd.to_numeric(df["brent"], errors="coerce")

# Keep only the relevant columns
df = df[["date", "brent"]]

# 7) Inspect
print(f"Rows retrieved: {len(df):,}")
display(df.head(5))
display(df.tail(5))


Rows retrieved: 9,729


Unnamed: 0,date,brent
0,1987-05-20,18.63
1,1987-05-21,18.45
2,1987-05-22,18.55
3,1987-05-25,18.6
4,1987-05-26,18.63


Unnamed: 0,date,brent
9724,2025-09-16,69.69
9725,2025-09-17,69.19
9726,2025-09-18,67.83
9727,2025-09-19,67.05
9728,2025-09-22,66.87


In [38]:
import pandas as pd

# Assumes df contains columns 'date' (datetime) and 'brent' (float)
df_ff = df.copy().sort_values("date").set_index("date")

# Create full daily index from first to last available date
full_idx = pd.date_range(df_ff.index.min(), df_ff.index.max(), freq="D")

# Reindex to daily frequency (NaN where prices are missing)
df_daily = df_ff.reindex(full_idx)

# Count missing values before filling
missing_before = df_daily["brent"].isna().sum()

# Forward-fill (does not fill before the first observation)
df_daily["brent"] = df_daily["brent"].ffill()

# Optionally drop leading NaNs if the series starts with gaps
df_daily = df_daily[df_daily["brent"].notna()]

missing_after = df_daily["brent"].isna().sum()
filled_days = missing_before - missing_after

# Convert index back to column
df_daily = df_daily.rename_axis("date").reset_index()

# Inspect 
print(f"Missing days before ffill: {missing_before:,}")
print(f"Missing days after ffill: {missing_after:,}")
print(f"Days filled by ffill: {filled_days:,}")

display(df_daily.head(5))
display(df_daily.tail(5))


Missing days before ffill: 4,277
Missing days after ffill: 0
Days filled by ffill: 4,277


Unnamed: 0,date,brent
0,1987-05-20,18.63
1,1987-05-21,18.45
2,1987-05-22,18.55
3,1987-05-23,18.55
4,1987-05-24,18.55


Unnamed: 0,date,brent
14001,2025-09-18,67.83
14002,2025-09-19,67.05
14003,2025-09-20,67.05
14004,2025-09-21,67.05
14005,2025-09-22,66.87


## StoxEurope 


In [39]:
import pandas as pd

# 1) Load CSV directly from GitHub
URL = "https://raw.githubusercontent.com/bredeespelid/Data_MasterOppgave/refs/heads/main/Variables/StoxEurope/StoxxEuro600.csv"

# Read as raw text to ensure full control during cleaning
raw = pd.read_csv(URL, sep=",", dtype=str, encoding="utf-8")

# 2) Clean column names
raw.columns = raw.columns.str.strip()

# 3) Parse 'Date' column (format: dd.mm.yyyy kl. HH.MM.SS)
dt = raw["Date"].astype(str).str.replace(" kl. ", " ", regex=False)
date = pd.to_datetime(dt, format="%d.%m.%Y %H.%M.%S", errors="coerce")

# 4) Clean 'Close' values (convert from European format to float) 
vals = (
    raw["Close"]
    .astype(str)
    .str.replace("\u00A0", "", regex=False)  # remove non-breaking space
    .str.replace(" ", "", regex=False)       # remove normal spaces
    .str.replace(",", ".", regex=False)      # replace comma with dot
    .replace({"": None})
)
stox = pd.to_numeric(vals, errors="coerce")

# 5) Combine and sort 
df = (
    pd.DataFrame({"Date": date, "StoxEurope": stox})
      .sort_values("Date")
      .dropna(subset=["Date"])
      .set_index("Date")
)

# 6) Inspect 
print("NaN count in StoxEurope:", df["StoxEurope"].isna().sum())
print("Date range:", df.index.min().date(), "→", df.index.max().date())
display(df.head(5))
display(df.tail(5))


NaN count in StoxEurope: 0
Date range: 1998-07-17 → 2025-09-26


Unnamed: 0_level_0,StoxEurope
Date,Unnamed: 1_level_1
1998-07-17 18:00:00,313.83
1998-07-20 18:00:00,315.0
1998-07-21 18:00:00,313.52
1998-07-22 18:00:00,308.13
1998-07-23 18:00:00,307.42


Unnamed: 0_level_0,StoxEurope
Date,Unnamed: 1_level_1
2025-09-22 18:00:00,553.4
2025-09-23 18:00:00,554.95
2025-09-24 18:00:00,553.88
2025-09-25 18:00:00,550.22
2025-09-26 18:00:00,554.52


In [40]:
import pandas as pd

# 1) Create business-day index (weekdays only) with 18:00 timestamp
start = df.index.min().normalize()
end = df.index.max().normalize()
bidx = pd.bdate_range(start=start, end=end, freq="B") + pd.Timedelta(hours=18)

# 2) Reindex to business days and forward-fill missing values
df_ffill = df.reindex(bidx).ffill()
df_ffill.index.name = "Date"

# 3) Inspect result
print("Missing values after ffill:", df_ffill["StoxEurope"].isna().sum())
print("Date range:", df_ffill.index.min().date(), "→", df_ffill.index.max().date())
display(df_ffill.head(5))
display(df_ffill.tail(5))


Missing values after ffill: 0
Date range: 1998-07-17 → 2025-09-26


Unnamed: 0_level_0,StoxEurope
Date,Unnamed: 1_level_1
1998-07-17 18:00:00,313.83
1998-07-20 18:00:00,315.0
1998-07-21 18:00:00,313.52
1998-07-22 18:00:00,308.13
1998-07-23 18:00:00,307.42


Unnamed: 0_level_0,StoxEurope
Date,Unnamed: 1_level_1
2025-09-22 18:00:00,553.4
2025-09-23 18:00:00,554.95
2025-09-24 18:00:00,553.88
2025-09-25 18:00:00,550.22
2025-09-26 18:00:00,554.52


## S&P500


In [41]:
import pandas as pd

# 1) Read CSV directly from GitHub 
url = "https://raw.githubusercontent.com/bredeespelid/Data_MasterOppgave/refs/heads/main/Variables/S%26P500/S%26P.csv" 
df = pd.read_csv(url, sep=",", encoding="utf-8", names=["Date", "Close"], header=0)

# 2) Clean and convert date 
df["Date"] = (
    df["Date"]
    .astype(str)
    .str.replace("kl.", "", regex=False)
    .str.replace("kl", "", regex=False)
    .str.strip()
)
df["Date"] = pd.to_datetime(df["Date"], format="%d.%m.%Y %H.%M.%S", errors="coerce")

# 3) Clean and convert price 
df["Close"] = (
    df["Close"]
    .astype(str)
    .str.replace('"', '', regex=False)
    .str.replace(",", ".", regex=False)
    .astype(float)
)

# 4) Drop missing and set index 
df = df.dropna(subset=["Date", "Close"]).sort_values("Date").set_index("Date")

# 5) Normalize to daily business days and forward-fill 
df.index = df.index.normalize()
bidx = pd.bdate_range(start=df.index.min(), end=df.index.max(), freq="B")
df_sp_ffill = df.reindex(bidx).ffill()
df_sp_ffill.index.name = "Date"

# 6) Inspect result 
print("Missing after ffill:", df_sp_ffill["Close"].isna().sum())
print("Period:", df_sp_ffill.index.min().date(), "→", df_sp_ffill.index.max().date())
print(df_sp_ffill.head(5))
print(df_sp_ffill.tail(5))


Missing after ffill: 0
Period: 1996-11-18 → 2025-09-26
             Close
Date              
1996-11-18  737.02
1996-11-19  742.16
1996-11-20  742.16
1996-11-21  742.72
1996-11-22  748.70
              Close
Date               
2025-09-22  6693.75
2025-09-23  6656.92
2025-09-24  6637.97
2025-09-25  6604.72
2025-09-26  6643.70


## OSEBX

In [42]:
import pandas as pd

# 1) Read CSV (semicolon-separated; first two lines are metadata-like, so we parse generically)
url = "https://raw.githubusercontent.com/bredeespelid/Data_MasterOppgave/refs/heads/main/Variables/OSEBX/OSEBX_Daily.csv"

raw = pd.read_csv(
    url,
    sep=";",
    header=None,
    names=["Date", "OSEBX"],
    engine="python",
    dtype=str
)

# 2) Keep only proper date rows (dd.mm.yyyy), then parse
raw = raw[raw["Date"].str.match(r"\d{2}\.\d{2}\.\d{4}$", na=False)].copy()
raw["Date"] = pd.to_datetime(raw["Date"], format="%d.%m.%Y", errors="coerce")

# 3) Clean numeric values (comma decimals, NBSP)
raw["OSEBX"] = (
    raw["OSEBX"].astype(str)
        .str.replace("\u00A0", "", regex=False)
        .str.replace(",", ".", regex=False)
        .astype(float)
)

# 4) Index and forward-fill to business days
df = (raw.dropna(subset=["Date", "OSEBX"])
          .sort_values("Date")
          .set_index("Date"))

bidx = pd.bdate_range(df.index.min(), df.index.max(), freq="B")
df_osebx_ffill = df.reindex(bidx).ffill()
df_osebx_ffill.index.name = "Date"

# 5) Inspect
print("Missing after ffill:", df_osebx_ffill["OSEBX"].isna().sum())
print("Period:", df_osebx_ffill.index.min().date(), "→", df_osebx_ffill.index.max().date())
print(df_osebx_ffill.head(5))
print(df_osebx_ffill.tail(5))


Missing after ffill: 0
Period: 1999-12-30 → 2025-10-03
            OSEBX
Date             
1999-12-30  189.8
1999-12-31  189.8
2000-01-03  192.7
2000-01-04  185.7
2000-01-05  180.6
             OSEBX
Date              
2025-09-29  1657.7
2025-09-30  1644.6
2025-10-01  1657.7
2025-10-02  1650.5
2025-10-03  1659.3


## OBX Energy 

In [43]:
import pandas as pd

# 1) Read CSV (semicolon-separated; skip metadata lines)
url = "https://raw.githubusercontent.com/bredeespelid/Data_MasterOppgave/refs/heads/main/Variables/OSEBX/OBX_EnergyDaily.csv"

raw = pd.read_csv(
    url,
    sep=";",
    header=None,
    names=["Date", "OBX_Energy"],
    engine="python",
    dtype=str
)

# 2) Keep only proper date rows (dd.mm.yyyy) and parse
raw = raw[raw["Date"].str.match(r"\d{2}\.\d{2}\.\d{4}$", na=False)].copy()
raw["Date"] = pd.to_datetime(raw["Date"], format="%d.%m.%Y", errors="coerce")

# 3) Clean numeric column (comma decimals, NBSP)
raw["OBX_Energy"] = (
    raw["OBX_Energy"].astype(str)
        .str.replace("\u00A0", "", regex=False)
        .str.replace(",", ".", regex=False)
        .astype(float)
)

# 4) Index and forward-fill to business days
df = (raw.dropna(subset=["Date", "OBX_Energy"])
          .sort_values("Date")
          .set_index("Date"))

bidx = pd.bdate_range(df.index.min(), df.index.max(), freq="B")
df_OBX_Energy_ffill = df.reindex(bidx).ffill()
df_OBX_Energy_ffill.index.name = "Date"

# 5) Inspect
print("Missing after ffill:", df_OBX_Energy_ffill["OBX_Energy"].isna().sum())
print("Period:", df_OBX_Energy_ffill.index.min().date(), "→", df_OBX_Energy_ffill.index.max().date())
print(df_OBX_Energy_ffill.head(5))
print(df_OBX_Energy_ffill.tail(5))


Missing after ffill: 0
Period: 1999-12-30 → 2025-10-03
            OBX_Energy
Date                  
1999-12-30     149.695
1999-12-31     149.695
2000-01-03     153.497
2000-01-04     147.188
2000-01-05     142.761
            OBX_Energy
Date                  
2025-09-29     1472.79
2025-09-30     1437.21
2025-10-01     1449.07
2025-10-02     1452.39
2025-10-03     1457.35


# Variables Combined Daily

In [44]:
import pandas as pd

# 1) Base: endogenous features

base = final_with_rates.copy()
# Make sure the index is Date (no time) and sorted
base.index = pd.to_datetime(base.index, errors="coerce").normalize()
base = base.sort_index()


# 2) Small helper: normalize any source to one column

def normalize_one(
    df,
    target_name: str,
    index_col: str | None = None,
    prefer: list[str] | None = None,
):
    """
    - Ensures a DatetimeIndex normalized to date-only.
    - Picks a single value column (prefer these names if present, else first numeric, else first column).
    - Renames that column -> `target_name`.
    - Returns a one-column, date-indexed DataFrame.
    """
    x = df.copy()

    # Ensure we have a DatetimeIndex
    if index_col is not None:
        if index_col not in x.columns:
            raise KeyError(f"{target_name}: index_col '{index_col}' not found in {list(x.columns)}")
        x[index_col] = pd.to_datetime(x[index_col], errors="coerce")
        x = x.dropna(subset=[index_col]).set_index(index_col)
    else:
        if not isinstance(x.index, pd.DatetimeIndex):
            x.index = pd.to_datetime(x.index, errors="coerce")
        x = x.dropna(subset=[x.columns[0]])  # avoid all-NaT rows if any

    # Normalize to date only (removes 18:00:00 etc.)
    x.index = x.index.normalize()
    x = x.sort_index()

    # Choose the value column
    cols = list(x.columns)
    if prefer:
        for c in prefer:
            if c in cols:
                val = x[[c]]
                break
        else:
            val = None
    else:
        val = None

    if val is None:
        # try numeric-only first
        num = x.select_dtypes(include=["number"])
        if not num.empty:
            val = num.iloc[:, [0]]
        else:
            # fall back to first column
            val = x.iloc[:, [0]]

    # Finalize name
    val.columns = [target_name]
    return val


# 3) Normalize each exogenous series (using vars)

# VIX  
vix_idx = normalize_one(vix_daily, "VIX", index_col=None, prefer=["VIX", "vix", "Close", "CLOSE"])

# Brent 
brent_idx = normalize_one(df_daily, "Brent", index_col="date", prefer=["brent", "Brent", "close", "Close"])

# STOXX Europe 600 
stox_idx = normalize_one(df_ffill, "StoxEurope", index_col=None, prefer=["StoxEurope", "Close", "close"])

# S&P 500 
sp500_idx = normalize_one(df_sp_ffill, "SP500", index_col=None, prefer=["SP500", "Close", "close"])

# OSEBX 
osebx_idx = normalize_one(df_osebx_ffill, "OSEBX", index_col=None, prefer=["OSEBX"])

# OBX Energy
obx_energy_idx = normalize_one(df_OBX_Energy_ffill, "OBX_Energy", index_col=None, prefer=["OBX_Energy"])


# 4) Align to base dates and join (ffill for coverage)

# Reindex each to the base calendar (forward-fill where missing), then join: drop any pre-existing columns with same names first

to_add = {
    "Brent": brent_idx,
    "VIX": vix_idx,
    "StoxEurope": stox_idx,
    "SP500": sp500_idx,
    "OSEBX": osebx_idx,
    "OBX_Energy": obx_energy_idx,
}

for name, src in to_add.items():
    if name in base.columns:
        base = base.drop(columns=[name])
    aligned = src.reindex(base.index, method="ffill")
    base = base.join(aligned, how="left")


# 5) Inspect and save

print(base.head(12))
print(base.tail(12))
print(
    f"\nTotal rows: {len(base):,} | "
    f"Period: {base.index.min().date()} → {base.index.max().date()}"
)
for c in ["Brent", "VIX", "StoxEurope", "SP500", "OSEBX", "OBX_Energy"]:
    if c in base.columns:
        print(f"NaN in {c}: {base[c].isna().sum():,}")

out_name = "variables_daily.csv"
base.to_csv(out_name, index_label="Date")
print(f"\nSaved: {out_name}")


            EUR_NOK         Q      d_pi  dI_t  Brent    VIX  StoxEurope  \
DATE                                                                      
2000-01-31   8.0825  2.030652  0.001817  1.75  27.08  24.95      360.93   
2000-02-01   8.0730  2.027659  0.001817  1.75  27.35  23.45      366.71   
2000-02-02   8.0175  2.020760  0.001817  1.75  27.15  23.12      371.34   
2000-02-03   8.0475  2.024495  0.001817  1.75  27.60  22.01      376.29   
2000-02-04   8.0830  2.028897  0.001817  1.75  27.48  21.54      377.37   
2000-02-05   8.0830  2.028897  0.001817  1.75  27.48  21.54      377.37   
2000-02-06   8.0830  2.028897  0.001817  1.75  27.48  21.54      377.37   
2000-02-07   8.0590  2.025923  0.001817  1.75  27.94  22.79      374.20   
2000-02-08   8.0720  2.027535  0.001817  1.75  27.61  21.25      382.38   
2000-02-09   8.0825  2.028835  0.001817  1.75  27.44  22.90      382.35   
2000-02-10   8.0695  2.027225  0.001817  1.75  27.32  23.07      381.61   
2000-02-11   8.0395  2.02

## Daily LOG 

In [45]:
import numpy as np
import pandas as pd

# 1) Load daily dataset 
df = pd.read_csv("variables_daily.csv", parse_dates=["Date"], index_col="Date")

# 2) Copy and transform all non-target variables safely 
log_df = df.copy()

for col in df.columns:
    if col != "EUR_NOK":
        log_df[f"log_{col}"] = np.log(df[col].where(df[col] > 0))

# 3) Keep only EUR_NOK + log columns 
cols_to_keep = ["EUR_NOK"] + [c for c in log_df.columns if c.startswith("log_")]
log_df = log_df[cols_to_keep]

# 4) Save 
log_df.to_csv("variables_daily_log.csv", index_label="Date")

# 5) Inspect 
print(log_df.head(5))
print(log_df.tail(5))
print(f"\nSaved file: variables_daily_log.csv")


            EUR_NOK     log_Q  log_d_pi  log_dI_t  log_Brent   log_VIX  \
Date                                                                     
2000-01-31   8.0825  0.708357 -6.310625  0.559616   3.298795  3.216874   
2000-02-01   8.0730  0.706882 -6.310625  0.559616   3.308717  3.154870   
2000-02-02   8.0175  0.703474 -6.310625  0.559616   3.301377  3.140698   
2000-02-03   8.0475  0.705320 -6.310625  0.559616   3.317816  3.091497   
2000-02-04   8.0830  0.707492 -6.310625  0.559616   3.313458  3.069912   

            log_StoxEurope  log_SP500  log_OSEBX  log_OBX_Energy  
Date                                                              
2000-01-31        5.888684   7.240263   5.217649        5.040608  
2000-02-01        5.904571   7.250834   5.216022        5.045784  
2000-02-02        5.917118   7.250721   5.228967        5.068143  
2000-02-03        5.930360   7.261899   5.253320        5.062570  
2000-02-04        5.933226   7.261485   5.255410        5.046298  
            

# Variables Combined Monthly 

In [46]:
import pandas as pd

# 1) Load the daily dataset 
df = pd.read_csv("variables_daily.csv", parse_dates=["Date"], index_col="Date")

# 2) Define aggregation rules 

# Average (macro/fundamental drivers)
avg_vars = ["Brent"]

# Last observation of the month (financial market variables)
last_vars = ["VIX", "StoxEurope", "SP500", "OSEBX", "OBX_Energy"]

# Endogenous variables (already monthly, but last observation)
endo_vars = ["EUR_NOK", "Q", "d_pi", "dI_t"]


# 3) Build the aggregation dictionary 
agg_dict = {v: "mean" for v in avg_vars}
agg_dict.update({v: "last" for v in last_vars})
agg_dict.update({v: "last" for v in endo_vars})

# 4) Resample to monthly frequency 
df_monthly = df.resample("M").agg(agg_dict)

# 5) Reorder columns to match the daily dataset structure 
ordered_cols = ["EUR_NOK", "Q", "d_pi", "dI_t", "Brent", "VIX", "StoxEurope", "SP500", "OSEBX", "OBX_Energy"]
df_monthly = df_monthly[ordered_cols]

# 6) Save to CSV
df_monthly.to_csv("variables_monthly.csv", index_label="Date")

# 7) Inspect 
print(df_monthly.head(5))
print(df_monthly.tail(5))
print(f"\nSaved file: variables_monthly.csv")
print(f"Period: {df_monthly.index.min().date()} → {df_monthly.index.max().date()}")


            EUR_NOK         Q      d_pi  dI_t      Brent    VIX  StoxEurope  \
Date                                                                          
2000-01-31   8.0825  2.030652  0.001817  1.75  27.080000  24.95      360.93   
2000-02-29   8.0805  2.028587  0.001817  1.75  27.774828  23.37      386.01   
2000-03-31   8.0885  2.030130 -0.000553  1.75  27.645161  24.11      394.10   
2000-04-30   8.1475  2.032116  0.005282  2.00  22.970000  26.20      392.62   
2000-05-31   8.3050  2.052451 -0.001189  2.00  27.754516  23.65      380.24   

              SP500  OSEBX  OBX_Energy  
Date                                    
2000-01-31  1394.46  184.5     154.564  
2000-02-29  1366.41  189.4     155.128  
2000-03-31  1498.58  188.3     165.325  
2000-04-30  1452.43  182.1     155.681  
2000-05-31  1420.60  190.6     176.655  
            EUR_NOK         Q      d_pi  dI_t      Brent    VIX  StoxEurope  \
Date                                                                          
2

## Monthly LOG 

In [47]:
import numpy as np
import pandas as pd

# 1) Load the monthly dataset 
df_monthly = pd.read_csv("variables_monthly.csv", parse_dates=["Date"], index_col="Date")

# 2) Create a copy for log transformation 
df_monthly_log = df_monthly.copy()

# 3) Define which variables to log (exclude EUR_NOK) 
log_vars = [c for c in df_monthly_log.columns if c != "EUR_NOK"]

# 4) Convert to numeric and apply natural log (only positive values)
df_monthly_log[log_vars] = df_monthly_log[log_vars].apply(pd.to_numeric, errors="coerce")
df_monthly_log[log_vars] = np.log(df_monthly_log[log_vars].where(df_monthly_log[log_vars] > 0))

# 5) Rename logged columns with prefix 'log_' 
df_monthly_log.rename(columns={col: f"log_{col}" for col in log_vars}, inplace=True)

# 6) Save to CSV 
out_name = "variables_monthly_log.csv"
df_monthly_log.to_csv(out_name, index_label="Date")

# 7) Inspect
print(df_monthly_log.head(5))
print(df_monthly_log.tail(5))
print(f"\nSaved file: {out_name}")
print(f"Columns: {list(df_monthly_log.columns)}")


            EUR_NOK     log_Q  log_d_pi  log_dI_t  log_Brent   log_VIX  \
Date                                                                     
2000-01-31   8.0825  0.708357 -6.310625  0.559616   3.298795  3.216874   
2000-02-29   8.0805  0.707340 -6.310625  0.559616   3.324130  3.151453   
2000-03-31   8.0885  0.708100       NaN  0.559616   3.319451  3.182627   
2000-04-30   8.1475  0.709078 -5.243479  0.693147   3.134189  3.265759   
2000-05-31   8.3050  0.719035       NaN  0.693147   3.323399  3.163363   

            log_StoxEurope  log_SP500  log_OSEBX  log_OBX_Energy  
Date                                                              
2000-01-31        5.888684   7.240263   5.217649        5.040608  
2000-02-29        5.955863   7.219942   5.243861        5.044251  
2000-03-31        5.976605   7.312273   5.238036        5.107913  
2000-04-30        5.972842   7.280993   5.204556        5.047809  
2000-05-31        5.940803   7.258835   5.250177        5.174199  
            