In [53]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, median_absolute_error
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.api import VAR
from statsmodels.tsa.statespace.varmax import VARMAX
from scipy.stats import norm
from statsmodels.tsa.stattools import grangercausalitytests
from sklearn.metrics import r2_score
import unicodedata
from neuralprophet import NeuralProphet

In [54]:
# Laste poll-of-polls data
#df = pd.read_csv("../data/all_polls.csv", sep=",", encoding='ISO-8859-1')
#df= pd.read_csv("https://github.com/jensmorten/onesixtynine/blob/JM/data/all_polls.csv", sep=",", encoding='ISO-8859-1')
df = pd.read_csv("https://raw.githubusercontent.com/jensmorten/onesixtynine/JM/data/all_polls.csv", sep=",", encoding='ISO-8859-1')

In [55]:
# Convert to datetime and set the date to the end of the month
df["Dato"] = pd.to_datetime(df["Dato"])

In [56]:
# Sort values and set index
df = df.sort_values("Dato")
df.set_index("Dato", inplace=True)

In [57]:
df_en=df[["Ap","Høyre","Frp","SV","Sp","KrF","Venstre","MDG","Rødt", "Andre"]]

In [58]:
df_test=df_en
df_test=df_test.reset_index()

In [59]:
df_test["rank"] = df_test.groupby(df_test["Dato"].dt.date).cumcount()

In [60]:
times = pd.to_timedelta([0, 4, 8, 12, 16, 18, 22], unit="h")
df_test["DateTime"] = df_test["Dato"].dt.normalize() + df_test["rank"].map(dict(zip(range(7), times)))

In [61]:
df_test= df_test.drop(columns=["Dato", "rank"]).set_index("DateTime").sort_index()

In [62]:
full_idx = pd.date_range(df_test.index.min(), df_test.index.max(), freq="6h")
df_test = df_test.reindex(full_idx).bfill()

In [63]:
#df_fixed = expand_and_impute(df_en, resample_freq="6H")
df_fixed=df_test.dropna()
# Quick check
print("Any NaNs left?", df_fixed.isna().sum().sum())
print("Row sums sample:", df_fixed.sum(axis=1).head(10).values)

Any NaNs left? 0
Row sums sample: [97.3 96.8 96.8 96.8 96.8 96.7 96.7 96.7 96.7 96.7]


In [64]:
df=df_fixed

In [65]:
df = df.reset_index().rename(columns={"index": "ds"})  # move index to ds column

In [66]:
print(df.columns.tolist())

['ds', 'Ap', 'Høyre', 'Frp', 'SV', 'Sp', 'KrF', 'Venstre', 'MDG', 'Rødt', 'Andre']


In [78]:
df.head(20)

Unnamed: 0,ds,Ap,Høyre,Frp,SV,Sp,KrF,Venstre,MDG,Rødt,Andre
0,2008-01-09 00:00:00,32.7,14.1,23.2,7.1,6.4,7.8,6.0,0.0,0.0,0.0
1,2008-01-09 06:00:00,30.7,16.2,23.4,8.1,5.4,5.6,7.4,0.0,0.0,0.0
2,2008-01-09 12:00:00,30.7,16.2,23.4,8.1,5.4,5.6,7.4,0.0,0.0,0.0
3,2008-01-09 18:00:00,30.7,16.2,23.4,8.1,5.4,5.6,7.4,0.0,0.0,0.0
4,2008-01-10 00:00:00,30.7,16.2,23.4,8.1,5.4,5.6,7.4,0.0,0.0,0.0
5,2008-01-10 06:00:00,29.5,17.7,25.6,6.4,5.0,6.4,6.1,0.0,0.0,0.0
6,2008-01-10 12:00:00,29.5,17.7,25.6,6.4,5.0,6.4,6.1,0.0,0.0,0.0
7,2008-01-10 18:00:00,29.5,17.7,25.6,6.4,5.0,6.4,6.1,0.0,0.0,0.0
8,2008-01-11 00:00:00,29.5,17.7,25.6,6.4,5.0,6.4,6.1,0.0,0.0,0.0
9,2008-01-11 06:00:00,29.5,17.7,25.6,6.4,5.0,6.4,6.1,0.0,0.0,0.0


In [68]:
# NeuralProphet forecasts storage
forecasts = {}

In [79]:
# df is your dataframe
df = df.copy()
df['ds'] = pd.to_datetime(df['ds'])  # ensure datetime

# Remove leading/trailing spaces in column names
df.columns = df.columns.str.strip()

# Store forecasts
forecasts = {}

# Loop over each party
for target in [c for c in df.columns if c != 'ds']:
    data = df[['ds', target]].rename(columns={target: 'y'})
    
    # Initialize NeuralProphet
    m = NeuralProphet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
    
    # Add other parties as future regressors
    for reg in df.columns:
        if reg not in ['ds', target]:
            m.add_future_regressor(name=reg)
    
    # Fit model with 6H frequency
    m.fit(data, freq='6H', progress='bar')
    
    # Make future dataframe (30 future timestamps = 7.5 days)
    future = m.make_future_dataframe(data, periods=30, n_historic_predictions=True)
    
    # Add regressors to future
    for reg in df.columns:
        if reg not in ['ds', target]:
            # Use last known values for the future
            future[reg] = df[reg].iloc[-len(future):].values
    
    # Predict
    forecast = m.predict(future)
    forecasts[target] = forecast

# Example: show last 5 rows of Ap forecast
print(forecasts['Ap'][['ds', 'yhat1']].tail())




KeyError: 'Høyre'