In [80]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression  # f_regression for regression tasks
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error  # for regression evaluation

In [58]:
# Load the two semicolon-delimited inputs in one pass:
#   df   -> table with the per-party target columns plus the feature columns
#   df_n -> feature row(s) for which a new prediction is produced later on
df, df_n = (
    pd.read_csv(csv_path, sep=";")
    for csv_path in (
        "../data/featured_data.csv",
        "../data/featured_to_predict.csv",
    )
)

In [59]:
# Remove the 'Mnd' column from the training table.
# Rebinding `df` (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second execution no longer raises KeyError
# because the column has already been dropped.
df = df.drop(columns=['Mnd'], errors='ignore')

In [60]:
# Preview the first rows of the frame that will be fed to the fitted models.
df_n.head(n=5)

Unnamed: 0,Ap_prev,Ap_trend_3,Ap_trend_6,Hoyre_prev,Hoyre_trend_3,Hoyre_trend_6,Frp_prev,Frp_trend_3,Frp_trend_6,SV_prev,...,Venstre_trend_6,MDG_prev,MDG_trend_3,MDG_trend_6,Rodt_prev,Rodt_trend_3,Rodt_trend_6,Andre_prev,Andre_trend_3,Andre_trend_6
0,19.1,-0.8,-2.0,22.2,-1.2,-2.7,24.1,3.8,7.6,8.0,...,-1.1,3.3,-0.2,-0.4,5.6,-0.3,0.2,3.9,0.2,-0.8


In [84]:
# Empty results table with one column per party; the modelling loop below
# fills each column with that party's predicted value.
predictions = pd.DataFrame(
    columns=[
        'Ap', 'Hoyre', 'Frp', 'SV', 'Sp',
        'KrF', 'Venstre', 'MDG', 'Rodt', 'Andre',
    ]
)

In [85]:
# Train one ensemble regressor per party and predict that party's value for the
# row(s) in `df_n`.
#
# For every party column this cell:
#   1. uses that column of `df` as the target and all non-party columns as features,
#   2. holds out 20% of the rows and reports the mean squared error on them,
#   3. fits a pipeline of univariate feature selection (f_regression) followed by
#      a VotingRegressor combining a random forest, a linear regression and an SVR,
#   4. stores the prediction for `df_n` in the `predictions` table.
party_names = ['Ap', 'Hoyre', 'Frp', 'SV', 'Sp', 'KrF', 'Venstre', 'MDG','Rodt', 'Andre']

# The feature matrix is the same for every party, so build it once instead of
# re-dropping the target columns on each iteration.
X = df.drop(columns=party_names)

# Pick the prediction features by name so they always line up with the training
# columns: extra columns in `df_n` are ignored and a missing one fails right
# here with a KeyError instead of deep inside `predict`.
X_new = df_n[X.columns]

# Keep at most 20 features, but never ask for more than actually exist
# (SelectKBest raises when k exceeds the number of features).
n_selected = min(20, X.shape[1])

for party in party_names:
    y = df[party]

    # Split the data into train and test.
    # NOTE(review): this is a shuffled random split; if the rows are ordered in
    # time a chronological hold-out may be more appropriate -- confirm.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model_1 = RandomForestRegressor(random_state=42, n_estimators=1000, min_samples_leaf=5)
    model_2 = LinearRegression()
    # NOTE(review): SVR is fitted on unscaled features; consider adding a scaler.
    model_3 = SVR()

    pipeline = Pipeline([
        ('feature_selection', SelectKBest(score_func=f_regression, k=n_selected)),
        ('model', VotingRegressor(estimators=[('rf', model_1), ('lr', model_2), ('svr', model_3)])),
    ])

    # Fit feature selection and the ensemble on the training part only.
    pipeline.fit(X_train, y_train)

    # Evaluate on the held-out rows using Mean Squared Error.
    y_pred = pipeline.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error for {party}: {mse}")

    # Predict the new, unseen row(s) and record the result for this party.
    new_pred = pipeline.predict(X_new)
    print(f"new prediction for {party}: {new_pred[0]}")

    predictions[party] = new_pred

Mean Squared Error for Ap: 1.6663362838642743
new prediction for Ap: 18.942644821371392
Mean Squared Error for Hoyre: 3.553310329822058
new prediction for Hoyre: 22.92613125395317
Mean Squared Error for Frp: 1.983941362545797
new prediction for Frp: 18.864199110418426
Mean Squared Error for SV: 0.34543783014157303
new prediction for SV: 8.52397150310833
Mean Squared Error for Sp: 1.3011048114393298
new prediction for Sp: 6.171938556483326
Mean Squared Error for KrF: 0.12342513269986156
new prediction for KrF: 3.4854053969509535
Mean Squared Error for Venstre: 0.16222421032785478
new prediction for Venstre: 4.598771549257662
Mean Squared Error for MDG: 0.04584618021976743
new prediction for MDG: 3.5796016651299354
Mean Squared Error for Rodt: 0.278762956676951
new prediction for Rodt: 5.822205833515077
Mean Squared Error for Andre: 0.19449819738932045
new prediction for Andre: 3.7576462030760265


In [86]:
# Display the collected per-party predictions (one column per party).
predictions

Unnamed: 0,Ap,Hoyre,Frp,SV,Sp,KrF,Venstre,MDG,Rodt,Andre
0,18.942645,22.926131,18.864199,8.523972,6.171939,3.485405,4.598772,3.579602,5.822206,3.757646
