# 1. Imports

In [3]:
from build_model import build_v2g_model, build_v2g_model_multinomial, evaluate_subset
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import itertools
from sklearn.model_selection import cross_val_score

# 2. Model setup

In [4]:
# Survey column codes fed to the model as input features.
# Presumably the order must line up with `input_variables_names`, which is
# used to label the coefficient columns further down — verify when editing.
input_variables = [
    "Q9",
    "Q2",
    "Q8_1",
    "Q8_2",
    "Q8_99",
]

# Column the model predicts: "I would be interested in installing V2G..."
target_variable = "Q10_2"

# Pre-processed survey export, relative to the notebook's working directory.
csv_file = "./data/survey_pre_processed_data.csv"

In [5]:
# Display names for the five answer options of the target question (Q10_2),
# listed from most negative to most positive.
q10_2_categories = [
    "Strongly disagree",
    "Somewhat disagree",
    "Neither agree nor disagree",
    "Somewhat agree",
    "Strongly agree",
]

# Human-readable labels for the model inputs, used as heatmap column labels.
# There must be exactly one entry per element of `input_variables`, in the
# same order; a missing entry leaves the plot with fewer tick labels than
# coefficient columns.
input_variables_names = [
    'How familiar are you with V2G?',
    'How many kilometres do you drive per year?',
    'Owns Solar Panels',
    'Owns Home battery',
    # TODO: replace with the survey wording for Q8_99; the raw column code is
    # used as a placeholder so every input variable has a label.
    'Q8_99',
]

# 3. Model run

In [6]:
# Build the multinomial model for the target column from the selected inputs.
# NOTE(review): the recorded run of this cell failed with
#   TypeError: build_v2g_model_multinomial() missing 1 required positional argument: 'transformers'
# so this call does not match the helper's current signature. Supply
# `transformers` here once its expected value is confirmed in build_model.py;
# until then every later cell that uses `model` fails with a NameError.
model, X, y, X_test, y_test = build_v2g_model_multinomial(
    csv_file,
    input_variables,
    target_variable,
    do_normalize=True,
    test_split_ratio=0.0,
)

TypeError: build_v2g_model_multinomial() missing 1 required positional argument: 'transformers'

In [7]:
# Heatmap of the fitted model's coefficients: one row per target class, one
# column per input feature.
coefs = model.coef_  # shape: (n_classes, n_features)
n_classes, n_features = coefs.shape

# Row labels: use the Q10_2 category names only when there is exactly one per
# coefficient row; otherwise let seaborn number the rows rather than attach
# the wrong names (or raise on a label-count mismatch).
if len(q10_2_categories) == n_classes:
    row_labels = q10_2_categories
else:
    row_labels = "auto"

# Column labels: prefer the descriptive names, fall back to the raw column
# codes, and finally to seaborn's automatic labels if neither list has one
# entry per coefficient column.
if len(input_variables_names) == n_features:
    col_labels = input_variables_names
elif len(input_variables) == n_features:
    col_labels = input_variables
else:
    col_labels = "auto"

fig, ax = plt.subplots(figsize=(16, 8))
sns.heatmap(
    coefs,
    annot=True,        # display numerical values in the cells
    cmap="RdBu",       # diverging colormap
    xticklabels=col_labels,
    yticklabels=row_labels,
    center=0,          # ensures 0 is in the middle of the color scale
    ax=ax,
)
ax.set_title("Logistic Regression Coefficients Heatmap")
ax.set_xlabel("Features")
ax.set_ylabel("Target Classes")
fig.tight_layout()
plt.show()

NameError: name 'model' is not defined

In [None]:
# Exhaustive search: score every non-empty combination of the input variables
# (smallest subsets first) and keep the first one with the highest score.
best_acc = 0
best_combo = None

candidate_subsets = itertools.chain.from_iterable(
    itertools.combinations(input_variables, size)
    for size in range(1, len(input_variables) + 1)
)

for candidate in candidate_subsets:
    score = evaluate_subset(csv_file, candidate, target_variable)
    # Strict comparison: ties keep the earlier (smaller / first-seen) subset.
    if score > best_acc:
        best_acc, best_combo = score, candidate

print("Best subset:", best_combo)
print("Best subset accuracy:", best_acc)

In [None]:
# Same subset search as above, but over a wider candidate list and capped at
# `max_subset_size` variables per subset to keep the number of runs bounded.
all_variables = [
    # e.g. EVERY column except those we exclude
    # or just a subset you're testing
    "Q9", "Q2", "Q8_1", "Q8_2", "Q8_99", # ...
    "Q15_1", "Q15_2", # etc.
]

best_acc = 0
best_combo = None
max_subset_size = 5
skipped_subsets = []  # (subset, error message) for every subset that could not be scored

for r in range(1, max_subset_size + 1):
    for subset in itertools.combinations(all_variables, r):
        # Only the evaluation itself is guarded, so an unrelated ValueError
        # in the bookkeeping below cannot be swallowed by accident.
        try:
            acc = evaluate_subset(csv_file, subset, target_variable)
        except ValueError as err:
            # Probably "No valid rows found" or some parse error: skip the
            # subset, but remember it so the skip is visible in the output.
            skipped_subsets.append((subset, str(err)))
            continue
        if acc > best_acc:
            best_acc = acc
            best_combo = subset

print("Best combo:", best_combo)
print("Best accuracy:", best_acc)
if skipped_subsets:
    first_subset, first_reason = skipped_subsets[0]
    print(
        f"Skipped {len(skipped_subsets)} subset(s) that raised ValueError; "
        f"first: {first_subset} ({first_reason})"
    )