In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("anes_timeseries_2020/anes_timeseries_2020_csv_20220210.csv", encoding='latin1')

#don't use any data to train the model where there is no clear answer on the self-reported political ideology
invalid_ideology_values = [-9, -8, 99]
df_valid = df[~df['V201200'].isin(invalid_ideology_values)]


  df = pd.read_csv("anes_timeseries_2020/anes_timeseries_2020_csv_20220210.csv", encoding='latin1')


In [25]:
#turn numbers on the scale into labels to avoid creating a sense of distance between categories
def ideology_label(val):
    if val in [1,2]:
        return "Extremely liberal"
    elif val == 3:
        return "Liberal"
    elif val == 4:
        return "Moderate"
    elif val == 5:
        return "Conservative"
    elif val in [6, 7]:
        return "Extremely conservative"
    else:
        raise Exception("Value is not on the required scale")

Y = df_valid['V201200'].apply(ideology_label)

def simplify_ideology(val):
    if val in ["Extremely liberal", "Liberal"]:
        return "Liberal"
    elif val in ["Extremely conservative", "Conservative"]:
        return "Conservative"
    elif val == "Moderate":
        return "Moderate"
    else:
        return pd.NA

Y = Y.apply(simplify_ideology)

In [26]:
# Prepare all the features
features = [
    # Climate change
    "V202332", "V202333", "V202334",

    # Immigration
    "V202232", "V202233", "V202234", "V202237", "V202240", "V202243",

    # Abortion
    "V201336", "V201337", "V201340",

    # Gun control
    "V202337", "V202339", "V202342", "V202345",

    # Race
    "V202300", "V202487", "V202488",

    # Gender equality
    "V202287", "V202291", "V202292",

    # LGBTQ+ rights
    "V201409", "V201412", "V202533"
]

X = df_valid[features]
missing_values = [-9, -8, -7, -6, -1]
X = X.replace(missing_values, pd.NA)

X = X.dropna()
Y = Y.loc[X.index]  # Align y with filtered X

In [27]:
#Create a classifier with a Random Forest model
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
    

In [28]:
#Split into test and training
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)


#Train model
clf = RandomForestClassifier(
    n_estimators=100,       # Number of trees
    max_depth=None,         # Let trees grow fully
    random_state=42,
    class_weight='balanced'
)
clf.fit(X_train, y_train)

#Evaluate the model
y_pred = clf.predict(X_test)

# Print metrics
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

Conservative       0.77      0.83      0.80       453
     Liberal       0.73      0.79      0.76       434
    Moderate       0.44      0.33      0.38       298

    accuracy                           0.69      1185
   macro avg       0.64      0.65      0.64      1185
weighted avg       0.67      0.69      0.68      1185

Confusion Matrix:
[[377  25  51]
 [ 17 341  76]
 [ 97 102  99]]


In [29]:
#Neural Network Model

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder


label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(Y)

# Scale the features (important for neural networks!)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

mlp = MLPClassifier(
    hidden_layer_sizes=(100, 50),  # Two layers: 100 and 50 neurons
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=42
)
mlp.fit(X_train, y_train)


y_pred = mlp.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

Conservative       0.71      0.71      0.71       453
     Liberal       0.69      0.72      0.70       434
    Moderate       0.34      0.32      0.33       298

    accuracy                           0.62      1185
   macro avg       0.58      0.58      0.58      1185
weighted avg       0.61      0.62      0.61      1185

Confusion Matrix:
[[323  36  94]
 [ 34 311  89]
 [ 99 104  95]]
