In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [18]:
# Generate synthetic social media interactions data and voter registration data.
def generate_synthetic_data(n_users=1000, seed=42):
    """Build two reproducible synthetic datasets keyed by ``user_id``.

    Parameters
    ----------
    n_users : int
        Number of synthetic users; ids run from 1 to ``n_users`` inclusive.
    seed : int or None
        Seed for the local random generator. A fixed seed makes every
        Restart & Run All produce the same data (and therefore the same
        downstream metrics). Pass ``None`` for fresh randomness.

    Returns
    -------
    tuple of (dict, dict)
        ``(social_media, voters)`` column dictionaries, each suitable for
        ``pd.DataFrame(...)``.
    """
    # A local Generator avoids touching NumPy's global random state.
    rng = np.random.default_rng(seed)
    user_ids = range(1, n_users + 1)

    # Upper bounds are exclusive, e.g. likes fall in 0..99 and ages in 18..84.
    social_media = {
        "user_id": user_ids,
        "likes_facebook": rng.integers(0, 100, size=n_users),
        "shares_twitter": rng.integers(0, 50, size=n_users),
        "comments_instagram": rng.integers(0, 20, size=n_users),
    }
    voters = {
        "user_id": user_ids,
        "age": rng.integers(18, 85, size=n_users),
        "gender": rng.choice(["Male", "Female"], size=n_users),
        "party_affiliation": rng.choice(["Democrat", "Republican", "Independent"], size=n_users),
        "voter_support": rng.choice(["support", "oppose"], size=n_users),
    }
    return social_media, voters


social_media_data, voter_data = generate_synthetic_data()

In [19]:
# Turn each column dictionary into a frame, then join the two on the shared user id.
social_media_df = pd.DataFrame(social_media_data)
voter_df = pd.DataFrame(voter_data)
data = social_media_df.merge(voter_df, on="user_id")

In [20]:
# Target: the support/oppose label. Features: every other column except the id.
y = data.loc[:, "voter_support"]
X = data.drop(["user_id", "voter_support"], axis=1)

In [21]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Show the shapes of the training features and the training labels side by side.
feature_shape, label_shape = X_train.shape, y_train.shape
print(feature_shape, label_shape)

(800, 6) (800,)


In [None]:
# Count missing values: per column for the features, a single total for the labels.
feature_null_counts = X_train.isna().sum()
label_null_count = y_train.isna().sum()
print(feature_null_counts, label_null_count)


likes_facebook        0
shares_twitter        0
comments_instagram    0
age                   0
gender                0
party_affiliation     0
dtype: int64 0


In [None]:
# Show the dtype of every training feature column.
column_dtypes = X_train.dtypes
print(column_dtypes)


likes_facebook         int64
shares_twitter         int64
comments_instagram     int64
age                    int64
gender                object
party_affiliation     object
dtype: object


In [None]:
# Missing-value counts, one result per print: per-column for features, then the label total.
for null_count in (X_train.isna().sum(), y_train.isna().sum()):
    print(null_count)


likes_facebook        0
shares_twitter        0
comments_instagram    0
age                   0
gender                0
party_affiliation     0
dtype: int64
0


In [22]:
# One-hot encode the training features with pandas' dummy-variable expansion.
X_train_encoded = pd.get_dummies(data=X_train)

# Fit a 100-tree random forest; random_state pins the forest's own randomness.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train_encoded, y_train)

In [23]:
# One-hot encode the test features and align them to the training columns.
# reindex does the whole alignment in one step: a dummy column present only in
# the training frame is added and filled with 0, a column present only in the
# test split is dropped, and the column order is made to match the training frame.
# X_train_encoded is reused from the training cell (the same cell that defines
# `model`) rather than being recomputed here.
X_test_encoded = pd.get_dummies(X_test).reindex(
    columns=X_train_encoded.columns, fill_value=0
)

# Make predictions on the test set
y_pred = model.predict(X_test_encoded)



In [None]:
# Score the predictions: fraction of test rows whose predicted label matches the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.43


In [24]:
# Build the per-class precision/recall/F1 table first, then print it under a heading.
report_text = classification_report(y_test, y_pred)
print("Classification Report:")
print(report_text)

Classification Report:
              precision    recall  f1-score   support

      oppose       0.60      0.48      0.53       116
     support       0.43      0.55      0.48        84

    accuracy                           0.51       200
   macro avg       0.51      0.52      0.51       200
weighted avg       0.53      0.51      0.51       200

