# --- Install and Import Required Libraries ---

In [None]:
!pip install scikit-learn pandas numpy

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# --- Load and Explore Dataset ---

In [None]:
# Announce the step, then ask the user to pick the dataset file to upload.
print("Loading Dataset...")
# NOTE(review): google.colab is presumably only importable inside a Colab runtime — confirm
# before running this notebook in a plain Jupyter environment.
from google.colab import files
# `uploaded` is kept as a global; the next cell reads its first key as the CSV file name.
uploaded = files.upload()

In [None]:
# Load the first uploaded file into a DataFrame and preview it.
# An empty `uploaded` (e.g. the upload dialog was dismissed) would otherwise fail with an
# opaque "list index out of range", so fail early with an actionable message instead.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the upload cell and select a CSV file.")

# The first key of `uploaded` is used as the path handed to pandas.
uploaded_filename = list(uploaded.keys())[0]
data = pd.read_csv(uploaded_filename)
print("Dataset Loaded Successfully!")
print("First 5 rows of the dataset:")
print(data.head())

# --- Data Preprocessing ---

In [None]:
print("\nCleaning and Preprocessing Data...")

TARGET_COL = 'Party'

# Set a real target column aside BEFORE numeric coercion: string labels such as
# 'Democrat' would be coerced to NaN and the column silently dropped as all-NaN.
has_target = TARGET_COL in data.columns
target_raw = data[TARGET_COL] if has_target else None
features = data.drop(columns=[TARGET_COL], errors='ignore')

# Remove percentage symbols and convert to numeric (anything non-numeric becomes NaN)
features = features.replace('%', '', regex=True).apply(pd.to_numeric, errors='coerce')

# Drop columns with all NaNs and rows with any NaNs
features = features.dropna(axis=1, how='all').dropna(axis=0, how='any')

if has_target:
    # Keep only rows that survived cleaning and that actually carry a label.
    target = target_raw.loc[features.index].dropna()
    features = features.loc[target.index]
else:
    # Simulate a target column if not present (alternating Democrat / Republican).
    # These are placeholder labels, so model scores on them carry no real meaning.
    # The Series is built on features.index: dropna() leaves gaps in the index, and a
    # Series with a fresh 0..n-1 index would be aligned by label and inject NaNs.
    target = pd.Series(
        np.resize(['Democrat', 'Republican'], len(features)),
        index=features.index,
    )

if features.empty:
    raise ValueError("No rows left after cleaning; check the uploaded file for missing or non-numeric values.")

# Encode the target column (LabelEncoder sorts classes, so 0: Democrat, 1: Republican
# when those are the two labels)
label_encoder = LabelEncoder()
party_encoded = label_encoder.fit_transform(target)

# Rebuild `data` with the encoded target as the LAST column; later cells rely on that layout.
data = features.assign(**{TARGET_COL: party_encoded})

# Separate features and target
X = data.drop(columns=[TARGET_COL])
y = data[TARGET_COL]

# Standardize features
# NOTE(review): the scaler is fitted on all rows before the train/test split, so test-set
# statistics leak into the scaling; fitting on the training split only would avoid that.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print("Data Preprocessing Complete!")

# --- Split Data ---

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
print("\nSplitting Data...")
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    random_state=42,
    test_size=0.3,
)
print("Train-Test Split Complete!")

# --- Logistic Regression ---

In [None]:
# Fit a logistic-regression model on the training split, then predict the held-out rows.
print("\nTraining Logistic Regression Model...")
logistic_model = LogisticRegression(max_iter=500, random_state=42).fit(X_train, y_train)
y_pred_logistic = logistic_model.predict(X_test)

# Report: confusion matrix first, then the classification report.
print("\nLogistic Regression Results:")
print(confusion_matrix(y_test, y_pred_logistic))
print(classification_report(y_test, y_pred_logistic))

# --- Random Forest Classifier ---

In [None]:
# Fit a 100-tree random forest on the training split, then predict the held-out rows.
print("\nTraining Random Forest Model...")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Report: confusion matrix first, then the classification report.
print("\nRandom Forest Results:")
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# --- Visualizing Feature Importances for Random Forest ---

In [None]:
import matplotlib.pyplot as plt

# Label each importance with the columns the model was actually trained on (X), rather
# than assuming the target is the last column of `data`; if it is not, data.columns[:-1]
# would attach importances to the wrong names.
feature_importances = pd.Series(rf_model.feature_importances_, index=X.columns)
feature_importances = feature_importances.sort_values(ascending=False)

# Explicit figure/axes so every label is attached to this specific plot.
fig, ax = plt.subplots(figsize=(10, 6))
feature_importances.head(10).plot(kind='bar', ax=ax, rot=45)
ax.set_title('Top 10 Feature Importances (Random Forest)')
ax.set_ylabel('Importance Score')
ax.set_xlabel('Features')
plt.show()

# Presidential and Turnout





# --- Install Required Libraries ---

In [None]:
!pip install scikit-learn pandas numpy matplotlib seaborn

# --- Import Libraries ---

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Announce the step, then ask the user to pick the dataset file to upload.
print("Loading Dataset...")
# NOTE(review): google.colab is presumably only importable inside a Colab runtime — confirm.
from google.colab import files
# NOTE(review): no cell visible after this one reads `uploaded` into a DataFrame; the
# preprocessing cell below uses `data` directly — TODO confirm where this upload is consumed.
uploaded = files.upload()

# --- Preprocessing ---

In [None]:
# Standardise the selected feature columns of `data`.
# NOTE(review): `columns` is not defined in any cell visible in this notebook, and `data`
# is only assigned in the earlier section (not from the upload just above). On a fresh
# kernel this cell is expected to raise NameError unless another cell supplies them —
# TODO add/restore the cell that loads the data and selects the feature columns.
scaler = StandardScaler()
# fit_transform learns the scaling from data[columns] and applies it in one step.
X_scaled = scaler.fit_transform(data[columns])

# --- Train-Test Split ---

In [None]:
# One 70/30 split per prediction task, both drawn from the same scaled feature matrix
# with the same seed.
# NOTE(review): y_presidential and y_turnout are not defined in the visible cells — confirm
# where the two target vectors are created.
(
    X_train_presidential,
    X_test_presidential,
    y_train_presidential,
    y_test_presidential,
) = train_test_split(X_scaled, y_presidential, random_state=42, test_size=0.3)

(
    X_train_turnout,
    X_test_turnout,
    y_train_turnout,
    y_test_turnout,
) = train_test_split(X_scaled, y_turnout, random_state=42, test_size=0.3)

# --- Logistic Regression for Presidential Prediction ---

In [None]:
# Progress message, matching the other three training cells in this section.
print("\nTraining Logistic Regression for Presidential Prediction...")
# Fit on the presidential training split, then predict the held-out rows.
logistic_model_presidential = LogisticRegression(random_state=42, max_iter=500)
logistic_model_presidential.fit(X_train_presidential, y_train_presidential)
y_pred_presidential_logistic = logistic_model_presidential.predict(X_test_presidential)

# --- Random Forest for Presidential Prediction ---

In [None]:
# Fit a 100-tree random forest on the presidential training split, then predict the test rows.
print("\nTraining Random Forest for Presidential Prediction...")
rf_model_presidential = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_presidential, y_train_presidential
)
y_pred_presidential_rf = rf_model_presidential.predict(X_test_presidential)

# --- Logistic Regression for Voter Turnout Prediction ---

In [None]:
# Fit logistic regression on the turnout training split, then predict the test rows.
print("\nTraining Logistic Regression for Voter Turnout Prediction...")
logistic_model_turnout = LogisticRegression(max_iter=500, random_state=42).fit(
    X_train_turnout, y_train_turnout
)
y_pred_turnout_logistic = logistic_model_turnout.predict(X_test_turnout)

# --- Random Forest for Voter Turnout Prediction ---

In [None]:
# Fit a 100-tree random forest on the turnout training split, then predict the test rows.
print("\nTraining Random Forest for Voter Turnout Prediction...")
rf_model_turnout = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_turnout, y_train_turnout
)
y_pred_turnout_rf = rf_model_turnout.predict(X_test_turnout)

# --- Evaluation ---

In [None]:
def evaluate_model(y_test, y_pred, model_name, task):
    """Print a headed confusion matrix and classification report for one model.

    Args:
        y_test: True labels of the evaluation rows.
        y_pred: Labels predicted for the same rows.
        model_name: Model name shown in the heading.
        task: Prediction task name shown in the heading.

    Returns:
        None. The results are written to standard output.
    """
    print(f"\n{model_name} Results for {task}:")
    # Same order as the heading promises: matrix first, then the per-class report.
    for summarise in (confusion_matrix, classification_report):
        print(summarise(y_test, y_pred))

# Presidential task: logistic regression, then random forest.
evaluate_model(
    y_test=y_test_presidential,
    y_pred=y_pred_presidential_logistic,
    model_name="Logistic Regression",
    task="Presidential Prediction",
)
evaluate_model(
    y_test=y_test_presidential,
    y_pred=y_pred_presidential_rf,
    model_name="Random Forest",
    task="Presidential Prediction",
)

# Turnout task: logistic regression, then random forest.
evaluate_model(
    y_test=y_test_turnout,
    y_pred=y_pred_turnout_logistic,
    model_name="Logistic Regression",
    task="Voter Turnout Prediction",
)
evaluate_model(
    y_test=y_test_turnout,
    y_pred=y_pred_turnout_rf,
    model_name="Random Forest",
    task="Voter Turnout Prediction",
)

# --- Visualizing Confusion Matrices ---

In [None]:
def plot_confusion_matrix(y_test, y_pred, title, labels):
    """Draw the confusion matrix of one model as an annotated heatmap.

    Args:
        y_test: True labels of the evaluation rows.
        y_pred: Labels predicted for the same rows.
        title: Title placed above the heatmap.
        labels: Tick labels used on both the predicted (x) and actual (y) axes.

    Returns:
        None. The figure is shown with plt.show().
    """
    counts = confusion_matrix(y_test, y_pred)

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(
        counts,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
        ax=ax,
    )
    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    fig.tight_layout()
    plt.show()

# Presidential task heatmaps.
plot_confusion_matrix(
    y_test=y_test_presidential,
    y_pred=y_pred_presidential_logistic,
    title="Presidential Logistic Regression CM",
    labels=["Democrat", "Republican"],
)
plot_confusion_matrix(
    y_test=y_test_presidential,
    y_pred=y_pred_presidential_rf,
    title="Presidential Random Forest CM",
    labels=["Democrat", "Republican"],
)

# Voter turnout task heatmaps.
plot_confusion_matrix(
    y_test=y_test_turnout,
    y_pred=y_pred_turnout_logistic,
    title="Voter Turnout Logistic Regression CM",
    labels=["Low", "High"],
)
plot_confusion_matrix(
    y_test=y_test_turnout,
    y_pred=y_pred_turnout_rf,
    title="Voter Turnout Random Forest CM",
    labels=["Low", "High"],
)

# --- Visualizing Metrics ---

In [None]:
def plot_metrics(title, metrics, labels):
    """Show a grouped bar chart of metric scores, one group per label.

    Args:
        title: Title placed above the chart.
        metrics: Data passed to pd.DataFrame; its rows are indexed by `labels`.
        labels: Row labels for the frame, shown along the x axis.

    Returns:
        None. The figure is shown with plt.show().
    """
    metrics_frame = pd.DataFrame(metrics, index=labels)
    # pandas creates the figure here; the returned axes are used for the labels below.
    ax = metrics_frame.plot.bar(figsize=(8, 5), colormap='viridis')
    ax.set_title(title)
    ax.set_ylabel("Score")
    plt.xticks(rotation=0)
    plt.tight_layout()
    plt.show()


In [None]:
def _per_class_metrics(y_true, y_pred):
    """Collect precision, recall and F1 per class in the shape plot_metrics expects.

    Args:
        y_true: True labels of the evaluation rows.
        y_pred: Labels predicted for the same rows.

    Returns:
        dict mapping "precision", "recall" and "f1-score" to a list with one score per
        class, in the order the classes appear in the classification report.
    """
    report = classification_report(y_true, y_pred, output_dict=True)
    # The report dict also holds aggregate rows; only the per-class rows are plotted.
    summary_rows = {"accuracy", "micro avg", "macro avg", "weighted avg", "samples avg"}
    class_keys = [key for key in report if key not in summary_rows]
    return {
        metric: [report[key][metric] for key in class_keys]
        for metric in ("precision", "recall", "f1-score")
    }


# The four metric tables below were referenced but never defined in the visible cells,
# so this cell failed with NameError; derive them from the predictions made above.
# NOTE(review): the label lists assume exactly two classes per task — confirm.
presidential_metrics_logistic = _per_class_metrics(y_test_presidential, y_pred_presidential_logistic)
presidential_metrics_rf = _per_class_metrics(y_test_presidential, y_pred_presidential_rf)
turnout_metrics_logistic = _per_class_metrics(y_test_turnout, y_pred_turnout_logistic)
turnout_metrics_rf = _per_class_metrics(y_test_turnout, y_pred_turnout_rf)

plot_metrics("Presidential Logistic Regression Metrics", presidential_metrics_logistic, ["Democrat", "Republican"])
plot_metrics("Presidential Random Forest Metrics", presidential_metrics_rf, ["Democrat", "Republican"])
plot_metrics("Voter Turnout Logistic Regression Metrics", turnout_metrics_logistic, ["Low Turnout", "High Turnout"])
plot_metrics("Voter Turnout Random Forest Metrics", turnout_metrics_rf, ["Low Turnout", "High Turnout"])