<a href="https://colab.research.google.com/github/deborahchang/projects/blob/Campaign-Financing-on-Election-Outcomes/baseline_and_logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

drive.mount('/content/drive')

dem_candidates_path = '/content/drive/MyDrive/144Project/dem_candidates.csv'
rep_candidates_path = '/content/drive/MyDrive/144Project/rep_candidates.csv'
lobbyist_bundle_path = '/content/drive/MyDrive/144Project/lobbyist_bundle.csv'

dem_candidates = pd.read_csv(dem_candidates_path)
rep_candidates = pd.read_csv(rep_candidates_path, encoding='latin-1')
lobbyist_bundle = pd.read_csv(lobbyist_bundle_path)

lobbyist_bundle['Total_Contribution'] = lobbyist_bundle.filter(like='Contribution').sum(axis=1)

merged_dem = pd.merge(dem_candidates, lobbyist_bundle[['Committee_Election_State', 'Total_Contribution']], left_on='State', right_on='Committee_Election_State', how='left')
merged_rep = pd.merge(rep_candidates, lobbyist_bundle[['Committee_Election_State', 'Total_Contribution']], left_on='State', right_on='Committee_Election_State', how='left')


merged_dem.drop('Committee_Election_State', axis=1, inplace=True)
merged_rep.drop('Committee_Election_State', axis=1, inplace=True)

combined_candidates = pd.concat([merged_dem, merged_rep], axis=0)


selected_features = ['Partisan Lean', 'Party Support?', 'Total_Contribution', 'Won Primary']
final_data = combined_candidates[selected_features]

final_data.fillna(0, inplace=True)

yes_no_mapping = {'Yes': 1, 'No': 0}
final_data = final_data.applymap(lambda x: yes_no_mapping.get(x, x))

X = final_data.drop(['Won Primary'], axis=1)
y = final_data['Won Primary']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline Model
baseline_model = DummyClassifier(strategy="stratified", random_state=42)
baseline_model.fit(X_train, y_train)
y_baseline_pred = baseline_model.predict(X_test)

# Baseline model evaluation
print('Baseline Model:')
print(f'Accuracy: {accuracy_score(y_test, y_baseline_pred)}')
print(f'Confusion Matrix:\n{confusion_matrix(y_test, y_baseline_pred)}')
print(f'Classification Report:\n{classification_report(y_test, y_baseline_pred)}')

# Logistic Regression Model
log_reg_model = LogisticRegression(random_state=42)
log_reg_model.fit(X_train, y_train)
y_log_reg_pred = log_reg_model.predict(X_test)

# LR model evaluation
print('\nLogistic Regression Model:')
print(f'Accuracy: {accuracy_score(y_test, y_log_reg_pred)}')
print(f'Confusion Matrix:\n{confusion_matrix(y_test, y_log_reg_pred)}')
print(f'Classification Report:\n{classification_report(y_test, y_log_reg_pred)}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Baseline Model:
Accuracy: 0.5712906784335355
Confusion Matrix:
[[3416 1547]
 [1562  727]]
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.69      0.69      4963
           1       0.32      0.32      0.32      2289

    accuracy                           0.57      7252
   macro avg       0.50      0.50      0.50      7252
weighted avg       0.57      0.57      0.57      7252


Logistic Regression Model:
Accuracy: 0.6770546056260341
Confusion Matrix:
[[4878   85]
 [2257   32]]
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.98      0.81      4963
           1       0.27      0.01      0.03      2289

    accuracy                           0.68      7252
   macro avg       0.48      0.50      0.42      7252
weighted avg       0.55      0.68 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_data.fillna(0, inplace=True)
