In [8]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import seaborn as sns

In [None]:
def contains_special_characters(cell):
    """Return True when ``cell`` is a string containing '?' or '/'.

    Any non-string value (numbers, None, ...) yields False.
    """
    if not isinstance(cell, str):
        return False
    return any(marker in cell for marker in ('?', '/'))

In [None]:
# Load the data set, keep the State column aside in `test`, and work on
# the remaining columns only.
df = pd.read_csv("Voting_Outcomes_and_Campaign_Expenditures.csv")
test = df['State']
df = df.drop(columns=["State"])

# Spearman rank correlation between every pair of remaining columns.
mat = df.corr(method='spearman')
# Boolean upper-triangle mask (diagonal included): the heatmap hides the
# masked cells, so only the lower triangle is drawn.
mask = np.triu(np.ones_like(mat, dtype=bool))
cmap = sns.diverging_palette(230, 20, as_cmap=True)

f, ax = plt.subplots(figsize=(30, 25))
sns.heatmap(
    mat,
    mask=mask,
    cmap=cmap,
    vmax=1,
    center=0,
    annot=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)
plt.title('Correlation Map')
plt.show()

In [None]:
# Reload the raw data and remove outlier rows with the 1.5 * IQR rule.
df_original = pd.read_csv("Voting_Outcomes_and_Campaign_Expenditures.csv")

# The quantile bounds are computed on every column except State.
df = df_original.drop(columns=["State"])
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1

# A row is an outlier when at least one of its values lies outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] for that column.
outlier_rows = ((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)

# Take an explicit copy of the surviving rows: assigning a new column to a
# boolean-filtered slice would otherwise raise SettingWithCopyWarning.
df = df.loc[~outlier_rows].copy()

# Re-attach State for the kept rows; the index labels still match df_original.
df["State"] = df_original.loc[df.index, "State"]

In [None]:
# One-hot encode State, then cast every column (dummy columns included)
# to float.
df_encoded = pd.get_dummies(df, columns=['State']).astype(float)

In [None]:
# Print the descriptive statistics of each encoded column.
summary_stats = df_encoded.describe()
print(summary_stats)

In [None]:
# Remove fully identical rows, keeping the first occurrence of each and
# preserving the original index labels.
df_encoded = df_encoded.drop_duplicates(
    subset=None,
    keep='first',
    ignore_index=False,
)

In [None]:
# Flag every cell that holds a '?' or '/' placeholder string.
# Column-wise Series.map is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases; Series.map works on old and new ones.
contains_characters = df_encoded.apply(
    lambda column: column.map(contains_special_characters)
)
any_special_characters = bool(contains_characters.any().any())

# df_encoded was cast to float when it was built, so placeholder strings
# cannot survive in it; missing entries would show up as NaN instead.
# Checking NaN as well makes the message below meaningful.
any_missing_values = bool(df_encoded.isna().any().any())

if any_special_characters or any_missing_values:
    print("there is some data missing")
else:
    print("all the dataset is complete")

In [None]:
# Draw a 50-bin histogram for each column that DataFrame.hist can plot,
# all on one large figure.
df.hist(bins=50, figsize=(35, 30))
plt.show()

In [None]:
# Scatter voteA against each candidate predictor, one figure per predictor.
# A loop replaces three copy-pasted cells, and every figure now carries
# axis labels and a title so it can be read on its own.
# NOTE(review): column names are kept exactly as in the original cell;
# 'expandA'/'expandB' are spelled differently from 'lexpendA' used in the
# regressions -- confirm them against the CSV header.
for predictor in ('expandA', 'expandB', 'shareA'):
    scatter_fig, scatter_ax = plt.subplots()
    scatter_ax.scatter(df_encoded[predictor], df_encoded['voteA'])
    scatter_ax.set_xlabel(predictor)
    scatter_ax.set_ylabel('voteA')
    scatter_ax.set_title(f'voteA vs {predictor}')
    plt.show()

In [None]:
# Regress voteA on every other column of the encoded frame.
# No constant is added here: the encoded frame carries one dummy column per
# State value, which presumably already spans an intercept -- adding a
# constant on top would make the design matrix collinear.
y = df_encoded.loc[:, 'voteA']
x = df_encoded.drop(columns='voteA')

model = sm.OLS(endog=y, exog=x)
results = model.fit()
print(results.summary())

In [None]:
# Regress voteA on lexpendA, prtystrA and democA.
y = df_encoded['voteA']
x = df_encoded[['lexpendA', 'prtystrA', 'democA']]

# statsmodels' OLS does not include an intercept unless one is supplied.
# Without it the fit is forced through the origin, which distorts the
# coefficients and reports an uncentered R-squared. `x` itself is left
# untouched; the constant is added only to the design matrix of the model.
model = sm.OLS(y, sm.add_constant(x))
results = model.fit()
print(results.summary())

In [None]:
# Regress voteA on lexpendA and expandA.
y = df_encoded['voteA']
x = df_encoded[['lexpendA', 'expandA']]

# statsmodels' OLS does not include an intercept unless one is supplied.
# Without it the fit is forced through the origin, which distorts the
# coefficients and reports an uncentered R-squared. `x` itself is left
# untouched; the constant is added only to the design matrix of the model.
model = sm.OLS(y, sm.add_constant(x))
results = model.fit()
print(results.summary())