In [13]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# import profile report
from ydata_profiling import ProfileReport

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
filepath = '../data/ElectionSurvey.csv'
df = pd.read_csv(filepath, encoding='latin-1')

In [12]:
df.head()

Unnamed: 0,Timestamp,Age,Occupation,Did you vote in the last election?,"(If No, ask this question)\n\nWhy didnt you vote?",Why did you vote for your candidate?,Which party do you belong to?,Why do you think the NPP parliamentary candidate lost in the election?,Did you know about Frederick Obeng Adoms projects as MP before the election?,Do you think Frederick Obeng Adom solved the communitys problems?,Why do you think Emmanuel Drah (NDC) won?,What did the NDC do better than the NPP?,Which of the former MPs would you say was the best performing MP for the constituency?,Community name:,What are the biggest problems in your community? (Pick 2 or more),Who addressed these issues better during the election?,Suggestions for the team that wants to win in 2028,Do you think the election results were fair and reflect the peoples choice?
0,12/23/2024 16:55,,Trader,No,I don't know anything about it,,,,,,,,,,,,,
1,12/23/2024 16:59,29.0,Food vendor,No,,,,People were not happy with the NPP government ...,,,The NDCs promises were better,,,,,,,
2,12/23/2024 17:00,26.0,Food vendor,Yes,,I support their party,NDC,He didnt do enough for the community,"I heard, but they didn't help me","Yes, he did most of the work needed",He was more visible and approachable,They made stronger promises,Assifu Bekoe,,Bad roads,Emmanuel Drah (NDC),Provision of employment,Yes
3,12/23/2024 17:00,55.0,,Yes,,I support their party,NPP,He didnt do enough for the community,"Yes, I knew about them","Yes, he did most of the work needed",He was more visible and approachable,They were better at reaching people,Frederick Adom,,"Bad roads, Jobs",Neither,,Dont know
4,12/23/2024 17:01,53.0,Tailor,No,His card was missing,,Cant tell,Cant tell,"No, I didn't know about them","He tried, but it wasn't enough",Cant tell,Cant tell,Frederick Adom,,,,,Dont know


In [18]:
# use profile report to get a quick overview of the data
profile = ProfileReport(df, title='Election Survey Profile Report', explorative=True)
# profile.to_file('ElectionSurveyProfileReport.html')

# PREVIEW PROFILE REPORT IN NOTEBOOK
profile.to_notebook_iframe()

Summarize dataset:  22%|██▏       | 5/23 [00:00<00:04,  4.11it/s, Describe variable:(If No, ask this question)                                           

Summarize dataset: 100%|██████████| 29/29 [00:04<00:00,  6.83it/s, Completed]                                                                                               
Generate report structure: 100%|██████████| 1/1 [00:08<00:00,  8.41s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s]


In [19]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the data with the correct encoding
data = pd.read_csv('../data/ElectionSurvey.csv', encoding='latin1')

# Display the first few rows of the data
print(data.head())

# Data Cleaning
# Rename columns for easier access
data.columns = [
    'Timestamp', 'Age', 'Occupation', 'Voted_Last_Election', 'Reason_Not_Voted',
    'Reason_Voted_Candidate', 'Party_Belong', 'Reason_NPP_Lost', 'Know_Projects',
    'Solved_Community_Problems', 'Reason_NDC_Won', 'NDC_Better_Than_NPP',
    'Best_Performing_MP', 'Community_Name', 'Biggest_Problems', 'Addressed_Issues_Better',
    'Suggestions_2028', 'Fair_Election_Results'
]

# Convert 'Timestamp' to datetime
data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce')

# Convert 'Age' to numeric, forcing errors to NaN
data['Age'] = pd.to_numeric(data['Age'], errors='coerce')

# Fill missing values in 'Occupation' with 'Unknown'
data['Occupation'].fillna('Unknown', inplace=True)

# Fill missing values in categorical columns with 'Not Specified'
categorical_columns = [
    'Voted_Last_Election', 'Reason_Not_Voted', 'Reason_Voted_Candidate', 'Party_Belong',
    'Reason_NPP_Lost', 'Know_Projects', 'Solved_Community_Problems', 'Reason_NDC_Won',
    'NDC_Better_Than_NPP', 'Best_Performing_MP', 'Community_Name', 'Biggest_Problems',
    'Addressed_Issues_Better', 'Suggestions_2028', 'Fair_Election_Results'
]
data[categorical_columns] = data[categorical_columns].fillna('Not Specified')

# Analysis
# Descriptive Statistics
print(data.describe(include='all'))

# Inferential Statistics
# Example: Chi-Square Test for Independence between 'Voted_Last_Election' and 'Party_Belong'
from scipy.stats import chi2_contingency

contingency_table = pd.crosstab(data['Voted_Last_Election'], data['Party_Belong'])
chi2, p, dof, expected = chi2_contingency(contingency_table)
print(f"Chi-Square Test: chi2={chi2}, p-value={p}")

# Visualization
# Age Distribution
plt.figure(figsize=(10, 6))
data['Age'].hist(bins=20)
plt.title('Age Distribution of Respondents')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()

# Voting Pattern
plt.figure(figsize=(10, 6))
data['Voted_Last_Election'].value_counts().plot(kind='bar')
plt.title('Voting Pattern in Last Election')
plt.xlabel('Voted')
plt.ylabel('Frequency')
plt.show()

# Party Affiliation
plt.figure(figsize=(10, 6))
data['Party_Belong'].value_counts().plot(kind='bar')
plt.title('Party Affiliation of Respondents')
plt.xlabel('Party')
plt.ylabel('Frequency')
plt.show()

# Save cleaned data to a new CSV file
data.to_csv('../data/Cleaned_ElectionSurvey.csv', index=False)

          Timestamp   Age   Occupation Did you vote in the last election?  \
0  12/23/2024 16:55   NaN      Trader                                  No   
1  12/23/2024 16:59  29.0  Food vendor                                 No   
2  12/23/2024 17:00  26.0  Food vendor                                Yes   
3  12/23/2024 17:00  55.0          NaN                                Yes   
4  12/23/2024 17:01  53.0       Tailor                                 No   

  (If No, ask this question)\n\nWhy didnt you vote?   \
0                    I don't know anything about it       
1                                                NaN      
2                                                NaN      
3                                                NaN      
4                              His card was missing       

  Why did you vote for your candidate?   Which party do you belong to?  \
0                                    NaN                           NaN   
1                                 

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Occupation'].fillna('Unknown', inplace=True)
  plt.show()
  plt.show()
  plt.show()


In [21]:
df = data.copy()

In [22]:
df.head()

Unnamed: 0,Timestamp,Age,Occupation,Voted_Last_Election,Reason_Not_Voted,Reason_Voted_Candidate,Party_Belong,Reason_NPP_Lost,Know_Projects,Solved_Community_Problems,Reason_NDC_Won,NDC_Better_Than_NPP,Best_Performing_MP,Community_Name,Biggest_Problems,Addressed_Issues_Better,Suggestions_2028,Fair_Election_Results
0,2024-12-23 16:55:00,,Trader,No,I don't know anything about it,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified
1,2024-12-23 16:59:00,29.0,Food vendor,No,Not Specified,Not Specified,Not Specified,People were not happy with the NPP government ...,Not Specified,Not Specified,The NDCs promises were better,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified
2,2024-12-23 17:00:00,26.0,Food vendor,Yes,Not Specified,I support their party,NDC,He didnt do enough for the community,"I heard, but they didn't help me","Yes, he did most of the work needed",He was more visible and approachable,They made stronger promises,Assifu Bekoe,Not Specified,Bad roads,Emmanuel Drah (NDC),Provision of employment,Yes
3,2024-12-23 17:00:00,55.0,Unknown,Yes,Not Specified,I support their party,NPP,He didnt do enough for the community,"Yes, I knew about them","Yes, he did most of the work needed",He was more visible and approachable,They were better at reaching people,Frederick Adom,Not Specified,"Bad roads, Jobs",Neither,Not Specified,Dont know
4,2024-12-23 17:01:00,53.0,Tailor,No,His card was missing,Not Specified,Cant tell,Cant tell,"No, I didn't know about them","He tried, but it wasn't enough",Cant tell,Cant tell,Frederick Adom,Not Specified,Not Specified,Not Specified,Not Specified,Dont know


In [23]:
df_processed = df.dropna()
df_processed.shape

(2733, 18)

In [24]:
df.shape

(3454, 18)

In [None]:
df_processed

In [25]:
# MAKE A DATAFRAME OF THE NA RECORDS
na_records = df[df.isna()]

In [27]:
na_records.shape

(3454, 18)

In [28]:
df_processed

Unnamed: 0,Timestamp,Age,Occupation,Voted_Last_Election,Reason_Not_Voted,Reason_Voted_Candidate,Party_Belong,Reason_NPP_Lost,Know_Projects,Solved_Community_Problems,Reason_NDC_Won,NDC_Better_Than_NPP,Best_Performing_MP,Community_Name,Biggest_Problems,Addressed_Issues_Better,Suggestions_2028,Fair_Election_Results
1,2024-12-23 16:59:00,29.0,Food vendor,No,Not Specified,Not Specified,Not Specified,People were not happy with the NPP government ...,Not Specified,Not Specified,The NDCs promises were better,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified,Not Specified
2,2024-12-23 17:00:00,26.0,Food vendor,Yes,Not Specified,I support their party,NDC,He didnt do enough for the community,"I heard, but they didn't help me","Yes, he did most of the work needed",He was more visible and approachable,They made stronger promises,Assifu Bekoe,Not Specified,Bad roads,Emmanuel Drah (NDC),Provision of employment,Yes
3,2024-12-23 17:00:00,55.0,Unknown,Yes,Not Specified,I support their party,NPP,He didnt do enough for the community,"Yes, I knew about them","Yes, he did most of the work needed",He was more visible and approachable,They were better at reaching people,Frederick Adom,Not Specified,"Bad roads, Jobs",Neither,Not Specified,Dont know
4,2024-12-23 17:01:00,53.0,Tailor,No,His card was missing,Not Specified,Cant tell,Cant tell,"No, I didn't know about them","He tried, but it wasn't enough",Cant tell,Cant tell,Frederick Adom,Not Specified,Not Specified,Not Specified,Not Specified,Dont know
5,2024-12-23 17:02:00,62.0,Pensioner,Yes,Not Specified,I liked their projects or promises,NPP,People were not happy with the NPP government ...,"Yes, I knew about them","Yes, he did most of the work needed",People were not happy with the NPP government,People didn't like NPP,Frederick Adom,Not Specified,Schools and education,Frederick Obeng Adom (NPP),Everyone should vote,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3449,2024-12-30 21:33:00,32.0,Driver,Yes,Not Specified,I liked their projects or promises,NPP,Team didnt support,"Yes, I knew about them","Yes, he did most of the work needed",,,Frederick Adom,Odumkyere darmang,"Clean water, Lavatories and network",Frederick Obeng Adom (NPP),Job creation and more infrastructure,Yes
3450,2024-12-30 21:34:00,45.0,Trader,Yes,Not Specified,I support their party,NDC,People vote for NDC here,"Yes, I knew about them","Yes, he did most of the work needed",People were not happy with the NPP government,They made stronger promises,Frederick Adom,Atimatim,"Bad roads, Jobs, Lavatory , poor connection",Frederick Obeng Adom (NPP),Good policies,Yes
3451,2024-12-30 21:37:00,35.0,Farmer,Yes,Not Specified,I liked their projects or promises,NPP,,"Yes, I knew about them","Yes, he did most of the work needed",,,Frederick Adom,Odumkyere darmang,"Jobs, Health Services, Clean water, Lavatories...",Frederick Obeng Adom (NPP),Network fixation and more infrastructure and j...,Yes
3452,2024-12-30 21:37:00,33.0,Farmer,Yes,Not Specified,"I liked their projects or promises, They were ...",NDC,People were not happy with the NPP government ...,"I heard, but they didn't help me","He tried, but it wasn't enough","He was more visible and approachable, The NDC...","They were better at reaching people, They made...",Frederick Adom,Odumkyere Darmang,"Bad roads, Jobs, Clean water",Emmanuel Drah (NDC),Not Specified,Yes
