<a href="https://colab.research.google.com/github/fahmizainal17/Algorithm-of-Life/blob/main/Revised_6_12_Data_Cleaning_Gopeng_Survey_IVR_2023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# This is data cleaning specifically for IVR

In [None]:
import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning) raised by the
# in-place edits further down; consider filtering only the specific categories that are noisy.
warnings.filterwarnings("ignore")

In [None]:
from datetime import date

# CC labels the folders by date, so today's date is kept in a variable
# that can be used to pick up the latest file.
today = date.today()

# "%d-%m-%Y" puts the day first, so replacing "-0" strips a leading zero from the
# month only; the day keeps its zero (e.g. "05-1-2024").
# NOTE(review): confirm this matches how CC actually names the folders.
date_label = today.strftime("%d-%m-%Y").replace("-0", "-")
formatted_date = f"/{date_label}"
print("Today's date:", formatted_date)

Today's date: /17-1-2024


# IVR Results

## Import IVR results from folders in Google Drive

In [None]:
import pandas as pd
import numpy as np

# Mount Google Drive at /content/drive; the raw-result folder read by the loading
# cell lives under this mount point, so this cell has to run before it.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Read every broadcast report (.csv) found under `path` and collect, per file:
#   * the complete responses                                   -> df_list
#   * the phone numbers that answered the first question       -> phonenum_list
#   * the number of calls made                                 -> total_calls_made
import glob
import os
import re

path = "/content/drive/MyDrive/Clients Deliverables/Gopeng Parliament Survey 2023/Gopeng IVR Raw Result/After 6 Dec/"

# An answered key press such as 'FlowNo_3=1' is 10 characters long, whereas a blank
# one ('FlowNo_3=') is 9, so the string length tells the two apart.
ANSWERED_KEYPRESS_LEN = len('FlowNo_3=1')

df_list = []
phonenum_list = []
total_calls_made = []
i = 0

# Sort the matches so the files are always processed in the same order.
csv_files = sorted(glob.glob(os.path.join(path, '*.csv')))
if not csv_files:
  # Fail with an actionable message instead of pandas' "No objects to concatenate".
  raise FileNotFoundError(f"No .csv files found in {path!r}; check that Drive is mounted and the folder path is correct.")

for fname in csv_files:
  # basename works for any file name (the previous regex crashed when 'Broadcast' was absent)
  print(f"Reading in {os.path.basename(fname)} ..")
  i += 1

  # The first line is skipped and 24 positional column names are supplied, so the
  # report's own header line arrives as the first data row.
  with open(fname, 'r') as f:
    df = pd.read_csv(f, skiprows=1, names=range(24), engine='python')

  # Drop all-empty columns
  df = df.dropna(axis='columns', how='all')

  # Promote the first row to column names, then remove it from the data so it is
  # neither counted as a call nor exported as a phone number.
  df.columns = df.iloc[0]
  df = df.iloc[1:]

  # Keep the PhoneNo column and all columns from UserKeyPress onwards
  df_results = pd.concat([df[['PhoneNo']], df.loc[:, 'UserKeyPress':]], axis='columns')

  # Total calls made
  total_call_made = len(df_results)
  total_calls_made.append(total_call_made)

  # Numbers that answered at least the first question are excluded from the next sampling
  phonenum_recycle = df_results.dropna(subset=['UserKeyPress'])
  phonenum_list.append(phonenum_recycle[['PhoneNo']])

  # Drop incomplete rows; copy so the edits below act on an independent frame
  df_complete = df_results.dropna(axis='index').copy()

  # Reset column names to positions so files concatenate column-by-column later
  df_complete.columns = np.arange(len(df_complete.columns))

  # Tag the source of these responses (appended as the last column)
  df_complete['Set'] = 'IVR'

  # Keep only rows whose third column (the FlowNo_3 key press) was actually answered
  df_complete = df_complete.loc[df_complete.iloc[:, 2].str.len() == ANSWERED_KEYPRESS_LEN]

  print(f'Total row: {len(df_complete)}')

  # Append the CRs into df_list (to be used later)
  df_list.append(df_complete)

# Combine all participating phone numbers stored in phonenum_list
phonenum_combined = pd.concat(phonenum_list, axis='index')

# Rename column to match with codes in databricks
phonenum_combined = phonenum_combined.rename(columns={'PhoneNo': 'phonenum'})

# Inspect result
print('\n')
print(f"Total calls made: {sum(total_calls_made)}")
print(f"Total count of phone no. that need to be excluded in the next sampling: {phonenum_combined.shape[0]}")
print(f"Total files loaded in: {i}")

Reading in Broadcast_List_Report_for_GOPENG NEW NOON (1).csv ..
Total row: 82
Reading in Broadcast_List_Report_for_GOPENG NEW EVENING (1).csv ..
Total row: 86
Reading in Broadcast_List_Report_for_GOPENG NEW NIGHT (1).csv ..
Total row: 90
Reading in Broadcast_List_Report_for_GOPENG NEW NOON (2).csv ..
Total row: 25
Reading in Broadcast_List_Report_for_GOPENG NEW EVENING (2).csv ..
Total row: 29
Reading in Broadcast_List_Report_for_GOPENG NEW NIGHT (2).csv ..
Total row: 19
Reading in Broadcast_List_Report_for_GOPENG NEW NIGHT 3.csv ..
Total row: 24
Reading in Broadcast_List_Report_for_GOPENG NEW EVENING 3.csv ..
Total row: 13
Reading in Broadcast_List_Report_for_GOPENG NEW NOON 3.csv ..
Total row: 23
Reading in Broadcast_List_Report_for_GOPENG NEW NOON 4.csv ..
Total row: 15
Reading in Broadcast_List_Report_for_GOPENG NEW EVENING 4.csv ..
Total row: 20
Reading in Broadcast_List_Report_for_GOPENG NEW NIGHT 4.csv ..
Total row: 24
Reading in Broadcast_List_Report_for_GOPENG NEW NIGHT 5.csv 

In [None]:
# Report how many phone numbers repeat, then keep only the first occurrence of each.
print(f"Total count of phone numbers: {len(phonenum_combined)}")
is_repeat = phonenum_combined.duplicated()
dup = is_repeat.sum()
print("Total duplicated numbers:", dup)
phonenum_combined = phonenum_combined[~is_repeat]
print("Total numbers after dropping duplicate numbers:", len(phonenum_combined))

Total count of phone numbers: 5635
Total duplicated numbers: 122
Total numbers after dropping duplicate numbers: 5513


# Run the cell below only when CC has run out of numbers and asks for a new call sample: it downloads the used phone numbers so they can be excluded from the next sampling

In [None]:
# Write the used phone numbers to a dated csv and download it, so it can be uploaded
# into Databricks and excluded from the next sampling.
from google.colab import files

# Build the file name once; the leading "/" of formatted_date is removed for the name.
used_phonenum_csv = 'ivr_Gopeng_survey2023_used_phonenum_v{}.csv'.format(formatted_date.replace('/', ''))

phonenum_combined.to_csv(used_phonenum_csv, index=False)
files.download(used_phonenum_csv)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Merge all raw IVR results


In [None]:
# Stack the per-file complete responses collected in df_list into a single frame.
df_merge = pd.concat(df_list, axis='index')

# Same date label as used elsewhere in the notebook (leading zero of the month removed).
as_of = today.strftime("%d-%m-%Y").replace("-0", "-")

print(df_merge.shape)
print('\n')
print(f'IVR count by Set as of {as_of}')
print(df_merge['Set'].value_counts())

(698, 18)


IVR count by Set as of 16-1-2024
IVR    698
Name: Set, dtype: int64


In [None]:
# Count fully identical response rows, drop the repeats, then report the figures.
responses_before = df_merge.shape[0]
df_duplicated = df_merge.duplicated().sum()
df_merge = df_merge.drop_duplicates()

print(f"Total count of responses: {responses_before}")
print("Total duplicated responses:", df_duplicated)
print("Total numbers after dropping duplicate responses:", df_merge.shape[0])

Total count of responses: 698
Total duplicated responses: 2
Total numbers after dropping duplicate responses: 696


## Data Cleaning & Preprocessing

In [None]:
# Work on an independent copy: the following cells rename, drop and replace values on
# `ivr`, and doing that on a selection of df_merge raises SettingWithCopyWarning and
# leaves it ambiguous whether df_merge is affected.
ivr = df_merge.loc[df_merge['Set'] == 'IVR'].copy()
ivr

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,Set
567,60165667672,FlowNo_2=1,FlowNo_3=1,FlowNo_4=3,FlowNo_5=,FlowNo_6=,FlowNo_7=6,FlowNo_8=4,FlowNo_9=5,FlowNo_10=7,FlowNo_11=1,FlowNo_12=5,FlowNo_13=3,FlowNo_14=,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR
891,60135213231,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=2,FlowNo_8=1,FlowNo_9=1,FlowNo_10=2,FlowNo_11=2,FlowNo_12=,FlowNo_13=2,FlowNo_14=2,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR
1569,60174084401,FlowNo_2=1,FlowNo_3=1,FlowNo_4=1,FlowNo_5=1,FlowNo_6=,FlowNo_7=6,FlowNo_8=4,FlowNo_9=5,FlowNo_10=3,FlowNo_11=2,FlowNo_12=,FlowNo_13=1,FlowNo_14=,FlowNo_15=2,FlowNo_16=1,FlowNo_17=1,IVR
1638,60194220619,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=6,FlowNo_8=4,FlowNo_9=5,FlowNo_10=5,FlowNo_11=1,FlowNo_12=5,FlowNo_13=2,FlowNo_14=2,FlowNo_15=2,FlowNo_16=1,FlowNo_17=1,IVR
2227,60195204548,FlowNo_2=1,FlowNo_3=1,FlowNo_4=1,FlowNo_5=1,FlowNo_6=,FlowNo_7=4,FlowNo_8=4,FlowNo_9=2,FlowNo_10=2,FlowNo_11=2,FlowNo_12=,FlowNo_13=1,FlowNo_14=,FlowNo_15=2,FlowNo_16=2,FlowNo_17=1,IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7973,60103716484,FlowNo_2=1,FlowNo_3=2,FlowNo_4=,FlowNo_5=,FlowNo_6=,FlowNo_7=,FlowNo_8=,FlowNo_9=,FlowNo_10=,FlowNo_11=,FlowNo_12=,FlowNo_13=,FlowNo_14=,FlowNo_15=,FlowNo_16=,FlowNo_17=,IVR
6623,60125048204,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=5,FlowNo_8=1,FlowNo_9=5,FlowNo_10=7,FlowNo_11=2,FlowNo_12=,FlowNo_13=2,FlowNo_14=2,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR
9101,60135010115,FlowNo_2=1,FlowNo_3=1,FlowNo_4=1,FlowNo_5=1,FlowNo_6=,FlowNo_7=1,FlowNo_8=1,FlowNo_9=1,FlowNo_10=1,FlowNo_11=1,FlowNo_12=1,FlowNo_13=1,FlowNo_14=,FlowNo_15=1,FlowNo_16=1,FlowNo_17=1,IVR
15022,60165328736,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=5,FlowNo_8=1,FlowNo_9=4,FlowNo_10=4,FlowNo_11=1,FlowNo_12=4,FlowNo_13=2,FlowNo_14=3,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR


In [None]:
# Human-readable column names, listed in positional order (position 0, 1, 2, ...).
survey_columns = [
    'phonenum',
    'Did you vote in the Gopeng Parliament?',
    'Are you of Malay ethnicity?',
    '1. Do you feel that Malaysia is heading towards a better direction?',
    "2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?",
    "2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?",
    '3. What is the main issue in your residential area?',
    '4. Among the following parties, which do you feel is the most positive?',
    '5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?',
    '6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?',
    '7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?',
    "8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?",
    '9. Are you ready to accept a non-Malay as your representative?',
    "10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?",
    'AgeGroup',
    'Gender',
    'IncomeRange',
]

# enumerate() pairs each name with the integer column label it replaces; the 'Set'
# column is not in the mapping and therefore keeps its name.
ivr = ivr.rename(columns=dict(enumerate(survey_columns)))
ivr

Unnamed: 0,phonenum,Did you vote in the Gopeng Parliament?,Are you of Malay ethnicity?,1. Do you feel that Malaysia is heading towards a better direction?,2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?,2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?,3. What is the main issue in your residential area?,"4. Among the following parties, which do you feel is the most positive?",5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?,"6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?",7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?,8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?,9. Are you ready to accept a non-Malay as your representative?,10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?,AgeGroup,Gender,IncomeRange,Set
567,60165667672,FlowNo_2=1,FlowNo_3=1,FlowNo_4=3,FlowNo_5=,FlowNo_6=,FlowNo_7=6,FlowNo_8=4,FlowNo_9=5,FlowNo_10=7,FlowNo_11=1,FlowNo_12=5,FlowNo_13=3,FlowNo_14=,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR
891,60135213231,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=2,FlowNo_8=1,FlowNo_9=1,FlowNo_10=2,FlowNo_11=2,FlowNo_12=,FlowNo_13=2,FlowNo_14=2,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR
1569,60174084401,FlowNo_2=1,FlowNo_3=1,FlowNo_4=1,FlowNo_5=1,FlowNo_6=,FlowNo_7=6,FlowNo_8=4,FlowNo_9=5,FlowNo_10=3,FlowNo_11=2,FlowNo_12=,FlowNo_13=1,FlowNo_14=,FlowNo_15=2,FlowNo_16=1,FlowNo_17=1,IVR
1638,60194220619,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=6,FlowNo_8=4,FlowNo_9=5,FlowNo_10=5,FlowNo_11=1,FlowNo_12=5,FlowNo_13=2,FlowNo_14=2,FlowNo_15=2,FlowNo_16=1,FlowNo_17=1,IVR
2227,60195204548,FlowNo_2=1,FlowNo_3=1,FlowNo_4=1,FlowNo_5=1,FlowNo_6=,FlowNo_7=4,FlowNo_8=4,FlowNo_9=2,FlowNo_10=2,FlowNo_11=2,FlowNo_12=,FlowNo_13=1,FlowNo_14=,FlowNo_15=2,FlowNo_16=2,FlowNo_17=1,IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7973,60103716484,FlowNo_2=1,FlowNo_3=2,FlowNo_4=,FlowNo_5=,FlowNo_6=,FlowNo_7=,FlowNo_8=,FlowNo_9=,FlowNo_10=,FlowNo_11=,FlowNo_12=,FlowNo_13=,FlowNo_14=,FlowNo_15=,FlowNo_16=,FlowNo_17=,IVR
6623,60125048204,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=5,FlowNo_8=1,FlowNo_9=5,FlowNo_10=7,FlowNo_11=2,FlowNo_12=,FlowNo_13=2,FlowNo_14=2,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR
9101,60135010115,FlowNo_2=1,FlowNo_3=1,FlowNo_4=1,FlowNo_5=1,FlowNo_6=,FlowNo_7=1,FlowNo_8=1,FlowNo_9=1,FlowNo_10=1,FlowNo_11=1,FlowNo_12=1,FlowNo_13=1,FlowNo_14=,FlowNo_15=1,FlowNo_16=1,FlowNo_17=1,IVR
15022,60165328736,FlowNo_2=1,FlowNo_3=1,FlowNo_4=2,FlowNo_5=,FlowNo_6=2,FlowNo_7=5,FlowNo_8=1,FlowNo_9=4,FlowNo_10=4,FlowNo_11=1,FlowNo_12=4,FlowNo_13=2,FlowNo_14=3,FlowNo_15=4,FlowNo_16=1,FlowNo_17=1,IVR


### Drop irrelevant columns





In [None]:
# Remove the two columns this notebook treats as irrelevant for the analysis.
irrelevant_columns = ['Did you vote in the Gopeng Parliament?', 'Are you of Malay ethnicity?']
ivr = ivr.drop(columns=irrelevant_columns)

### Questionnaire map

In [None]:
import numpy as np

# Answer labels for each IVR flow number, listed in key-press order (1, 2, 3, ...).
_ivr_options = {
    2: ['Yes', 'No'],
    3: ['Yes', 'No'],
    4: ['Yes', 'No', 'Unsure'],
    5: [
        'Political stability is guaranteed',
        'The cost of living is declining',
        'Job opportunities are increasing',
        "The country's development is accelerating",
        'Social cohesion is getting stronger',
    ],
    6: [
        'Political stability is eroding',
        'The cost of living is rising',
        'Unsatisfactory job opportunities',
        "The country's development is slowing down",
        'Social cohesion is eroding',
    ],
    7: [
        'Land issues or licensing',
        'Crimes',
        'Infrastructure situation',
        'Hygiene',
        'Others',
        'No issues',
    ],
    8: ['PN', 'BN', 'PH', 'About the same'],
    9: [
        'This week',
        'This month',
        'Last month',
        'More than a month',
        'Never heard or seen',
    ],
    10: ['AMANAH', 'BERSATU', 'DAP', 'PAS', 'PKR', 'UMNO', 'Unsure'],
    11: ['Yes', 'No', 'Unsure'],
    12: [
        'Moral issues such as the increasing spread of LGBT or deviant teachings',
        'Non-Malay communities getting richer, while the Malay community is becoming poorer',
        'The influence of non-Malays in the government being too high',
        'Malay leaders in the government being too weak',
        'The lack of new policies to strengthen Malay-Islamic institutions',
    ],
    13: ['Yes', 'No', 'Unsure'],
    14: [
        'Non-Malays will always prioritize the issues of non-Malays',
        'Non-Malays do not understand the problems and challenges of Malays',
        'Non-Malays are not sincere when helping Malays',
        'Non-Malays are unable to participate in Malay-Islamic ceremonies',
        'Non-Malays are too arrogant',
    ],
    15: ['18-24 (Gen Z)', '25-40 (Millenial)', '41-56 (Gen X)', '57+ (Baby Boomers)'],
    16: ['Male', 'Female'],
    17: [
        'RM4,850 & below',
        'RM4,851 to RM10,960',
        'RM10,961 to RM15,039',
        'RM15,040 & above',
    ],
}

# Questions for ivr: raw key-press string -> label. A blank key press
# ('FlowNo_<n>=') maps to NaN; 'FlowNo_<n>=<k>' maps to the k-th label above.
q_ivr = {}
for _flow_no, _labels in _ivr_options.items():
    q_ivr[f'FlowNo_{_flow_no}='] = np.nan
    for _key_press, _label in enumerate(_labels, start=1):
        q_ivr[f'FlowNo_{_flow_no}={_key_press}'] = _label


In [None]:
# Translate every raw key-press string into its label (blank key presses become NaN).
ivr = ivr.replace(q_ivr)

In [None]:
# Sanity check after the mapping: list the distinct AgeGroup values now present.
ivr["AgeGroup"].unique()

array(['57+ (Baby Boomers)', '25-40 (Millenial)', '41-56 (Gen X)', nan,
       '18-24 (Gen Z)'], dtype=object)

## Data Quality Checking

In [None]:
# Print the distinct values of every answer column (phone numbers are skipped).
sets_dict = {'ivr': ivr}
for set_name, frame in sets_dict.items():
  answer_columns = [col for col in frame.columns if col != 'phonenum']
  for col in answer_columns:
    print(frame[col].unique())
  print(f"------------------{set_name}-------------------------")
  print("")

['Unsure' 'No' 'Yes' nan]
[nan 'Political stability is guaranteed' 'The cost of living is declining'
 'Social cohesion is getting stronger' 'Job opportunities are increasing'
 "The country's development is accelerating"]
[nan 'The cost of living is rising' 'Unsatisfactory job opportunities'
 'Social cohesion is eroding' 'Political stability is eroding'
 "The country's development is slowing down"]
['No issues' 'Crimes' 'Hygiene' 'Others' nan 'Land issues or licensing'
 'Infrastructure situation']
['About the same' 'PN' 'BN' nan 'PH']
['Never heard or seen' 'This week' 'This month' nan 'More than a month'
 'Last month']
['Unsure' 'BERSATU' 'DAP' 'PKR' nan 'AMANAH' 'UMNO' 'PAS']
['Yes' 'No' nan 'Unsure']
['The lack of new policies to strengthen Malay-Islamic institutions' nan
 'The influence of non-Malays in the government being too high'
 'Malay leaders in the government being too weak'
 'Moral issues such as the increasing spread of LGBT or deviant teachings'
 'Non-Malay communities ge

In [None]:
# Check duplicated response and drop duplicated response
print(f"Total count of responses: {ivr.shape[0]}")
df_duplicated = ivr.duplicated().sum()
print("Total duplicated response:", df_duplicated)
# Rebind `ivr` itself: previously the de-duplicated frame was stored in an unused
# variable, so `ivr` (and the count printed below) still contained the duplicates.
ivr = ivr.drop_duplicates()
# NOTE(review): `ivr_merge` is kept only in case a later cell reads it — confirm and remove if unused.
ivr_merge = ivr.copy()
print("Total numbers after dropping duplicate responses:", ivr.shape[0])

Total count of responses: 696
Total duplicated response: 0
Total numbers after dropping duplicate numbers: 696


In [None]:
# Progress check: raw CR (row) count for IVR at this point
len(ivr)

696

In [None]:
# Inspect the mapped responses before incomplete rows are dropped.
# NOTE(review): this renders respondents' phone numbers into the saved notebook output;
# consider showing only a few rows without the 'phonenum' column.
ivr

Unnamed: 0,phonenum,1. Do you feel that Malaysia is heading towards a better direction?,2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?,2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?,3. What is the main issue in your residential area?,"4. Among the following parties, which do you feel is the most positive?",5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?,"6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?",7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?,8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?,9. Are you ready to accept a non-Malay as your representative?,10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?,AgeGroup,Gender,IncomeRange,Set
567,60165667672,Unsure,,,No issues,About the same,Never heard or seen,Unsure,Yes,The lack of new policies to strengthen Malay-I...,Unsure,,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
891,60135213231,No,,The cost of living is rising,Crimes,PN,This week,BERSATU,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
1569,60174084401,Yes,Political stability is guaranteed,,No issues,About the same,Never heard or seen,DAP,No,,Yes,,25-40 (Millenial),Male,"RM4,850 & below",IVR
1638,60194220619,No,,The cost of living is rising,No issues,About the same,Never heard or seen,PKR,Yes,The lack of new policies to strengthen Malay-I...,No,Non-Malays do not understand the problems and ...,25-40 (Millenial),Male,"RM4,850 & below",IVR
2227,60195204548,Yes,Political stability is guaranteed,,Hygiene,About the same,This month,BERSATU,No,,Yes,,25-40 (Millenial),Female,"RM4,850 & below",IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7973,60103716484,,,,,,,,,,,,,,,IVR
6623,60125048204,No,,The cost of living is rising,Others,PN,Never heard or seen,Unsure,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
9101,60135010115,Yes,Political stability is guaranteed,,Land issues or licensing,PN,This week,AMANAH,Yes,Moral issues such as the increasing spread of ...,Yes,,18-24 (Gen Z),Male,"RM4,850 & below",IVR
15022,60165328736,No,,The cost of living is rising,Others,PN,More than a month,PAS,Yes,Malay leaders in the government being too weak,No,Non-Malays are not sincere when helping Malays,57+ (Baby Boomers),Male,"RM4,850 & below",IVR


# Drop rows with missing answers in the columns that have no skip logic

In [None]:
# A row counts as incomplete when any column WITHOUT skip logic is blank.
print(f'Before dropping: {len(ivr)} rows')

# Follow-up questions that are only asked depending on an earlier answer;
# blanks here are expected and must not cause a row to be dropped.
columns_to_exclude = [
    "2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?",
    "2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?",
    "8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?",
    "10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?"
]

# Every remaining column must be filled in
columns_to_check = ivr.columns[~ivr.columns.isin(columns_to_exclude)].tolist()

ivr = ivr.dropna(subset=columns_to_check)

print(f'After dropping: {len(ivr)} rows')

Before dropping: 696 rows
After dropping: 506 rows


In [None]:
# Inspect the frame after rows with blank required answers were dropped.
# NOTE(review): the rendered table includes phone numbers — avoid committing this output.
ivr

Unnamed: 0,phonenum,1. Do you feel that Malaysia is heading towards a better direction?,2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?,2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?,3. What is the main issue in your residential area?,"4. Among the following parties, which do you feel is the most positive?",5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?,"6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?",7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?,8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?,9. Are you ready to accept a non-Malay as your representative?,10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?,AgeGroup,Gender,IncomeRange,Set
567,60165667672,Unsure,,,No issues,About the same,Never heard or seen,Unsure,Yes,The lack of new policies to strengthen Malay-I...,Unsure,,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
891,60135213231,No,,The cost of living is rising,Crimes,PN,This week,BERSATU,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
1569,60174084401,Yes,Political stability is guaranteed,,No issues,About the same,Never heard or seen,DAP,No,,Yes,,25-40 (Millenial),Male,"RM4,850 & below",IVR
1638,60194220619,No,,The cost of living is rising,No issues,About the same,Never heard or seen,PKR,Yes,The lack of new policies to strengthen Malay-I...,No,Non-Malays do not understand the problems and ...,25-40 (Millenial),Male,"RM4,850 & below",IVR
2227,60195204548,Yes,Political stability is guaranteed,,Hygiene,About the same,This month,BERSATU,No,,Yes,,25-40 (Millenial),Female,"RM4,850 & below",IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11379,60175738159,Yes,Political stability is guaranteed,,Land issues or licensing,About the same,Never heard or seen,Unsure,Yes,"Non-Malay communities getting richer, while th...",Unsure,,41-56 (Gen X),Female,"RM4,850 & below",IVR
6623,60125048204,No,,The cost of living is rising,Others,PN,Never heard or seen,Unsure,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
9101,60135010115,Yes,Political stability is guaranteed,,Land issues or licensing,PN,This week,AMANAH,Yes,Moral issues such as the increasing spread of ...,Yes,,18-24 (Gen Z),Male,"RM4,850 & below",IVR
15022,60165328736,No,,The cost of living is rising,Others,PN,More than a month,PAS,Yes,Malay leaders in the government being too weak,No,Non-Malays are not sincere when helping Malays,57+ (Baby Boomers),Male,"RM4,850 & below",IVR


In [None]:
# Re-check the distinct values per answer column now that incomplete rows are gone.
sets_dict = {'ivr': ivr}
for label, table in sets_dict.items():
  answers_only = table.loc[:, table.columns != 'phonenum']
  for _, answers in answers_only.items():
    print(answers.unique())
  print(f"------------------{label}-------------------------")
  print("")

['Unsure' 'No' 'Yes']
[nan 'Political stability is guaranteed' 'The cost of living is declining'
 'Social cohesion is getting stronger' 'Job opportunities are increasing'
 "The country's development is accelerating"]
[nan 'The cost of living is rising' 'Unsatisfactory job opportunities'
 'Social cohesion is eroding' 'Political stability is eroding'
 "The country's development is slowing down"]
['No issues' 'Crimes' 'Hygiene' 'Others' 'Land issues or licensing'
 'Infrastructure situation']
['About the same' 'PN' 'BN' 'PH']
['Never heard or seen' 'This week' 'This month' 'More than a month'
 'Last month']
['Unsure' 'BERSATU' 'DAP' 'PKR' 'AMANAH' 'UMNO' 'PAS']
['Yes' 'No' 'Unsure']
['The lack of new policies to strengthen Malay-Islamic institutions' nan
 'The influence of non-Malays in the government being too high'
 'Malay leaders in the government being too weak'
 'Moral issues such as the increasing spread of LGBT or deviant teachings'
 'Non-Malay communities getting richer, while the 

In [None]:
# Display the current state of the cleaned responses.
# NOTE(review): output contains respondents' phone numbers; consider ivr.head() without 'phonenum'.
ivr

Unnamed: 0,phonenum,1. Do you feel that Malaysia is heading towards a better direction?,2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?,2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?,3. What is the main issue in your residential area?,"4. Among the following parties, which do you feel is the most positive?",5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?,"6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?",7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?,8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?,9. Are you ready to accept a non-Malay as your representative?,10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?,AgeGroup,Gender,IncomeRange,Set
567,60165667672,Unsure,,,No issues,About the same,Never heard or seen,Unsure,Yes,The lack of new policies to strengthen Malay-I...,Unsure,,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
891,60135213231,No,,The cost of living is rising,Crimes,PN,This week,BERSATU,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
1569,60174084401,Yes,Political stability is guaranteed,,No issues,About the same,Never heard or seen,DAP,No,,Yes,,25-40 (Millenial),Male,"RM4,850 & below",IVR
1638,60194220619,No,,The cost of living is rising,No issues,About the same,Never heard or seen,PKR,Yes,The lack of new policies to strengthen Malay-I...,No,Non-Malays do not understand the problems and ...,25-40 (Millenial),Male,"RM4,850 & below",IVR
2227,60195204548,Yes,Political stability is guaranteed,,Hygiene,About the same,This month,BERSATU,No,,Yes,,25-40 (Millenial),Female,"RM4,850 & below",IVR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11379,60175738159,Yes,Political stability is guaranteed,,Land issues or licensing,About the same,Never heard or seen,Unsure,Yes,"Non-Malay communities getting richer, while th...",Unsure,,41-56 (Gen X),Female,"RM4,850 & below",IVR
6623,60125048204,No,,The cost of living is rising,Others,PN,Never heard or seen,Unsure,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR
9101,60135010115,Yes,Political stability is guaranteed,,Land issues or licensing,PN,This week,AMANAH,Yes,Moral issues such as the increasing spread of ...,Yes,,18-24 (Gen Z),Male,"RM4,850 & below",IVR
15022,60165328736,No,,The cost of living is rising,Others,PN,More than a month,PAS,Yes,Malay leaders in the government being too weak,No,Non-Malays are not sincere when helping Malays,57+ (Baby Boomers),Male,"RM4,850 & below",IVR


In [None]:
# A cell only renders its last expression, so the IncomeRange values have to be
# printed explicitly; previously that result was computed and silently discarded.
print(ivr['IncomeRange'].unique())
ivr["AgeGroup"].unique()

array(['57+ (Baby Boomers)', '25-40 (Millenial)', '41-56 (Gen X)',
       '18-24 (Gen Z)'], dtype=object)

## Create Classification of Income Group

In [None]:
def classify_income(income):
    """Map an IncomeRange survey label to its income class.

    Returns 'B40', 'M40' or 'T20' for the four known range labels, and None
    for any other value (including missing values).
    """
    # The two highest ranges share the same class.
    if income in ('RM10,961 to RM15,039', 'RM15,040 & above'):
        return 'T20'
    if income == 'RM4,851 to RM10,960':
        return 'M40'
    if income == 'RM4,850 & below':
        return 'B40'
    return None

# Derive the income class of every respondent from their reported income range.
ivr['IncomeGroup'] = ivr['IncomeRange'].map(classify_income)

In [None]:
# Follow-up questions (labelled "[IF ...]") are blank by design for respondents
# who did not take that branch, so they must not count as missing data; a plain
# dropna() over all columns would discard every row.
required_cols = [col for col in ivr.columns if '[IF' not in str(col)]

print(f'Null values before dropping:\n{ivr.isnull().sum()}')
print(f'\n\n Rows before dropping : {len(ivr)}')
# dropna() returns a new frame, so the result has to be assigned back;
# previously it was discarded and the "after" report repeated the "before" one.
ivr = ivr.dropna(subset=required_cols)
print(f'\n\n Rows after dropping : {len(ivr)}')
print(f'\n\n Null values after dropping:\n{ivr.isnull().sum()}')
ivr


Null values before dropping:
phonenum                                                                                                                            0
1. Do you feel that Malaysia is heading towards a better direction?                                                                 0
2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?                                                          291
2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?                                                           346
3. What is the main issue in your residential area?                                                                                 0
4. Among the following parties, which do you feel is the most positive?                                                             0
5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?                     0
6. Based on your knowledge, which

Unnamed: 0,phonenum,1. Do you feel that Malaysia is heading towards a better direction?,2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?,2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?,3. What is the main issue in your residential area?,"4. Among the following parties, which do you feel is the most positive?",5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?,"6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?",7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?,8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?,9. Are you ready to accept a non-Malay as your representative?,10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?,AgeGroup,Gender,IncomeRange,Set,IncomeGroup
567,60165667672,Unsure,,,No issues,About the same,Never heard or seen,Unsure,Yes,The lack of new policies to strengthen Malay-I...,Unsure,,57+ (Baby Boomers),Male,"RM4,850 & below",IVR,B40
891,60135213231,No,,The cost of living is rising,Crimes,PN,This week,BERSATU,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR,B40
1569,60174084401,Yes,Political stability is guaranteed,,No issues,About the same,Never heard or seen,DAP,No,,Yes,,25-40 (Millenial),Male,"RM4,850 & below",IVR,B40
1638,60194220619,No,,The cost of living is rising,No issues,About the same,Never heard or seen,PKR,Yes,The lack of new policies to strengthen Malay-I...,No,Non-Malays do not understand the problems and ...,25-40 (Millenial),Male,"RM4,850 & below",IVR,B40
2227,60195204548,Yes,Political stability is guaranteed,,Hygiene,About the same,This month,BERSATU,No,,Yes,,25-40 (Millenial),Female,"RM4,850 & below",IVR,B40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11379,60175738159,Yes,Political stability is guaranteed,,Land issues or licensing,About the same,Never heard or seen,Unsure,Yes,"Non-Malay communities getting richer, while th...",Unsure,,41-56 (Gen X),Female,"RM4,850 & below",IVR,B40
6623,60125048204,No,,The cost of living is rising,Others,PN,Never heard or seen,Unsure,No,,No,Non-Malays do not understand the problems and ...,57+ (Baby Boomers),Male,"RM4,850 & below",IVR,B40
9101,60135010115,Yes,Political stability is guaranteed,,Land issues or licensing,PN,This week,AMANAH,Yes,Moral issues such as the increasing spread of ...,Yes,,18-24 (Gen Z),Male,"RM4,850 & below",IVR,B40
15022,60165328736,No,,The cost of living is rising,Others,PN,More than a month,PAS,Yes,Malay leaders in the government being too weak,No,Non-Malays are not sincere when helping Malays,57+ (Baby Boomers),Male,"RM4,850 & below",IVR,B40


# Combine before & after 6/12 IVR Results

In [None]:
# Load the cleaned IVR results saved under "Before 6 Dec" and preview the first rows.
before_6dec_xlsx = '/content/drive/MyDrive/Clients Deliverables/Gopeng Parliament Survey 2023/Gopeng_Cleaned_Result/Before 6 Dec/Before6DecNew_IVR_Gopeng_survey_cleanedfiles.xlsx'
df_before = pd.read_excel(before_6dec_xlsx)
df_before.head()

Unnamed: 0,phonenum,1. Do you feel that Malaysia is heading towards a better direction?,2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?,2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?,3. What is the main issue in your residential area?,"4. Among the following parties, which do you feel is the most positive?",5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?,"6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?",7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?,8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?,9. Are you ready to accept a non-Malay as your representative?,10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?,AgeGroup,Gender,IncomeRange,IncomeGroup,Set
0,60193094069,Yes,Political stability is guaranteed,,Land issues or licensing,About the same,This week,PAS,Yes,Moral issues such as the increasing spread of ...,Yes,,18-24 (Gen Z),Male,"RM4,850 & below",B40,IVR
1,60136283022,Unsure,,,Infrastructure situation,About the same,More than a month,DAP,Yes,Malay leaders in the government being too weak,Yes,,25-40 (Millenial),Female,"RM4,850 & below",B40,IVR
2,60135840510,No,,The cost of living is rising,Others,PN,More than a month,DAP,Yes,Malay leaders in the government being too weak,Unsure,,41-56 (Gen X),Male,"RM15,040 & above",T20,IVR
3,60165537125,No,,The country's development is slowing down,Others,PN,Never heard or seen,UMNO,Yes,Malay leaders in the government being too weak,No,Non-Malays are not sincere when helping Malays,41-56 (Gen X),Male,"RM4,851 to RM10,960",M40,IVR
4,60125202498,No,,The cost of living is rising,Others,PN,Never heard or seen,Unsure,Yes,Malay leaders in the government being too weak,Yes,,57+ (Baby Boomers),Male,"RM10,961 to RM15,039",T20,IVR


In [None]:
# Combine both before and after 6/12 IVR results.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is
# the supported equivalent and keeps the same row order (ivr rows first) and the
# original index labels.
all_results = pd.concat([ivr, df_before])

In [None]:
# Row count alongside the (rows, columns) shape of the combined frame.
len(all_results), all_results.shape

(604, (604, 17))

In [None]:
# Inspect the column labels of the combined results.
all_results.columns

Index(['phonenum',
       '1. Do you feel that Malaysia is heading towards a better direction?',
       '2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?',
       '2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?',
       '3. What is the main issue in your residential area?',
       '4. Among the following parties, which do you feel is the most positive?',
       '5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?',
       '6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?',
       '7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?',
       '8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?',
       '9. Are you ready to accept a non-Malay as your representative?',
       '10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?',
  

In [None]:
# Count rows per 'Set' label in the combined results.
all_results['Set'].value_counts()

IVR    604
Name: Set, dtype: int64

## Data Quality checking

In [None]:
# Show the current column labels before they are reordered below.
all_results.columns

Index(['phonenum',
       '1. Do you feel that Malaysia is heading towards a better direction?',
       '2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?',
       '2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?',
       '3. What is the main issue in your residential area?',
       '4. Among the following parties, which do you feel is the most positive?',
       '5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?',
       '6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?',
       '7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?',
       '8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?',
       '9. Are you ready to accept a non-Malay as your representative?',
       '10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?',
  

In [None]:
# Survey questions, in questionnaire order.
question_cols = [
    '1. Do you feel that Malaysia is heading towards a better direction?',
    "2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?",
    "2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?",
    '3. What is the main issue in your residential area?',
    '4. Among the following parties, which do you feel is the most positive?',
    '5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?',
    '6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?',
    '7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?',
    "8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?",
    '9. Are you ready to accept a non-Malay as your representative?',
    "10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?",
]

# Respondent profile columns, placed after the questions.
profile_cols = ['AgeGroup', 'Gender', 'IncomeRange', 'IncomeGroup', 'Set']

# Final column order: phone number, then questions, then respondent profile.
reorder_col = ['phonenum', *question_cols, *profile_cols]

all_results = all_results.loc[:, reorder_col]

In [None]:
# Final sanity check: print the frequency table of every column in turn.
for column_name in all_results.columns:
  frequency = all_results[column_name].value_counts()
  # Same spacing as before: the table followed by two blank lines.
  print(frequency, end="\n\n\n")

60124998730    2
60176655765    2
60165667672    1
60199161390    1
60165506233    1
              ..
60164164500    1
60132683816    1
60174230023    1
60125875527    1
60197282190    1
Name: phonenum, Length: 602, dtype: int64


Yes       253
No        194
Unsure    157
Name: 1. Do you feel that Malaysia is heading towards a better direction?, dtype: int64


Political stability is guaranteed            139
The country's development is accelerating     45
Social cohesion is getting stronger           33
The cost of living is declining               27
Job opportunities are increasing               9
Name: 2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?, dtype: int64


The cost of living is rising                 121
Political stability is eroding                32
Social cohesion is eroding                    17
Unsatisfactory job opportunities              13
The country's development is slowing down     11
Name: 2b. [IF 'NO' FOR QUESTION 1] What is the main 

In [None]:
# Keep only the first row seen for each phone number, then display the
# per-number counts.
first_per_phone = ~all_results.duplicated(subset='phonenum')
all_results = all_results.loc[first_per_phone]
all_results['phonenum'].value_counts()

60165667672    1
60132512508    1
60194110400    1
60135080219    1
60199161390    1
              ..
60195304787    1
60164164500    1
60132683816    1
60174230023    1
60197282190    1
Name: phonenum, Length: 602, dtype: int64

In [None]:
# # Drop phonenum that answer the survey 2 times.
# all_results = all_results[~((all_results['phonenum'] == '60124998730') & all_results.duplicated('phonenum', keep='first'))]
# all_results['phonenum'].value_counts()

In [None]:
# Re-count rows per 'Set' label after duplicate phone numbers were dropped.
all_results['Set'].value_counts()







IVR    602
Name: Set, dtype: int64

## Export the combined before- and after-6/12 set for weighting in Databricks


In [None]:
from google.colab import files

# The intent is for the version suffix to match the latest run date.
combined_xlsx = '11Jan24_ivr_Gopeng_survey_all_sets_combined_v{}.xlsx'.format(formatted_date.replace('/', ''))

# Write the workbook without the index column, then hand it to the browser for download.
all_results.to_excel(combined_xlsx, index=False)
files.download(combined_xlsx)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Final formatting before Crosstab: the weighting must first be done in Databricks, integrated with ER_Stats

In [None]:
import pandas as pd
import numpy as np
from datetime import date

# Date stamp used to track progress by date.
today = date.today()
day_month_year = today.strftime("%d-%m-%Y")
# Replacing "-0" removes the zero padding of the month only; the day keeps its
# padding because no hyphen precedes it.
formatted_date = "/" + day_month_year.replace("-0", "-")
print("Today's date:", formatted_date)

Today's date: /18-12-2023


In [None]:
from google.colab import files

# files.upload() returns a {filename: file bytes} mapping. Leaving the call as
# the cell's last expression echoes that mapping, which dumps the whole file
# (respondent phone numbers included) into the saved notebook output.
# Bind it instead and display only the uploaded file names.
uploaded = files.upload()
list(uploaded)

Saving GopengParliament-Survey2023-all-sets-392CR-weighted.csv to GopengParliament-Survey2023-all-sets-392CR-weighted.csv


{'GopengParliament-Survey2023-all-sets-392CR-weighted.csv': b'phonenum,1_ Do you feel that Malaysia is heading towards a better direction?,2a_ [IF \'YES\' FOR QUESTION 1] What is the main reason you feel that way?,2b_ [IF \'NO\' FOR QUESTION 1] What is the main reason you feel that way?,3_ What is the main issue in your residential area?,"4_ Among the following parties, which do you feel is the most positive?",5_ When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?,"6_ Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?",7_ Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?,8_ [IF \'YES\' TO QUESTION 7] What is the main reason you feel that way?,9_ Are you ready to accept a non-Malay as your representative?,10_ [IF \'NO\' TO QUESTION 9] What is the main reason you feel that way?,AgeGroup,Gender,IncomeGroup,IncomeClass,Set,untr

In [None]:
# Names of the weighted CSV files to load and combine.
weighted_file = ['GopengParliament-Survey2023-all-sets-392CR-weighted.csv']

In [None]:
# Read every weighted file and stack the frames row-wise into one.
weighted_list = [pd.read_csv(csv_name) for csv_name in weighted_file]
weighted_results = pd.concat(weighted_list, axis=0)

In [None]:
# Replace every "_" in the column labels with "." (e.g. "1_ ..." becomes "1. ..."),
# leaving the two weight columns untouched.
weight_cols = {'untrimmed_weight', 'trimmed_weight'}
dotted_names = {
    label: label.replace('_', '.')
    for label in weighted_results.columns
    if label not in weight_cols
}
# One rename call with the full mapping instead of one call per column.
weighted_results.rename(columns=dotted_names, inplace=True)

In [None]:
# Inspect the column labels after the "_" -> "." rename.
weighted_results.columns

Index(['phonenum',
       '1. Do you feel that Malaysia is heading towards a better direction?',
       '2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?',
       '2b. [IF 'NO' FOR QUESTION 1] What is the main reason you feel that way?',
       '3. What is the main issue in your residential area?',
       '4. Among the following parties, which do you feel is the most positive?',
       '5. When was the last time you heard or saw news about your Member of Parliament or the parliamentary machinery?',
       '6. Based on your knowledge, which political party does the representative for the Gopeng parliamentary constituency come from?',
       '7. Do you feel that the rights of Malay-Muslims are currently under threat in Malaysia?',
       '8. [IF 'YES' TO QUESTION 7] What is the main reason you feel that way?',
       '9. Are you ready to accept a non-Malay as your representative?',
       '10. [IF 'NO' TO QUESTION 9] What is the main reason you feel that way?',
  

In [None]:
# Summary statistics of the untrimmed weights.
weighted_results['untrimmed_weight'].describe()

count    392.000000
mean       1.000000
std        1.373285
min        0.149254
25%        0.158732
50%        0.362129
75%        1.452062
max        9.245614
Name: untrimmed_weight, dtype: float64

In [None]:
# Final sanity check: print the frequency table of every column in turn.
for column_name in weighted_results.columns:
  frequency = weighted_results[column_name].value_counts()
  # Same spacing as before: the table followed by two blank lines.
  print(frequency, end="\n\n\n")

60195628425     2
601126564245    2
60165537125     2
60189518350     2
60175846806     2
               ..
60195789972     1
60132330922     1
60185799762     1
60175785097     1
60197282190     1
Name: phonenum, Length: 384, dtype: int64


Yes       164
No        128
Unsure    100
Name: 1. Do you feel that Malaysia is heading towards a better direction?, dtype: int64


Political stability is guaranteed            91
The country's development is accelerating    30
Social cohesion is getting stronger          21
The cost of living is declining              15
Job opportunities are increasing              7
Name: 2a. [IF 'YES' FOR QUESTION 1] What is the main reason you feel that way?, dtype: int64


The cost of living is rising                 76
Political stability is eroding               22
Social cohesion is eroding                   15
Unsatisfactory job opportunities              8
The country's development is slowing down     7
Name: 2b. [IF 'NO' FOR QUESTION 1] What is the main

In [None]:
from google.colab import files

# Export to be run through the Crosstab Generator; the file name carries the run date.
weighted_xlsx = 'Gopeng-Survey2023-weighted-formatted_v{}.xlsx'.format(formatted_date.replace('/', ''))

# Write the workbook without the index column, then hand it to the browser for download.
weighted_results.to_excel(weighted_xlsx, index=False)
files.download(weighted_xlsx)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>