In [1]:
import chardet

with open('Chat.csv', 'rb') as f:
    result = chardet.detect(f.read())

print(result['encoding'])


Windows-1252


## Data Preprocessing  

In [2]:
import pandas as pd
import re
import nltk
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.nlp.tokenizers import Tokenizer
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\subed\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
# import pandas as pd
df = pd.read_csv('Chat.csv', encoding='Windows-1252')



In [4]:
df.head()

Unnamed: 0,Question,Answer,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 86,Unnamed: 87,Unnamed: 88,Unnamed: 89,Unnamed: 90,Unnamed: 91,Unnamed: 92,Unnamed: 93,Unnamed: 94,Answers(411)
0,What are the requirements for voting by absent...,Voters unable to vote in person on Election Da...,,,,,,,,,...,,,,,,,,,,Absentee voting is available if you meet any o...
1,What is the voter registration deadline?,"Primary Election Date is June 7, 2022 (Registe...",,,,,,,,,...,,,,,,,,,,n person registration at the county clerk's of...
2,Where can I cast my vote?,"After registering to vote, your Voter Registra...",,,,,,,,,...,,,,,,,,,,
3,What are the registration qualifications to vote?,Every U.S. citizen who possesses the following...,,,,,,,,,...,,,,,,,,,,
4,How to register by mail to vote?,1. Complete a Mail-In Voter Registration Appli...,,,,,,,,,...,,,,,,,,,,


In [5]:
# Select columns that have a name
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

# Remove columns that have only NaN values
df = df.dropna(axis=1, how='all')

In [6]:
df.head()

Unnamed: 0,Question,Answer,Answers(411)
0,What are the requirements for voting by absent...,Voters unable to vote in person on Election Da...,Absentee voting is available if you meet any o...
1,What is the voter registration deadline?,"Primary Election Date is June 7, 2022 (Registe...",n person registration at the county clerk's of...
2,Where can I cast my vote?,"After registering to vote, your Voter Registra...",
3,What are the registration qualifications to vote?,Every U.S. citizen who possesses the following...,
4,How to register by mail to vote?,1. Complete a Mail-In Voter Registration Appli...,


In [7]:
def preprocess_text(text):
    # Loading spaCy model
    nlp = spacy.load('en_core_web_sm')

    # Check if text is a string
    if isinstance(text, str):
        # Converting text to lowercase
        text = text.lower()

        # Removing extra whitespaces
        text = ' '.join(text.split())

        # Removing punctuation
        text = re.sub(r'[^\w\s]', '', text)

        # Lemmatization
        doc = nlp(text)
        text = ' '.join([token.lemma_ for token in doc])

    return text


In [8]:
# Preprocessing text data
df['preprocessed_text'] = df['Answer'].apply(preprocess_text)


In [10]:
df['preprocessed_text'] = df['preprocessed_text'].astype(str)


In [9]:
# Saving preprocessed data as a new CSV file
df.to_csv('preprocessed_file.csv', index=False)

## NLP Analysis

In [10]:
df.head()

Unnamed: 0,Question,Answer,Answers(411),preprocessed_text
0,What are the requirements for voting by absent...,Voters unable to vote in person on Election Da...,Absentee voting is available if you meet any o...,voter unable to vote in person on election day...
1,What is the voter registration deadline?,"Primary Election Date is June 7, 2022 (Registe...",n person registration at the county clerk's of...,primary election date be june 7 2022 register ...
2,Where can I cast my vote?,"After registering to vote, your Voter Registra...",,after register to vote your voter registration...
3,What are the registration qualifications to vote?,Every U.S. citizen who possesses the following...,,every us citizen who possess the follow qualif...
4,How to register by mail to vote?,1. Complete a Mail-In Voter Registration Appli...,,1 complete a mailin voter registration applica...


In [11]:
from sumy.summarizers.lex_rank import LexRankSummarizer

summarizer = LexRankSummarizer()


In [12]:
from sumy.parsers.plaintext import PlaintextParser

document = PlaintextParser.from_string(df['preprocessed_text'], Tokenizer("english"))


In [13]:
def get_summary(text, length=3):
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    summary = summarizer(parser.document, length)
    summary_text = "\n".join([str(sentence) for sentence in summary])
    return summary_text

In [25]:
df['SummaryOfficial'] = df['Answer'].apply(get_summary)

df['Summary(411)'] = df['Answers(411)'].apply(get_summary)

In [26]:
df.head()

Unnamed: 0,Question,Answer,Answers(411),preprocessed_text,SummaryOfficial,Summary(411)
0,What are the requirements for voting by absent...,Voters unable to vote in person on Election Da...,Absentee voting is available if you meet any o...,voter unable to vote in person on election day...,"Voters, who are required to be at work while t...",The last day to request an absentee ballot is ...
1,What is the voter registration deadline?,"Primary Election Date is June 7, 2022 (Registe...",n person registration at the county clerk's of...,primary election date be june 7 2022 register ...,"Primary Election Date is June 7, 2022 (Registe...",n person registration at the county clerk's of...
2,Where can I cast my vote?,"After registering to vote, your Voter Registra...",,after register to vote your voter registration...,"After registering to vote, your Voter Registra...",
3,What are the registration qualifications to vote?,Every U.S. citizen who possesses the following...,,every us citizen who possess the follow qualif...,Every U.S. citizen who possesses the following...,
4,How to register by mail to vote?,1. Complete a Mail-In Voter Registration Appli...,,1 complete a mailin voter registration applica...,If you do not provide your driverâ€™s license ...,


In [27]:
df['SummaryOfficial'][0]

'Voters, who are required to be at work while the polling places are open on Election Day or will be out of town, must absentee vote in person.\nPlease check with your Circuit or Municipal Clerk to determine if you are entitled to vote by an absentee ballot and to learn the procedures for doing so.\nUOCAVA voters may register to vote using the FPCA until ten days before an election and may receive and return an absentee ballot by mail, email, or fax.'

In [28]:
df['preprocessed_text'][0]

'voter unable to vote in person on election day may be eligible to vote by absentee ballot most absentee voter must appear before the circuit clerk or municipal clerk and absentee vote in person a few category of absentee voter may request a mail ballot voter who be require to be at work while the polling place be open on election day or will be out of town must absentee vote in person absentee voter who be 65 or old have a permanent or temporary physical disability or be temporarily reside outside their county of residence may absentee vote by mail please check with your circuit or municipal clerk to determine if you be entitle to vote by an absentee ballot and to learn the procedure for do so if you know you will vote by absentee ballot you may visit or contact your circuit or municipal clerkâs office within 45 day of the election voter include within the uniform and overseas citizen absentee voting act uocava such as member of the military and overseas citizen may register to vote a

In [29]:
df['Answers(411)'][0]

'Absentee voting is available if you meet any of the criteria below. The last day to request an absentee ballot is 5 days before the election.\xa0 You can return your absentee ballot request form through the mail or in person. Voted ballots must be postmarked by Election Day and received up to 5 days after the election in order to be counted. Contact your local elections office for more information. You are eligible to vote absentee if you are a qualified and registered voter who will be absent from your county of residence on Election Day, or are: A disabled war veteran who is a patient in any hospital and a citizen of Mississippi A citizen of Mississippi temporarily residing outside the territorial limits of the United States and the District of Columbia An employee engaged in interstate transportation A student, teacher or administrator An employee engaged in offshore employment, or as an employee on a vessel or other watercraft An employee, businessperson, professional, tradesman o

In [30]:
df['Summary(411)'][0]

'The last day to request an absentee ballot is 5 days before the election.\nYou are eligible to vote absentee if you are a qualified and registered voter who will be absent from your county of residence on Election Day, or are: A disabled war veteran who is a patient in any hospital and a citizen of Mississippi A citizen of Mississippi temporarily residing outside the territorial limits of the United States and the District of Columbia An employee engaged in interstate transportation A student, teacher or administrator An employee engaged in offshore employment, or as an employee on a vessel or other watercraft An employee, businessperson, professional, tradesman or worker required to be over 50 miles away from the county of residence on election day due to employment A person with a temporary or permanent physical disability 65 years of age or older A parent, spouse or dependent of a person with a temporary or permanent disability hospitalized more than 50 miles from home county and w