### Importing Libraries-


In [1]:

import pandas as pd
from textblob import TextBlob
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from collections import Counter


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Deepesh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Deepesh\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Sentiment Analysis -
This function takes a review as input and uses TextBlob to calculate the sentiment polarity. The polarity score ranges from -1 (negative) to +1 (positive), indicating the sentiment of the review.

In [2]:
def sentiment_analysis(review_text):
    #Evaluate the sentiment polarity of a review using TextBlob
    return TextBlob(review_text).sentiment.polarity


### Topic Extraction-
The function tokenizes the review text into words, removes stopwords (common words that don't contribute much meaning), and then finds the most common words. These common words are considered as the main topics of the review.

In [3]:
def extract_topic(review_text):
    #Extracting the most common topics from a review using NLTK
    stop_words = set(stopwords.words('english'))
    tokens = word_tokenize(review_text.lower())
    relevant_words = [token for token in tokens if token not in stop_words and token.isalpha()]
    top_words = Counter(relevant_words).most_common(3)
    extracted_topics = [word for word, _ in top_words]
    return ', '.join(extracted_topics)

### Summarization-
The reviews are combined into a single text and then summarized using the Latent Semantic Analysis (LSA) algorithm from the sumy library. The sentences_count parameter controls the number of sentences in the summary.


In [4]:
def generate_summary(reviews_list):
    #Generating a summary of reviews for a college using Sumy's LSA summarizer
    combined_reviews = ' '.join(reviews_list)
    parser = PlaintextParser.from_string(combined_reviews, Tokenizer("english"))
    summarizer = LsaSummarizer()
    summary = summarizer(parser.document, sentences_count=3)  # can be adjusted the number of sentences as needed
    return ' '.join([str(sentence) for sentence in summary])

### Processing Data-
It applies the sentiment analysis and topic extraction functions to each review and adds the results as new columns in the DataFrame.
For each college, it generates a summary of all reviews and stores the results in a new DataFrame.
Finally, it saves the updated data and summaries to Excel file

In [5]:
def process_data():
    # Loading the dataset
    data = pd.read_excel('Sample_Interview.xlsx')

    #Applying sentiment analysis and topic extraction
    data['polarity'] = data['Review'].apply(sentiment_analysis)
    data['topic'] = data['Review'].apply(extract_topic)


    # Generating summaries for each college
    summary_data = pd.DataFrame(columns=['college_name', 'review_summary'])
    for college, group in data.groupby('college_name'):
            summary = generate_summary(group['Review'])
            summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)


    # Saving the updated data and summaries
    data.to_excel('Output_Sample_Interview.xlsx', index=False)
    summary_data.to_excel('College_Summary.xlsx', index=False)

    print("Hurray! Tasks completed.")

if __name__ == '__main__':
    process_data()

  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)
  summary_data = summary_data.append({'college_name': college, 'review_summary': summary}, ignore_index=True)


Hurray! Tasks completed.
