# Logical Explanation for Below Code:


1. Read the data from an Excel file containing college reviews.

2. Define functions to:
   - Determine the sentiment of a text.
   - Perform topic modeling on a collection of texts.
   - Summarize reviews for a given college.

3. Assign a sentiment label ('Positive', 'Neutral', or 'Negative') to each review by applying the sentiment analysis function to the 'Review' column; the label is stored in the 'polarity' column.

4. Assign a topic to each review by fitting a two-topic LDA model on the 'Review' column and storing the index of each review's highest-weighted topic in the 'topic' column.

5. Create a summary table for each college by:
   - Extracting unique college names.
   - For each college, concatenating all of its reviews (separated by spaces) into a single summary string.

6. Save the results to a new Excel file with two sheets:
   - 'Review_Data_with_Sentiment_and_Topic': Contains the original data with sentiment and topic columns.
   - 'Review_Summary': Contains the summary of reviews for each college.


In [9]:
import pandas as pd
from textblob import TextBlob
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

In [10]:
# Read the data from the Excel file.
# The path is hard-coded to an absolute location under /content, so the
# workbook must already exist there when this cell runs.
# Later cells read the 'Review' and 'college_name' columns from this frame.
df = pd.read_excel("/content/Sample_Interview.xlsx")

In [11]:
# Function to determine sentiment
def get_sentiment(text):
    """
    Determine the sentiment of a given text.

    Parameters:
    text (str): The text for which sentiment needs to be determined.
        Non-string values (e.g. None or the NaN pandas uses for empty
        cells) and blank strings are accepted and reported as 'Neutral'.

    Returns:
    str: The sentiment of the text ('Positive', 'Neutral', or 'Negative').
    """
    # Guard before touching TextBlob: it only accepts strings, so a missing
    # review would otherwise abort the whole column-wide apply. Blank text
    # carries no sentiment, so it is reported as 'Neutral' as well.
    if not isinstance(text, str) or not text.strip():
        return 'Neutral'

    # Read the polarity score once and classify by its sign.
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'Positive'
    if polarity == 0:
        return 'Neutral'
    return 'Negative'



In [12]:
# Function to perform topic modeling
def get_topics(texts, n_topics=2, random_state=42):
    """
    Perform topic modeling on a collection of texts.

    Parameters:
    texts (iterable of str): The texts to be analyzed (a list or a pandas
        Series of strings both work).
    n_topics (int): Number of topics the LDA model is fitted with.
        Defaults to 2.
    random_state (int or None): Seed forwarded to the LDA model so that
        repeated runs give the same topics. Defaults to 42.

    Returns:
    tuple: A tuple containing the fitted vectorizer and the fitted LDA model.
    """
    # Bag-of-words counts; English stop words are removed, and max_df/min_df
    # prune terms by how many documents they appear in.
    vectorizer = CountVectorizer(max_df=0.95, min_df=2, stop_words='english')
    term_counts = vectorizer.fit_transform(texts)

    lda_model = LatentDirichletAllocation(
        n_components=n_topics,
        random_state=random_state,
    )
    lda_model.fit(term_counts)
    return vectorizer, lda_model


In [5]:
# Function to summarize reviews for each college
def summarize_reviews(college_name, reviews_df=None):
    """
    Summarize reviews for a given college.

    Parameters:
    college_name (str): The name of the college for which reviews need to be summarized.
    reviews_df (pandas.DataFrame, optional): Frame with 'college_name' and
        'Review' columns to read from. Defaults to the module-level `df`.

    Returns:
    str: All reviews for the given college joined by single spaces; an
        empty string when the college has no (non-missing) reviews.
    """
    source = df if reviews_df is None else reviews_df
    reviews = source.loc[source['college_name'] == college_name, 'Review']
    # str.join rejects non-string items, so drop missing reviews and coerce
    # anything else (e.g. a purely numeric cell) to text before joining.
    return ' '.join(reviews.dropna().astype(str))


In [6]:
# Assign sentiment to each review.
# get_sentiment is applied to every value in 'Review'; the label it returns
# ('Positive', 'Neutral', or 'Negative') is stored in a new 'polarity' column.
# Note that the column holds the label string, not a numeric polarity score.
df['polarity'] = df['Review'].apply(get_sentiment)


In [7]:
# Assign topic to each review
# Fit the vectorizer and the LDA model on all reviews, then score those same
# reviews against the fitted model.
vectorizer, topics_model = get_topics(df['Review'])
review_term_counts = vectorizer.transform(df['Review'])
topic_weights = topics_model.transform(review_term_counts)
# Label each review with the index of its highest-weighted topic.
df['topic'] = topic_weights.argmax(axis=1)


In [13]:
# Create a summary table for each college
# One row per distinct college name, in order of first appearance, paired
# with the joined text of that college's reviews.
colleges = df['college_name'].unique()
summary_data = {
    'college': list(colleges),
    'summary': [summarize_reviews(name) for name in colleges],
}

summary_df = pd.DataFrame(summary_data)


In [None]:
# Save the results to a new Excel file
# Excel limits worksheet names to 31 characters. The longer name
# 'Review_Data_with_Sentiment_and_Topic' (36 characters) exceeds that limit,
# which some writer engines reject and Excel may refuse to open, so the
# per-review sheet uses a shorter name.
with pd.ExcelWriter('review_analysis_results.xlsx') as writer:
    # Sheet 1: the original rows plus the added 'polarity' and 'topic' columns.
    df.to_excel(writer, sheet_name='Review_Data_Sentiment_Topic', index=False)
    # Sheet 2: one row per college with its concatenated reviews.
    summary_df.to_excel(writer, sheet_name='Review_Summary', index=False)


