In [None]:
# Install spaCy and its small English model (needs to be done once per session)
!pip install spacy
!python -m spacy download en_core_web_sm

import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import spacy
from collections import Counter

# Download NLTK VADER lexicon
# (presumably the data SentimentIntensityAnalyzer scores against — confirm in the NLTK docs)
nltk.download('vader_lexicon')

# Load SpaCy English model; the resulting `nlp` pipeline is what
# extract_keywords uses to tag and lemmatize each response.
nlp = spacy.load('en_core_web_sm')

# Sample survey data: one free-text response per patient ID.
data = dict(
    PatientID=[1, 2, 3, 4, 5],
    SurveyResponse=[
        "The staff were very helpful and the waiting time was short.",
        "I felt the doctor was rushed and didn’t explain things clearly.",
        "Excellent service, very satisfied with the treatment.",
        "Long wait times and unorganized process, needs improvement.",
        "Friendly nurses and clean facility.",
    ],
)

# Tabular view of the responses; later steps add derived columns to it.
df = pd.DataFrame(data=data)

# Single analyzer instance, created once and reused by analyze_sentiment
# for every survey response.
sia = SentimentIntensityAnalyzer()

# Typographic apostrophes mapped to the ASCII apostrophe before scoring.
_APOSTROPHE_TABLE = str.maketrans({
    '\u2019': "'",  # right single quotation mark, e.g. "didn’t"
    '\u2018': "'",  # left single quotation mark
    '\u02bc': "'",  # modifier letter apostrophe
})


def analyze_sentiment(text):
    """Return the compound sentiment score of a survey response.

    Typographic apostrophes are replaced with the ASCII apostrophe before
    the text is handed to the analyzer. Contractions typed with a curly
    apostrophe (such as "didn’t") are otherwise not matched by the
    analyzer's negation handling, so a negated positive word is scored as
    if it were plainly positive. Text that already uses ASCII apostrophes
    is scored exactly as before.

    Args:
        text: The response text to score.

    Returns:
        The ``'compound'`` value from ``sia.polarity_scores``.
    """
    normalized = text.translate(_APOSTROPHE_TABLE)
    return sia.polarity_scores(normalized)['compound']

# Score every response and store the result in a new SentimentScore column.
df['SentimentScore'] = df['SurveyResponse'].apply(analyze_sentiment)

def categorize_sentiment(score):
    """Map a numeric sentiment score to a category label.

    Args:
        score: The sentiment score to classify.

    Returns:
        ``'Positive'`` when ``score >= 0.05``, ``'Negative'`` when
        ``score <= -0.05``, and ``'Neutral'`` for anything else.
    """
    is_positive = score >= 0.05
    is_negative = score <= -0.05
    # Anything that clears neither threshold falls in the neutral band.
    if not (is_positive or is_negative):
        return 'Neutral'
    return 'Positive' if is_positive else 'Negative'

# Turn each numeric score into a Positive / Negative / Neutral label.
df['SentimentCategory'] = df['SentimentScore'].apply(categorize_sentiment)

def extract_keywords(text):
    """Collect the lemmas of the nouns and adjectives in a response.

    The text is lower-cased, run through the spaCy pipeline ``nlp``, and
    every token tagged ``NOUN`` or ``ADJ`` that is not a stop word
    contributes its lemma.

    Args:
        text: The response text to process.

    Returns:
        A list of lemma strings in token order (duplicates are kept).
    """
    wanted_pos = ['NOUN', 'ADJ']
    lemmas = []
    for tok in nlp(text.lower()):
        if tok.pos_ not in wanted_pos:
            continue
        if not tok.is_stop:
            lemmas.append(tok.lemma_)
    return lemmas

# Store the list of extracted keyword lemmas for each response.
df['Keywords'] = df['SurveyResponse'].apply(extract_keywords)

# Flatten the per-response keyword lists into one list. The nested
# comprehension is linear in the total number of keywords, whereas
# sum(lists, []) re-copies the accumulated list on every addition.
all_keywords = [keyword for keywords in df['Keywords'] for keyword in keywords]

# Count how often each keyword occurs across all responses.
keyword_freq = Counter(all_keywords)

# Report the per-response sentiment, then the ten most frequent keywords.
print("Sentiment Analysis by Response:")
print(df[['PatientID', 'SurveyResponse', 'SentimentCategory', 'SentimentScore']])
print("\nMost common keywords:")
print(keyword_freq.most_common(10))



Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m105.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Sentiment Analysis by Response:
   PatientID                                     SurveyResponse  \
0          1  The staff were very helpful and the waiting ti...   
1          2  I felt the doctor was rushed and didn’t explai...   
2          3  Excellent service, very satisfied with the tre...   
3          4  Long wait times and unorganized process, needs...   
4          5                Friendly nurses and clean facility.   

  SentimentCategory  SentimentScore  
0          Positive          0.4754  
1          Positive          0.4019  
2          Positive          0.7778  
3          Positive          0.4588  
4          Positive          0.7096  

Most common keywords:
[('time', 2), ('staff', 1), ('helpful', 1), ('waiting', 1), ('short', 1), ('doctor', 1), ('thing', 1), ('excellent', 1), ('service', 1), ('satisfied', 1)]
