<a href="https://colab.research.google.com/github/viga132/peojectgit/blob/main/brain%20injury%20senser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Brain Injury Symptom Extraction from Reddit

import praw
import pandas as pd
import re
import spacy
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import seaborn as sns

# Download NLTK data
# 'vader_lexicon' is the resource requested here; the analyzer created just
# below is NLTK's VADER-based SentimentIntensityAnalyzer.
nltk.download('vader_lexicon')

# Initialize sentiment analyzer
# `sia` is used by the collection loop to score each post.
sia = SentimentIntensityAnalyzer()

# Load SpaCy model
# NOTE(review): `nlp` is not referenced anywhere else in the visible code —
# confirm whether the model load is still needed.
nlp = spacy.load("en_core_web_sm")

# Reddit API credentials (replace with your own)
# The client_id / client_secret values below are placeholders, not working
# credentials; they must be replaced before the collection loop can run.
reddit = praw.Reddit(
    client_id='YOUR_CLIENT_ID',
    client_secret='YOUR_CLIENT_SECRET',
    user_agent='brain_injury_nlp_project'
)

# Parameters
# Names of the subreddits iterated by the collection loop.
subreddits = ['BrainInjury', 'traumaticbraininjury', 'AskDocs']
# Passed as `limit` to subreddit.hot() for each subreddit.
num_posts = 100

def clean_text(text):
    """Return a lower-cased, letters-and-whitespace-only version of ``text``.

    Processing order:
      1. Drop every run that starts with ``http`` up to the next whitespace.
      2. Delete every character that is not an ASCII letter or whitespace
         (digits, punctuation, non-ASCII characters are removed, not
         replaced by a space).
      3. Lower-case the result.

    Whitespace is left untouched, so gaps left by removed tokens remain.
    """
    without_urls = re.sub(r'http\S+', '', text)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', without_urls)
    return letters_only.lower()

# Collect posts
# Column names (and order) of the resulting DataFrame.  Passing them to
# pd.DataFrame explicitly keeps the columns present even when no post was
# collected, so later lookups such as df['cleaned_text'] do not raise
# KeyError on an empty result.
post_columns = [
    'subreddit',
    'title',
    'text',
    'cleaned_text',
    'sentiment',
    'positive',
    'neutral',
    'negative',
    'sentiment_label',
]

data = []
for sub in subreddits:
    subreddit = reddit.subreddit(sub)
    for post in subreddit.hot(limit=num_posts):
        # Posts with an empty body carry no text to analyse.
        if not post.selftext:
            continue

        cleaned = clean_text(post.selftext)

        # Score the raw body rather than the cleaned text: VADER uses
        # punctuation and capitalisation as intensity cues, and clean_text()
        # strips both.  The cleaned text is still stored for keyword matching
        # and the word cloud.
        sentiment_scores = sia.polarity_scores(post.selftext)
        compound = sentiment_scores['compound']

        # +/-0.05 on the compound score are the cut-offs used to bucket a
        # post as positive / negative; everything in between is neutral.
        if compound >= 0.05:
            sentiment_label = 'positive'
        elif compound <= -0.05:
            sentiment_label = 'negative'
        else:
            sentiment_label = 'neutral'

        data.append({
            'subreddit': sub,
            'title': post.title,
            'text': post.selftext,
            'cleaned_text': cleaned,
            'sentiment': compound,
            'positive': sentiment_scores['pos'],
            'neutral': sentiment_scores['neu'],
            'negative': sentiment_scores['neg'],
            'sentiment_label': sentiment_label,
        })

# Create DataFrame
df = pd.DataFrame(data, columns=post_columns)

# Extract symptom-like terms with a plain keyword lookup (simplified example)
symptom_keywords = [
    'headache',
    'dizzy',
    'nausea',
    'confusion',
    'fatigue',
    'memory',
    'blurred',
    'speech',
    'balance',
]
def extract_symptoms(text):
    """Return the entries of ``symptom_keywords`` that occur in ``text``.

    Matching is a case-sensitive substring test, so a keyword also matches
    inside a longer word (e.g. 'headache' in 'headaches').  The result keeps
    the order of ``symptom_keywords`` and lists each keyword at most once.
    """
    found = []
    for keyword in symptom_keywords:
        if keyword in text:
            found.append(keyword)
    return found

# Attach, per post, the list of symptom keywords found in its cleaned text.
df['symptoms'] = df['cleaned_text'].apply(extract_symptoms)

# Visualize most common symptoms
# Flatten the per-post keyword lists in one pass.  sum(lists, []) would build
# a brand-new list for every post, which is quadratic in the number of posts.
all_symptoms = [symptom for found in df['symptoms'] for symptom in found]
# dtype is given explicitly so an empty list yields a well-defined Series.
symptom_counts = pd.Series(all_symptoms, dtype='object').value_counts()
if symptom_counts.empty:
    # A bar chart cannot be drawn from an empty Series, so skip the figure
    # instead of letting the plot call fail.
    print("No symptom keywords found; skipping symptom frequency plot.")
else:
    plt.figure(figsize=(10,5))
    symptom_counts.plot(kind='bar')
    plt.title('Most Common Symptoms Mentioned')
    plt.xlabel('Symptom')
    plt.ylabel('Frequency')
    plt.tight_layout()
    plt.show()

# Generate Word Cloud
# All cleaned post bodies are joined into one string that feeds the cloud.
text_blob = ' '.join(df['cleaned_text'])
if text_blob.strip():
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text_blob)
    plt.figure(figsize=(10,5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.title('Word Cloud of Posts')
    plt.show()
else:
    # WordCloud.generate needs at least one word; with no collected text the
    # figure is skipped rather than raising.
    print("No text collected; skipping word cloud.")

# Sentiment Distribution
# Histogram (20 bins, with KDE overlay) of the per-post compound scores,
# drawn on an explicitly created figure/axes pair.
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['sentiment'], bins=20, kde=True, ax=hist_ax)
hist_ax.set_title('Distribution of Sentiment Scores')
hist_ax.set_xlabel('Compound Sentiment Score')
hist_ax.set_ylabel('Number of Posts')
hist_fig.tight_layout()
plt.show()

# Sentiment Label Distribution
# One bar per sentiment_label value, showing how many posts carry that label.
label_fig, label_ax = plt.subplots(figsize=(6, 4))
sns.countplot(x='sentiment_label', data=df, palette='Set2', ax=label_ax)
label_ax.set_title('Distribution of Sentiment Labels')
label_ax.set_xlabel('Sentiment')
label_ax.set_ylabel('Count')
label_fig.tight_layout()
plt.show()

# Save data
# Write the full DataFrame to a CSV file in the working directory, without
# the index column, then report completion.
output_csv = 'reddit_brain_injury_posts.csv'
df.to_csv(output_csv, index=False)
print("Data collection and analysis complete.")


ModuleNotFoundError: No module named 'praw'