In [None]:

import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')

# Load the raw drug-review data
df = pd.read_csv('drug.csv')

# Rows without a condition are bucketed under one 'Unknown' label so they
# still take part in the per-condition grouping instead of being dropped.
df['condition'] = df['condition'].fillna('Unknown')

# Some rows carry scraped HTML residue in 'condition' (for example
# "0</span> users found this comment helpful.") instead of a real condition
# name. Drop them so they are not ranked as if they were conditions.
# .copy() keeps later column assignments from writing into a slice of the
# unfiltered frame.
df = df[~df['condition'].astype(str).str.contains('</span>', regex=False)].copy()

# Shared VADER analyzer used by analyze_sentiment()
sia = SentimentIntensityAnalyzer()

# Sentiment Analysis Function using VADER
def analyze_sentiment(review):
    """
    Score a single review with VADER and return its compound polarity.

    review: the review text. Any value that is not a string (such as the
        float NaN pandas produces for an empty CSV cell, or None) is treated
        as "no text available".

    Returns the 'compound' entry of VADER's polarity scores, a value between
    -1 (negative) and 1 (positive). Missing text yields a neutral 0.0.
    """
    # The analyzer works on text; only hand it real strings so that a
    # missing review cannot abort the whole column-wide apply().
    if not isinstance(review, str):
        return 0.0

    scores = sia.polarity_scores(review)
    return scores['compound']  # Compound score represents overall sentiment

# Score every review text and store the result in a new 'sentiment' column
df['sentiment'] = df['review'].apply(analyze_sentiment)

# One row per (condition, drug) pair: mean sentiment, number of non-null
# reviews and mean rating. The order of the entries sets the column order.
summary_spec = {
    'avg_sentiment': ('sentiment', 'mean'),
    'review_count': ('review', 'count'),
    'avg_rating': ('rating', 'mean'),
}
grouped = df.groupby(['condition', 'drugName']).agg(**summary_spec)
grouped = grouped.reset_index()

# For each condition, take the row label with the highest mean sentiment.
# NOTE: review_count plays no part in the choice, so a drug with a single
# review can be selected as the winner.
top_row_labels = grouped.groupby('condition')['avg_sentiment'].idxmax()
best_drugs = grouped.loc[top_row_labels].reset_index(drop=True)

# Order the winners alphabetically by condition
best_drugs = best_drugs.sort_values(by='condition')

# Write the per-condition winners to disk (row index omitted)
output_path = 'best_drugs_by_condition.csv'
best_drugs.to_csv(output_path, index=False)

# Print the top 20 results
def print_top_drugs(best_drugs, limit=20):
    """
    Print a short text report for the first `limit` rows of `best_drugs`.

    best_drugs: DataFrame with 'condition', 'drugName', 'avg_sentiment',
        'avg_rating' and 'review_count' columns.
    limit: maximum number of rows to print, taken from the top of the frame
        (default 20).

    Nothing is returned. Each row is written to stdout as five lines
    followed by one blank line.
    """
    for entry in best_drugs.head(limit).itertuples(index=False):
        report = "\n".join([
            f"Condition: {entry.condition}",
            f"  Best Drug: {entry.drugName}",
            f"  Average Sentiment: {entry.avg_sentiment:.2f}",
            f"  Average Rating: {entry.avg_rating:.2f}",
            f"  Total Reviews: {entry.review_count}",
        ])
        print(report)
        print()  # blank separator line between entries

# Keep the row limit in one place so the banner text and the number of
# rows actually printed cannot drift apart.
top_n = 20
print(f"--- Top {top_n} Best Drugs by Condition ---")
print_top_drugs(best_drugs, limit=top_n)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


--- Top 20 Best Drugs by Condition ---
Condition: 0</span> users found this comment helpful.
  Best Drug: Bepreve
  Average Sentiment: 0.86
  Average Rating: 3.00
  Total Reviews: 1

Condition: 10</span> users found this comment helpful.
  Best Drug: Ambien
  Average Sentiment: 0.84
  Average Rating: 9.00
  Total Reviews: 1

Condition: 110</span> users found this comment helpful.
  Best Drug: Plavix
  Average Sentiment: 0.00
  Average Rating: 9.00
  Total Reviews: 1

Condition: 11</span> users found this comment helpful.
  Best Drug: Saphris
  Average Sentiment: 0.98
  Average Rating: 9.00
  Total Reviews: 1

Condition: 121</span> users found this comment helpful.
  Best Drug: Plavix
  Average Sentiment: 0.63
  Average Rating: 6.00
  Total Reviews: 1

Condition: 123</span> users found this comment helpful.
  Best Drug: BuSpar
  Average Sentiment: 0.79
  Average Rating: 10.00
  Total Reviews: 1

Condition: 12</span> users found this comment helpful.
  Best Drug: Zyprexa
  Average Sentim

In [None]:
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

# Download VADER lexicon
nltk.download('vader_lexicon')

# Load the raw drug-review data
df = pd.read_csv('drug.csv')

# Rows without a condition are bucketed under one 'Unknown' label so they
# still take part in the per-condition grouping instead of being dropped.
df['condition'] = df['condition'].fillna('Unknown')

# Some rows carry scraped HTML residue in 'condition' (for example
# "0</span> users found this comment helpful.") instead of a real condition
# name. Drop them so they are not ranked as if they were conditions.
# .copy() keeps later column assignments from writing into a slice of the
# unfiltered frame.
df = df[~df['condition'].astype(str).str.contains('</span>', regex=False)].copy()

# Shared VADER analyzer used by analyze_sentiment()
sia = SentimentIntensityAnalyzer()

# Sentiment Analysis Function using VADER
def analyze_sentiment(review):
    """
    Score a single review with VADER and return its compound polarity.

    review: the review text. Any value that is not a string (such as the
        float NaN pandas produces for an empty CSV cell, or None) is treated
        as "no text available".

    Returns the 'compound' entry of VADER's polarity scores, a value between
    -1 (negative) and 1 (positive). Missing text yields a neutral 0.0.
    """
    # The analyzer works on text; only hand it real strings so that a
    # missing review cannot abort the whole column-wide apply().
    if not isinstance(review, str):
        return 0.0

    scores = sia.polarity_scores(review)
    return scores['compound']  # Compound score represents overall sentiment

# Score every review text and store the result in a new 'sentiment' column
df['sentiment'] = df['review'].apply(analyze_sentiment)

# One row per (condition, drug) pair: mean sentiment, number of non-null
# reviews and mean rating. The order of the entries sets the column order.
summary_spec = {
    'avg_sentiment': ('sentiment', 'mean'),
    'review_count': ('review', 'count'),
    'avg_rating': ('rating', 'mean'),
}
grouped = df.groupby(['condition', 'drugName']).agg(**summary_spec)
grouped = grouped.reset_index()

# For each condition, take the row label with the highest mean sentiment.
# NOTE: review_count plays no part in the choice, so a drug with a single
# review can be selected as the winner.
top_row_labels = grouped.groupby('condition')['avg_sentiment'].idxmax()
best_drugs = grouped.loc[top_row_labels].reset_index(drop=True)

# Order the winners alphabetically by condition
best_drugs = best_drugs.sort_values(by='condition')

# Write the per-condition winners to disk (row index omitted)
output_path = 'best_drugs_by_condition.csv'
best_drugs.to_csv(output_path, index=False)

# Function to find the best drugs for a given condition
def get_best_drug_for_condition(condition):
    """
    Look up the top-ranked drug row(s) for one condition in `best_drugs`.

    condition: condition name as typed by the user. Surrounding whitespace
        is ignored and the comparison is case-insensitive.

    Returns a DataFrame with the columns 'condition', 'drugName',
    'avg_sentiment', 'avg_rating' and 'review_count' when at least one row
    matches, otherwise a "No data found ..." message string.
    """
    wanted = condition.strip().lower()

    # Compare against a lower-cased view of the stored condition names
    is_match = best_drugs['condition'].str.lower() == wanted
    hits = best_drugs[is_match]

    if not hits.empty:
        report_columns = ['condition', 'drugName', 'avg_sentiment', 'avg_rating', 'review_count']
        return hits[report_columns]

    # Nothing matched: hand back a message instead of a frame
    return f"No data found for the condition: {wanted.capitalize()}"

# Ask the user for a condition and look it up
input_condition = input("Enter the disease condition: ")
best_drug_for_condition = get_best_drug_for_condition(input_condition)

# The lookup returns a plain string when nothing matched, and a DataFrame
# of matching rows otherwise.
if not isinstance(best_drug_for_condition, str):
    print("\n--- Best Drug(s) for the Condition ---")
    for match in best_drug_for_condition.itertuples(index=False):
        print(
            f"Condition: {match.condition}\n"
            f"  Best Drug: {match.drugName}\n"
            f"  Average Sentiment: {match.avg_sentiment:.2f}\n"
            f"  Average Rating: {match.avg_rating:.2f}\n"
            f"  Total Reviews: {match.review_count}"
        )
        print()  # blank separator line between entries
else:
    print(best_drug_for_condition)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Enter the disease condition: Depression

--- Best Drug(s) for the Condition ---
Condition: Depression
  Best Drug: Luvox CR
  Average Sentiment: 0.95
  Average Rating: 4.00
  Total Reviews: 1



In [None]:
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

# Download VADER lexicon
nltk.download('vader_lexicon')

# Load the raw drug-review data
df = pd.read_csv('drug.csv')

# Rows without a condition are bucketed under one 'Unknown' label so they
# still take part in the per-condition grouping instead of being dropped.
df['condition'] = df['condition'].fillna('Unknown')

# Some rows carry scraped HTML residue in 'condition' (for example
# "0</span> users found this comment helpful.") instead of a real condition
# name. Drop them so they are not ranked as if they were conditions.
# .copy() keeps later column assignments from writing into a slice of the
# unfiltered frame.
df = df[~df['condition'].astype(str).str.contains('</span>', regex=False)].copy()

# Shared VADER analyzer used by analyze_sentiment()
sia = SentimentIntensityAnalyzer()

# Sentiment Analysis Function using VADER
def analyze_sentiment(review):
    """
    Score a single review with VADER and return its compound polarity.

    review: the review text. Any value that is not a string (such as the
        float NaN pandas produces for an empty CSV cell, or None) is treated
        as "no text available".

    Returns the 'compound' entry of VADER's polarity scores, a value between
    -1 (negative) and 1 (positive). Missing text yields a neutral 0.0.
    """
    # The analyzer works on text; only hand it real strings so that a
    # missing review cannot abort the whole column-wide apply().
    if not isinstance(review, str):
        return 0.0

    scores = sia.polarity_scores(review)
    return scores['compound']  # Compound score represents overall sentiment

# Score every review text and store the result in a new 'sentiment' column
df['sentiment'] = df['review'].apply(analyze_sentiment)

# One row per (condition, drug) pair: mean sentiment, number of non-null
# reviews and mean rating. The order of the entries sets the column order.
summary_spec = {
    'avg_sentiment': ('sentiment', 'mean'),
    'review_count': ('review', 'count'),
    'avg_rating': ('rating', 'mean'),
}
grouped = df.groupby(['condition', 'drugName']).agg(**summary_spec)
grouped = grouped.reset_index()

# For each condition, take the row label with the highest mean sentiment.
# NOTE: review_count plays no part in the choice, so a drug with a single
# review can be selected as the winner.
top_row_labels = grouped.groupby('condition')['avg_sentiment'].idxmax()
best_drugs = grouped.loc[top_row_labels].reset_index(drop=True)

# Order the winners alphabetically by condition
best_drugs = best_drugs.sort_values(by='condition')

# Write the per-condition winners to disk (row index omitted)
output_path = 'best_drugs_by_condition.csv'
best_drugs.to_csv(output_path, index=False)

# Function to find the best drugs for a given condition
def get_best_drug_for_condition(condition):
    """
    Look up the top-ranked drug row(s) for one condition in `best_drugs`.

    condition: condition name as typed by the user. Surrounding whitespace
        is ignored and the comparison is case-insensitive.

    Returns a DataFrame with the columns 'condition', 'drugName',
    'avg_sentiment', 'avg_rating' and 'review_count' when at least one row
    matches, otherwise a "No data found ..." message string.
    """
    wanted = condition.strip().lower()

    # Compare against a lower-cased view of the stored condition names
    is_match = best_drugs['condition'].str.lower() == wanted
    hits = best_drugs[is_match]

    if not hits.empty:
        report_columns = ['condition', 'drugName', 'avg_sentiment', 'avg_rating', 'review_count']
        return hits[report_columns]

    # Nothing matched: hand back a message instead of a frame
    return f"No data found for the condition: {wanted.capitalize()}"

# Ask the user for a condition and look it up
input_condition = input("Enter the disease condition: ")
best_drug_for_condition = get_best_drug_for_condition(input_condition)

# The lookup returns a plain string when nothing matched, and a DataFrame
# of matching rows otherwise.
if not isinstance(best_drug_for_condition, str):
    print("\n--- Best Drug(s) for the Condition ---")
    for match in best_drug_for_condition.itertuples(index=False):
        print(
            f"Condition: {match.condition}\n"
            f"  Best Drug: {match.drugName}\n"
            f"  Average Sentiment: {match.avg_sentiment:.2f}\n"
            f"  Average Rating: {match.avg_rating:.2f}\n"
            f"  Total Reviews: {match.review_count}"
        )
        print()  # blank separator line between entries
else:
    print(best_drug_for_condition)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Enter the disease condition: Obesity

--- Best Drug(s) for the Condition ---
Condition: Obesity
  Best Drug: Fastin
  Average Sentiment: 0.80
  Average Rating: 10.00
  Total Reviews: 1

