In [13]:
!pip install vaderSentiment



In [14]:
# Sentiment Analysis for Book Data
import os
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [15]:
# Debug aid: show where the kernel is running and what sits in that folder,
# so the relative paths used by the loading cells can be sanity-checked.
for heading, probe in (
    ("Current working directory:", os.getcwd),
    ("\nFiles in this directory:", os.listdir),
):
    print(heading)
    print(probe())

Current working directory:
/Users/baran/Downloads/data-analytics-portfolio-main/04-sentiment-analysis

Files in this directory:
['books_with_sentiment.csv', 'books_with_descriptions.csv', 'README.md', 'sentiment_analysis.ipynb']


In [16]:
# Read the cleaned price data (path is relative to the working directory)
books_csv = "../data/cleaned/cleaned_books.csv"
books = pd.read_csv(books_csv)

In [17]:
# Lower-case each header and trim surrounding whitespace so later column
# lookups do not depend on how the CSV was written.
books.columns = [header.lower().strip() for header in books.columns]
print("\nBooks columns:", books.columns, sep="\n")


Books columns:
Index(['title', 'price'], dtype='object')


In [18]:
# Read the scraped descriptions CSV (path is relative to the working directory)
descriptions_csv = "books_with_descriptions.csv"
descriptions = pd.read_csv(descriptions_csv)

In [19]:
# Lower-case each header and trim surrounding whitespace so the column names
# line up with the ones used for the merge key and the description lookup.
descriptions.columns = [header.lower().strip() for header in descriptions.columns]
print("\nDescription columns:", descriptions.columns, sep="\n")


Description columns:
Index(['title', 'description'], dtype='object')


In [20]:
# Fail fast if the descriptions file lacks the column the scoring step reads
required_column = "description"
if required_column not in descriptions.columns:
    raise ValueError("No 'description' column found in descriptions CSV")

In [21]:
# Merge datasets on title
# Keep one description per title before joining: a title that appears more
# than once in `descriptions` would otherwise fan out in the left join and
# duplicate book rows (and therefore their sentiment counts).
descriptions_unique = descriptions.drop_duplicates(subset="title", keep="first")

# how="left" keeps every book even when no description was scraped for it;
# validate="many_to_one" makes pandas raise if the right-hand keys are not unique.
df = books.merge(
    descriptions_unique,
    on="title",
    how="left",
    validate="many_to_one",
)

# A left join against unique keys must return exactly one row per input book.
assert len(df) == len(books), "merge changed the number of book rows"

print("\nMerged dataframe columns:")
print(df.columns)
print("\nSample rows:")
print(df.head())


Merged dataframe columns:
Index(['title', 'price', 'description'], dtype='object')

Sample rows:
                                   title  price  \
0                   A Light in the Attic  51.77   
1                     Tipping the Velvet  53.74   
2                             Soumission  50.10   
3                          Sharp Objects  47.82   
4  Sapiens: A Brief History of Humankind  54.23   

                                         description  
0  It's hard to imagine a world without A Light i...  
1  "Erotic and absorbing...Written with starling ...  
2  Dans une France assez proche de la nÃ´tre, un ...  
3  WICKED above her hipbone, GIRL across her hear...  
4  From a renowned historian comes a groundbreaki...  


In [22]:
# Initialize sentiment analyzer
# A single SentimentIntensityAnalyzer instance is created here and bound to
# `analyzer`; the scoring cell calls analyzer.polarity_scores(...) on each
# description.
analyzer = SentimentIntensityAnalyzer()

In [23]:
# Compute sentiment scores
def score_description(text):
    """Return the VADER compound score for one description.

    Books without a scraped description carry NaN after the left merge.
    Passing that through str() would score the literal text "nan" as if it
    were a real description, so missing values are returned as NaN instead.
    """
    if pd.isna(text):
        return float("nan")
    # str() guards against non-string cell values (e.g. numbers read from CSV).
    return analyzer.polarity_scores(str(text))["compound"]


df["sentiment_score"] = df["description"].apply(score_description)

In [24]:
# Label sentiment
# Cut-offs follow the convention documented by the VADER authors:
# compound >= 0.05 is positive, compound <= -0.05 is negative, and anything
# strictly between the two is neutral. The bounds are inclusive, so a score
# of exactly +/-0.05 is not reported as neutral.
POSITIVE_THRESHOLD = 0.05
NEGATIVE_THRESHOLD = -0.05


def label_sentiment(score):
    """Map a compound score to "positive", "negative", or "neutral".

    Returns None for a NaN score so an unscored row stays unlabeled instead
    of failing both comparisons and being reported as neutral.
    """
    if pd.isna(score):
        return None
    if score >= POSITIVE_THRESHOLD:
        return "positive"
    if score <= NEGATIVE_THRESHOLD:
        return "negative"
    return "neutral"


df["sentiment_label"] = df["sentiment_score"].apply(label_sentiment)

In [25]:
# Write the scored dataset to CSV (without the index column), then report
# how many books landed in each sentiment label.
output_path = "books_with_sentiment.csv"
df.to_csv(path_or_buf=output_path, index=False)

print(f"\n✅ Saved {output_path}", "\nSentiment label counts:", sep="\n")
label_counts = df["sentiment_label"].value_counts()
print(label_counts)


✅ Saved books_with_sentiment.csv

Sentiment label counts:
sentiment_label
positive    17
negative     3
Name: count, dtype: int64
