<h1>Aggregate Sentiment & Merge with Sales</h1>

<h2>Load Datasets</h2>

In [10]:
import pandas as pd

# Locations of the two input CSV files.
# NOTE(review): these are absolute paths on one machine; anyone else running
# the notebook has to edit them before this cell will work.
reviews_csv_path = "/Users/udaymadhusudhan/Downloads/amazon_reviews_with_sentiment.csv"
sales_csv_path = "/Users/udaymadhusudhan/Downloads/cleaned_walmart_sales.csv"

# Sentiment-labeled reviews and the cleaned Walmart sales table, each read
# into its own DataFrame with pandas' default CSV parsing.
reviews = pd.read_csv(reviews_csv_path)
sales = pd.read_csv(sales_csv_path)


<h2>Prepare Reviews for Monthly Sentiment Aggregation</h2>

In [11]:
# Build one row per calendar month with the count and share of each sentiment
# label found in `reviews`. The result, `sentiment_counts`, has these columns:
#   YearMonth, <one count column per label>, Total_Reviews,
#   Positive_Ratio, Neutral_Ratio, Negative_Ratio

# Labels for which a *_Ratio column is produced.
expected_sentiments = ['Positive', 'Neutral', 'Negative']

# Parse the date column so the .dt accessor can be used below.
reviews['date'] = pd.to_datetime(reviews['date'])

# Month key rendered as a 'YYYY-MM' string (e.g. '2010-02').
reviews['YearMonth'] = reviews['date'].dt.to_period('M').astype(str)

# Rows = months, columns = sentiment labels, values = number of reviews.
# Month/label combinations that never occur are filled with 0.
# rename_axis drops the 'sentiment' name from the column axis so it does not
# show up as a stray header once the index is reset.
sentiment_counts = (
    reviews.groupby(['YearMonth', 'sentiment'])
    .size()
    .unstack(fill_value=0)
    .rename_axis(columns=None)
)

# unstack only creates columns for labels that actually appear in the data.
# Add an all-zero column for any expected label that is absent so the ratio
# computation below cannot fail with a KeyError.
for sentiment in expected_sentiments:
    if sentiment not in sentiment_counts.columns:
        sentiment_counts[sentiment] = 0

# Total reviews per month: sum over every label column present at this point.
sentiment_counts['Total_Reviews'] = sentiment_counts.sum(axis=1)

# Share of each expected label among that month's reviews.
for sentiment in expected_sentiments:
    sentiment_counts[f'{sentiment}_Ratio'] = sentiment_counts[sentiment] / sentiment_counts['Total_Reviews']

# Turn YearMonth back into an ordinary column so it can be used as a merge key.
sentiment_counts = sentiment_counts.reset_index()

# Preview
sentiment_counts.head()


sentiment,YearMonth,Negative,Neutral,Positive,Total_Reviews,Positive_Ratio,Neutral_Ratio,Negative_Ratio
0,1999-10,0,0,2,2,1.0,0.0,0.0
1,1999-12,0,0,4,4,1.0,0.0,0.0
2,2000-01,0,1,7,8,0.875,0.125,0.0
3,2000-02,0,0,3,3,1.0,0.0,0.0
4,2000-06,3,0,2,5,0.4,0.0,0.6


<h2>Prepare Walmart Sales for Monthly Aggregation</h2>

In [12]:
# Collapse the sales rows to one row per YearMonth by summing Weekly_Sales.
# as_index=False keeps YearMonth as a regular column in the result.
monthly_sales = sales.groupby('YearMonth', as_index=False)['Weekly_Sales'].sum()

# Show the first few monthly totals
monthly_sales.head()


Unnamed: 0,YearMonth,Weekly_Sales
0,2010-02,190333000.0
1,2010-03,181919800.0
2,2010-04,231412400.0
3,2010-05,186710900.0
4,2010-06,192246200.0


<h2>Merge Sales with Sentiment</h2>

In [13]:
# Join monthly sales totals with monthly sentiment figures on the shared
# YearMonth key. The inner join keeps only months present in both tables.
combined = monthly_sales.merge(sentiment_counts, on='YearMonth', how='inner')

# Show the first few merged rows
combined.head()


Unnamed: 0,YearMonth,Weekly_Sales,Negative,Neutral,Positive,Total_Reviews,Positive_Ratio,Neutral_Ratio,Negative_Ratio
0,2010-02,190333000.0,248,1099,4972,6319,0.786833,0.17392,0.039247
1,2010-03,181919800.0,247,1222,4926,6395,0.770289,0.191087,0.038624
2,2010-04,231412400.0,243,1020,4423,5686,0.777875,0.179388,0.042737
3,2010-05,186710900.0,231,1085,4797,6113,0.784721,0.177491,0.037788
4,2010-06,192246200.0,218,1077,4339,5634,0.770146,0.191161,0.038694


<h2>Save Merged Dataset</h2>

In [14]:
# Write the merged table to disk; index=False leaves the row index out of the file.
merged_csv_path = "/Users/udaymadhusudhan/Downloads/merged_sentiment_sales.csv"
combined.to_csv(merged_csv_path, index=False)
