In [None]:
import requests
import json
import csv
from datetime import datetime
from dateutil import relativedelta
from collections import deque
import plotly.express as px
import pandas as pd

In [None]:
# Open the sentiment-prediction CSV and consume its first row (assumed to be
# the header) so that only data rows remain to be read.
# newline='' is what the csv module asks for on files it parses; it lets the
# reader handle line endings and quoted line breaks itself.
# The handle is intentionally left open: the grouping cell below keeps reading
# from it.
f = open('review_sentiment_preds.csv', newline='')
reader = csv.reader(f)
next(reader)

In [None]:
# Count reviews per calendar month, split by sentiment label.
# `sentiments` maps the first day of a month -> [count of label 0, count of label 1];
# the moving-average cell treats index 1 as the positive count.
# NOTE(review): no product-id filter is applied here; every remaining row is counted.
# NOTE(review): datetime.fromtimestamp uses the machine's local timezone, so reviews
# near a month boundary can land in different months on different machines.
sentiments = dict()
# Read through the csv reader (header already consumed above) rather than
# splitting raw lines on ",", so quoted fields containing commas parse correctly.
for row in reader:
    if not row:
        # csv.reader yields an empty list for a blank line; nothing to count.
        continue
    unix_ts = int(row[0])
    sentiment = int(row[2])
    if sentiment not in (0, 1):
        # Without this check a label of -1 would silently be counted in the
        # index-1 bucket through negative list indexing.
        raise ValueError(f"Unexpected sentiment label {sentiment!r}; expected 0 or 1")
    dt = datetime.fromtimestamp(unix_ts)
    my = datetime(dt.year, dt.month, 1)
    sentiments.setdefault(my, [0, 0])[sentiment] += 1

In [None]:
# Centred moving average of the positive-review share.
#
# For each window of WINDOW_MONTHS consecutive months the value is
#   (label-1 reviews in the window) / (all reviews in the window)
# and it is attributed to the month at CENTER_INDEX inside the window.
# Result: `moving_average`, a list of (month, share) tuples in chronological order.
WINDOW_MONTHS = 5
CENTER_INDEX = WINDOW_MONTHS // 2
one_month = relativedelta.relativedelta(months=1)

if not sentiments:
    raise ValueError("No reviews were grouped; cannot compute a moving average")

# Find earliest and latest month of a review. The upper bound is padded by the
# number of months to the right of the window centre, so that the final window
# is centred on the latest month that has a review.
smallest_key = min(sentiments)
largest_key = max(sentiments) + relativedelta.relativedelta(
    months=WINDOW_MONTHS - 1 - CENTER_INDEX
)

moving_average = []
months_dq = deque()
pos_sentiments = 0
num_sentiments = 0
curr = smallest_key
# Keep going until the first full window has been built, and after that until
# the padded upper bound has been added to the window.
while len(months_dq) < WINDOW_MONTHS or curr <= largest_key:
    if len(months_dq) == WINDOW_MONTHS:
        # Window is full: slide it by dropping the oldest month's counts.
        removed_month = months_dq.popleft()
        removed_zero, removed_one = sentiments.get(removed_month, (0, 0))
        pos_sentiments -= removed_one
        num_sentiments -= removed_zero + removed_one

    # Months without any review contribute zero to both totals.
    months_dq.append(curr)
    added_zero, added_one = sentiments.get(curr, (0, 0))
    pos_sentiments += added_one
    num_sentiments += added_zero + added_one
    curr = curr + one_month

    if len(months_dq) < WINDOW_MONTHS:
        # Still filling the first window; nothing to emit yet.
        continue

    if num_sentiments == 0:
        # If no reviews in the window, repeat the previous value. The first
        # window always contains `smallest_key`, which has at least one
        # review, so a previous value exists whenever this branch is reached.
        average_sentiment = moving_average[-1][1]
    else:
        average_sentiment = pos_sentiments / num_sentiments
    moving_average.append((months_dq[CENTER_INDEX], average_sentiment))

In [None]:
# Visualization: line chart of the moving average, one marker per window,
# positioned at the month the window value is attributed to.
x_list = [month for month, _share in moving_average]
y_list = [share for _month, share in moving_average]
df = pd.DataFrame({'x_data': x_list, 'y_data': y_list})
fig = px.line(
    df,
    x='x_data',
    y='y_data',
    title="Review Sentiment over Time",
    markers=True,
    # Show readable axis titles instead of the raw column names.
    labels={'x_data': 'Month', 'y_data': 'Share of positive reviews'},
)
fig.show()