In [None]:
import requests
import json
import csv
from datetime import datetime
from dateutil import relativedelta
from collections import deque
import plotly.express as px
import pandas as pd

In [None]:
# Open the ratings CSV. The handle `f` is kept open here because a later
# cell reads the rows from it.
ratings_path = 'Electronics.csv'
f = open(ratings_path)
# CSV reader bound to the same open handle.
reader = csv.reader(f)

In [None]:
# Count the ratings of one product, bucketed by calendar month.
#
# Result: `ratings` maps the first day of a month (a naive datetime) to a
# five-element list whose index i holds the number of (i+1)-star ratings
# the product received in that month.
#
# Columns read from each CSV row: 0 = product id (asin), 2 = rating,
# 3 = unix timestamp in seconds. Column 1 is not used.
PRODUCT_ID = "B003L1ZYYW"

# Rewind first so that re-running this cell rescans the whole file instead of
# silently producing an empty dict from an already-exhausted handle.
f.seek(0)

ratings = dict()
for row in csv.reader(f):
    # Filter before parsing: blank lines and rows of other products are
    # skipped without converting their rating/timestamp fields.
    if not row:
        continue
    asin = row[0]
    if asin != PRODUCT_ID:
        continue

    rating = int(float(row[2]))
    if not 1 <= rating <= 5:
        # Without this check a rating of 0 would be counted as 5 stars
        # through the negative index -1.
        raise ValueError(f"rating out of range 1-5 in row {row!r}")

    unix_ts = int(row[3])
    # NOTE(review): fromtimestamp() converts to the machine's local timezone,
    # so a rating close to a month boundary may fall into a different month
    # depending on where the notebook runs - confirm this is acceptable.
    dt = datetime.fromtimestamp(unix_ts)
    my = datetime(dt.year, dt.month, 1)  # first day of the rating's month

    if my not in ratings:
        ratings[my] = [0, 0, 0, 0, 0]
    ratings[my][rating - 1] += 1

In [None]:
# Centered moving average of the star rating over a sliding window of months.
#
# Input:  `ratings` - {first-of-month datetime: [n_1star, ..., n_5star]}
# Output: `moving_average` - list of (window-center month, average rating)
#         tuples, one per window position, in chronological order.
WINDOW_MONTHS = 5                    # window size is 5 months
CENTER_INDEX = WINDOW_MONTHS // 2    # position of the reported month in the window
ONE_MONTH = relativedelta.relativedelta(months=1)


def _star_totals(counts):
    """Return (sum of stars, number of ratings) for one month's per-star counts.

    `counts[i]` is the number of (i+1)-star ratings; an empty sequence
    (month without ratings) yields (0, 0).
    """
    star_sum = sum(stars * n for stars, n in enumerate(counts, start=1))
    return star_sum, sum(counts)


if not ratings:
    raise ValueError("no ratings available - cannot compute a moving average")

# Find earliest and latest month of a rating. The upper bound is pushed past
# the last rated month by the number of months that follow the center inside
# the window, so that the window's center can reach that last month.
smallest_key = min(ratings)
largest_key = max(ratings) + relativedelta.relativedelta(
    months=WINDOW_MONTHS - 1 - CENTER_INDEX
)

moving_average = []
months_dq = deque()   # months currently inside the window, oldest first
sum_ratings = 0       # sum of all stars inside the window
num_ratings = 0       # number of ratings inside the window
curr = smallest_key

# Prime the window with the first WINDOW_MONTHS months.
for i in range(WINDOW_MONTHS):
    months_dq.append(curr)
    added_sum, added_num = _star_totals(ratings.get(curr, ()))
    sum_ratings += added_sum
    num_ratings += added_num
    curr = curr + ONE_MONTH

# The first window contains smallest_key, which is a key of `ratings`.
moving_average.append((months_dq[CENTER_INDEX], sum_ratings / num_ratings))

while curr <= largest_key:
    # Slide the window by one month: drop the oldest month ...
    dropped_sum, dropped_num = _star_totals(ratings.get(months_dq.popleft(), ()))
    sum_ratings -= dropped_sum
    num_ratings -= dropped_num

    # ... and take in the next one.
    months_dq.append(curr)
    added_sum, added_num = _star_totals(ratings.get(curr, ()))
    sum_ratings += added_sum
    num_ratings += added_num
    curr = curr + ONE_MONTH

    if num_ratings == 0:
        # If no reviews in the window, carry the previous value forward.
        average_rating = moving_average[-1][1]
    else:
        average_rating = sum_ratings / num_ratings
    moving_average.append((months_dq[CENTER_INDEX], average_rating))

In [None]:
# Visualization: line chart of the moving-average rating over time.
# `moving_average` is a list of (month, average rating) tuples.
x_list = [month for month, avg in moving_average]
y_list = [avg for month, avg in moving_average]
df = pd.DataFrame({'x_data': x_list, 'y_data': y_list})
fig = px.line(
    df,
    x='x_data',
    y='y_data',
    title="Average Rating over Time",
    markers=True,
    # Show readable axis titles instead of the raw column names.
    labels={'x_data': 'Month', 'y_data': 'Average rating'},
)
fig.show()