In [1]:
import pandas
from math import log
from numpy import timedelta64

import jupynbimp
import review_data_getData 
from review_data_classify import ReviewSentiment

importing Jupyter notebook from review_data_getData.ipynb
importing Jupyter notebook from restaurants_data_cleaning.ipynb
importing Jupyter notebook from review_data_classify.ipynb


In [2]:
def __calculateWeightedStars(stars, count):
    """Weight a star rating by the number of reviews behind it.

    The rating is re-centred so that the 'neutral' rating of 3 maps to 0,
    then scaled by the natural log of the review count.

    Parameters
    ----------
    stars : number
        Star rating; only values in the closed range 1..5 are accepted.
    count : object
        Review count; anything ``int()`` can convert.

    Returns
    -------
    float
        ``(stars - 3) * log(count)``, or NaN when ``count`` cannot be
        converted to a positive integer or ``stars`` is outside 1..5
        (a NaN ``stars`` fails the range check and also yields NaN).
    """
    # The bare name `nan` is not imported by this notebook, so build the
    # value locally instead of relying on a global.
    missing = float('nan')

    try:
        count = int(count)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / non-numeric objects; ValueError: bad strings, NaN;
        # OverflowError: infinities.
        return missing

    # log() is undefined for zero and negative counts.
    if count < 1:
        return missing

    if not (1 <= stars <= 5):
        return missing

    normalStars = stars - 3  # 'neutral' rating of 3 normalized to 0
    return normalStars * log(count)

In [3]:
def __calculateSentiment(data, label, text):
    """Calculate the 'sentiment' of reviews (predicted star rating from text).

    Model fitting and tuning live in the review_data_classify module; here
    the tuned model is fit to the entire data and used to predict the star
    rating.

    Parameters
    ----------
    data : object
        Review data, forwarded unchanged to ``ReviewSentiment``.
    label : object
        Forwarded as ``label`` (``getData`` passes the column name 'stars').
    text : object
        Forwarded as ``text`` (``getData`` passes the column name 'text').

    Returns
    -------
    Whatever ``ReviewSentiment.predictSentiment`` returns.
    NOTE(review): downstream aggregation reads a 'sentiment' column, so this
    is presumably the input data with that column added -- confirm against
    review_data_classify.
    """
    sentimentModel = ReviewSentiment(data=data, label=label, text=text)
    tunedParameters = ReviewSentiment._OPTIMAL_PARAMETERS
    return sentimentModel.predictSentiment(tunedParameters)

In [4]:
def __aggregateReviews(data):
    """Collapse review-level rows into one summary row per business.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'business_id', 'stars', 'date' and
        'sentiment'.  'date' is assumed to be datetime-like so that
        max - min yields a timedelta -- TODO confirm against the loader.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'business_id', with columns (in this order):
        'count'       number of reviews in the group,
        'stars'       mean star rating,
        'review_span' whole days between the earliest and latest review,
        'sentiment'   mean sentiment,
        'rating'      mean stars weighted by review count
                      (see __calculateWeightedStars).
    """
    perBusiness = data.groupby('business_id')
    reviewDates = perBusiness['date']

    summary = pandas.DataFrame(
        {
            # size() counts the rows per group directly, avoiding an
            # aggregation over the grouping key followed by a rename
            'count': perBusiness.size(),
            'stars': perBusiness['stars'].mean(),
            # .dt.days extracts whole days; casting with
            # .astype('timedelta64[D]') is rejected by pandas >= 2.0,
            # which only supports s/ms/us/ns timedelta units
            'review_span': (reviewDates.max() - reviewDates.min()).dt.days,
            'sentiment': perBusiness['sentiment'].mean(),
        },
        columns=['count', 'stars', 'review_span', 'sentiment'],
    )

    # Pair the two columns directly instead of DataFrame.apply(axis=1):
    # this also works on an empty frame, where apply() returns a DataFrame
    # that cannot be assigned to a single column.
    summary['rating'] = [
        __calculateWeightedStars(meanStars, reviewCount)
        for meanStars, reviewCount in zip(summary['stars'], summary['count'])
    ]

    return summary

In [5]:
def getData():
    """Build the per-business review summary.

    Steps: load the reviews via ``review_data_getData.getReviews()``, run
    them through ``__calculateSentiment`` using 'stars' as the label and
    'text' as the text field, then aggregate the result per business with
    ``__aggregateReviews``.

    Returns
    -------
    The value returned by ``__aggregateReviews``.
    """
    reviews = review_data_getData.getReviews()
    scoredReviews = __calculateSentiment(data=reviews, label='stars', text='text')
    return __aggregateReviews(scoredReviews)