# In [1]:  (Jupyter cell prompt left over from the notebook export; not code)
import os
import pandas as pd
from flask import Flask, request
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from collections.abc import Iterable
import pickle
from flask_cors import CORS

# Single Flask application object; every @app.route decorator in this file
# registers its view function on it.
app = Flask(__name__)
# Enable cross-origin requests for the whole app. No options are passed, so
# flask-cors defaults apply (presumably all origins -- TODO confirm that is
# intended for production).
CORS(app)

@app.route('/')
def hello():
    """Return a fixed greeting string for the root URL."""
    return "Hello An!"


def weightedRating(x, m=51.0, C=4.56):
    """Return the weighted rating of a recipe row.

    The score blends the row's own average rating with a prior mean ``C``::

        score = v / (v + m) * R + m / (m + v) * C

    where ``v = x['Rating']`` and ``R = x['AggregatedRating']``. The larger
    ``v`` is relative to ``m``, the more the row's own average dominates.

    Args:
        x: Anything indexable by the keys ``'Rating'`` and
            ``'AggregatedRating'`` (a dict or a DataFrame row; this is how
            ``DataFrame.apply(weightedRating, axis=1)`` calls it).
        m: Weight given to the prior. Defaults to 51.0, the value that was
            previously hard-coded.
        C: Prior mean rating. Defaults to 4.56, the value that was previously
            hard-coded.

    Returns:
        The weighted rating as a float (or an element-wise result when the
        indexed values are array-like).
    """
    v = x['Rating']
    R = x['AggregatedRating']
    # Operand order is kept exactly as before so default results are unchanged.
    return (v / (v + m) * R) + (m / (m + v) * C)

# Precomputed data sets, loaded once at import time.
def _loadPickle(path):
    """Unpickle and return the object stored at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes instead of being left for the garbage collector.
    """
    # NOTE: unpickling can execute arbitrary code; only load .pkl files that
    # were produced by a trusted build step.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# weighted popularity model (returned unchanged by the /popularity route)
qualifiedRecipes = _loadPickle('qualifiedRecipes.pkl')

# Per-rating rows; 'score' is the weighted rating of each row, and df_rating
# holds the mean score per RecipeId (indexed by RecipeId).
qualifiedRecipesCollaborative = _loadPickle('qualifiedRecipesCollaborative.pkl')
qualifiedRecipesCollaborative['score'] = qualifiedRecipesCollaborative.apply(weightedRating, axis=1)
names = qualifiedRecipesCollaborative[["RecipeId", "Name"]]
df_rating = pd.DataFrame(qualifiedRecipesCollaborative.groupby(by=['RecipeId'])['score'].agg('mean'))

categoryBasedRecommender = _loadPickle('categoryBasedRecommender.pkl')
metadataBasedRecommender = _loadPickle('metadataBasedRecommender.pkl')
# dataset dump for algolia
datasetDump = _loadPickle('datasetDump.pkl')

# AuthorId x RecipeId matrix of ratings; pairs without a rating become 0.
filtered_ratings = _loadPickle('filtered_ratings.pkl')
matrix = filtered_ratings.pivot(index='AuthorId', columns='RecipeId', values='Rating').fillna(0)

@app.route('/popularity')
def weightedRatingPopularity():
    """Return the preloaded ``qualifiedRecipes`` table serialised as JSON."""
    return qualifiedRecipes.to_json()


# category - rank the recipes of one category by weighted rating
@app.route('/category', methods=['POST'])
def getCategoryWiseRecommendations():
    """Return the 12 best-scoring recipes of the requested category as JSON.

    Request body: JSON object with a ``'keyword'`` field holding the category
    to match against the ``'Category'`` column.

    Scoring: within the category, ``m`` is the 85th percentile of ``'Rating'``
    and ``C`` the mean of ``'AggregatedRating'``. Rows with ``Rating >= m``
    are scored with ``v/(v+m)*R + m/(m+v)*C`` and the top 12 are returned.

    Returns:
        ``DataFrame.to_json()`` output with the columns RecipeId, Name,
        Rating, AggregatedRating, ReviewCount and wr. An unknown category
        yields an empty table. A body without ``'keyword'`` yields a 400.
    """
    data = request.get_json()
    if not isinstance(data, dict) or 'keyword' not in data:
        return '{"error": "JSON body with a keyword field is required"}', 400

    RecipeCategory = data['keyword']
    percentile = 0.85
    categorySpecificRecipes = categoryBasedRecommender[categoryBasedRecommender['Category'] == RecipeCategory]
    hasRating = categorySpecificRecipes['Rating'].notnull()
    hasAverage = categorySpecificRecipes['AggregatedRating'].notnull()

    # NOTE(review): astype('int') truncates fractional averages (4.5 -> 4);
    # kept because the response has always exposed the truncated values.
    RatingCounts = categorySpecificRecipes.loc[hasRating, 'Rating'].astype('int')
    RatingAverages = categorySpecificRecipes.loc[hasAverage, 'AggregatedRating'].astype('int')
    C = RatingAverages.mean()
    m = RatingCounts.quantile(percentile)

    # .copy() so the column assignments below write to an independent frame
    # rather than to a slice of categoryBasedRecommender.
    qualified = categorySpecificRecipes.loc[
        (categorySpecificRecipes['Rating'] >= m) & hasRating & hasAverage,
        ['RecipeId', 'Name', 'Rating', 'AggregatedRating', 'ReviewCount']].copy()
    qualified['Rating'] = qualified['Rating'].astype('int')
    qualified['AggregatedRating'] = qualified['AggregatedRating'].astype('int')

    # Column arithmetic gives the same numbers as the former row-wise apply,
    # and still yields a (empty) column when no recipe qualifies.
    v = qualified['Rating']
    R = qualified['AggregatedRating']
    qualified['wr'] = (v / (v + m) * R) + (m / (m + v) * C)
    qualified = qualified.sort_values('wr', ascending=False).head(12)

    return qualified.to_json()


# metadata
# TF-IDF model over the 'Metadata' text (unigrams and bigrams, English stop
# words removed), built once at import time.
# min_df=1 keeps every term, exactly like the former integer 0, but is accepted
# by the parameter validation of recent scikit-learn releases (an integer
# min_df must be >= 1 there).
termFrequency2 = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english')
tfidMatrix2 = termFrequency2.fit_transform(metadataBasedRecommender['Metadata'])
# Pairwise dot products of the TF-IDF rows; row i holds the similarity of
# recipe i to every recipe (including itself).
cosineSimilarity2 = linear_kernel(tfidMatrix2, tfidMatrix2)

# Reset to a 0..n-1 index so row positions line up with the rows/columns of
# cosineSimilarity2.
metadataBasedRecommender = metadataBasedRecommender.reset_index()
names2 = metadataBasedRecommender[['RecipeId', 'Name']]
# Recipe name -> row position. Names that occur more than once map to several
# positions (a lookup then returns a Series instead of a scalar).
indices2 = pd.Series(metadataBasedRecommender.index, index=metadataBasedRecommender['Name'])


@app.route('/metadata', methods=['POST'])
def getMetadataBasedRecommendations():
    """Return the 12 recipes most similar to the named recipe as JSON.

    Request body: JSON object with a ``'keyword'`` field holding a recipe
    name as it appears in the ``'Name'`` column.

    Returns:
        ``DataFrame.to_json()`` output with the RecipeId and Name of the 12
        most similar recipes. A body without ``'keyword'`` yields a 400 and an
        unknown recipe name a 404 (previously both surfaced as a 500).
    """
    data = request.get_json()
    if not isinstance(data, dict) or 'keyword' not in data:
        return '{"error": "JSON body with a keyword field is required"}', 400

    title = data['keyword']
    try:
        idx = indices2[title]
    except KeyError:
        return '{"error": "unknown recipe name"}', 404

    if isinstance(idx, Iterable):
        # Several recipes share this name; use the first one, as before.
        idx = next(iter(idx))

    # Rank all recipes by similarity and skip the top hit, which is expected
    # to be the recipe itself.
    similarityScores = sorted(enumerate(cosineSimilarity2[idx]), key=lambda x: x[1], reverse=True)[1:13]
    recipeIndices = [position for position, _ in similarityScores]
    return names2.iloc[recipeIndices].to_json()


# content with ratings considered
# Alias (not a copy) of the metadata frame, so row positions here match the
# rows of cosineSimilarity2.
contentBasedRecommender = metadataBasedRecommender


def _rankSimilarRecipes(idx):
    """Return up to 12 of the 25 recipes most similar to row *idx*, best score first.

    Among the 25 nearest neighbours, rows whose ``'Rating'`` reaches the 60th
    percentile of the group (and that have an ``'AggregatedRating'``) are
    kept, scored with ``weightedRating`` into a ``'wr'`` column, and sorted
    descending.
    """
    # Skip the top hit, which is expected to be the recipe itself.
    similarityScores = sorted(enumerate(cosineSimilarity2[idx]), key=lambda x: x[1], reverse=True)[1:26]
    recipeIndices = [position for position, _ in similarityScores]

    candidates = contentBasedRecommender.iloc[recipeIndices][['RecipeId', 'Name', 'Rating', 'AggregatedRating']]
    hasRating = candidates['Rating'].notnull()
    hasAverage = candidates['AggregatedRating'].notnull()
    m = candidates.loc[hasRating, 'Rating'].astype('int').quantile(0.60)

    # .copy() so the assignments below do not write into a slice of the
    # shared recommender frame.
    qualified = candidates[(candidates['Rating'] >= m) & hasRating & hasAverage].copy()
    qualified['Rating'] = qualified['Rating'].astype('int')
    qualified['AggregatedRating'] = qualified['AggregatedRating'].astype('int')
    # weightedRating only indexes and does arithmetic, so passing the frame
    # scores all rows at once (same numbers as a row-wise apply) and also
    # works when no row qualifies.
    qualified['wr'] = weightedRating(qualified)
    return qualified.sort_values('wr', ascending=False).head(12)


@app.route('/content', methods=['POST'])
def getContentBasedRecommendations():
    """Return similar recipes ranked by weighted rating as JSON.

    Request body: JSON object with a ``'keyword'`` field holding a recipe
    name as it appears in the ``'Name'`` column.

    When several recipes share the name, the candidates of all of them are
    merged and the 12 best are returned. Previously the merge result was
    discarded and only the last same-named recipe was used.

    The score uses ``weightedRating`` with its default prior. The category
    mean ``C`` that this route used to compute was never applied and has been
    removed.

    Returns:
        ``DataFrame.to_json()`` output with RecipeId, Name, Rating,
        AggregatedRating and wr. A body without ``'keyword'`` yields a 400
        and an unknown recipe name a 404.
    """
    data = request.get_json()
    if not isinstance(data, dict) or 'keyword' not in data:
        return '{"error": "JSON body with a keyword field is required"}', 400

    title = data['keyword']
    try:
        idx = indices2[title]
    except KeyError:
        return '{"error": "unknown recipe name"}', 404

    if not isinstance(idx, Iterable):
        return _rankSimilarRecipes(idx).to_json()

    # Several recipes share this name: pool the candidates of each of them.
    recipeDump = pd.concat([_rankSimilarRecipes(i) for i in idx])
    # The same neighbour can show up for more than one of them; keep one copy
    # (to_json also needs unique row labels).
    recipeDump = recipeDump[~recipeDump.index.duplicated()]
    recipeDump = recipeDump.sort_values('wr', ascending=False).head(12)
    return recipeDump.to_json()

def getCollaborativeRecommendations(recipe_ids):
    """Return up to 10 recipes whose ratings correlate with the liked recipes.

    Args:
        recipe_ids: RecipeIds the user liked. Ids that are not columns of
            ``matrix`` are ignored; the list is not modified.

    How it works: the liked columns of ``matrix`` are averaged per user into
    one profile, every recipe column is Pearson-correlated with that profile,
    the liked recipes are dropped, recipes with a mean score of 4 or less are
    filtered out, and the 10 highest correlations are looked up in
    ``datasetDump``.

    Why a profile: ``matrix.corrwith`` was previously given the liked columns
    as a DataFrame. pandas then correlates only same-labelled columns, so
    every recipe other than the liked ones got NaN.

    Returns:
        ``DataFrame.to_json()`` output with the columns RecipeId, Name,
        Images, TotalTime and RecipeCategory (an empty table when none of the
        ids is known).
    """
    outputColumns = ['RecipeId', 'Name', 'Images', 'TotalTime', 'RecipeCategory']

    # Build a new list: removing from recipe_ids while iterating over it
    # skipped elements and mutated the caller's list.
    knownIds = [recipe_id for recipe_id in recipe_ids if recipe_id in matrix.columns]
    if not knownIds:
        return pd.DataFrame(columns=outputColumns).to_json()

    likedProfile = matrix[knownIds].mean(axis=1)
    similar_recipes = matrix.corrwith(likedProfile, method='pearson').to_frame('correlation')
    # Never recommend what the user already liked.
    similar_recipes = similar_recipes.drop(index=knownIds)

    result_recipes = similar_recipes.join(df_rating['score'])
    result_recipes = result_recipes[result_recipes['score'] > 4].sort_values(by='correlation', ascending=False)
    recipeIndices = result_recipes.head(10).index.tolist()

    # One lookup per id keeps the output in correlation order.
    frames = [datasetDump.loc[datasetDump['RecipeId'] == recipe_id, outputColumns]
              for recipe_id in recipeIndices]
    if not frames:
        return pd.DataFrame(columns=outputColumns).to_json()
    return pd.concat(frames, ignore_index=True).to_json()


# The endpoint name is set explicitly so it stays 'getCollaborativeRecommendations'
# even though the view function no longer shares (and gets shadowed by) the
# helper's name.
@app.route('/collaborative', methods=['POST'], endpoint='getCollaborativeRecommendations')
def handleCollaborativeRequest():
    """Handle POST /collaborative.

    Request body: JSON object with a ``'likedRecipeList'`` field holding a
    list of RecipeIds. Responds with the JSON produced by
    ``getCollaborativeRecommendations``, or with a 400 when the field is
    missing or not a list (previously the view returned ``None``).
    """
    data = request.json
    if not isinstance(data, dict) or not isinstance(data.get('likedRecipeList'), list):
        return '{"error": "JSON body with a likedRecipeList array is required"}', 400
    return getCollaborativeRecommendations(data['likedRecipeList'])

#print(getCollaborativeRecommendations([519, 3370, 10744]))
    
@app.route('/datasetDump')
def getDatasetDump():
    """Return the whole preloaded ``datasetDump`` table serialised as JSON."""
    return datasetDump.to_json()


if __name__ == "__main__":
    # Use the PORT environment variable when it is set, otherwise port 5000.
    port = int(os.getenv("PORT", "5000"))
    # 0.0.0.0 makes the server listen on all network interfaces.
    app.run(host='0.0.0.0', port=port)


# Notebook output captured with the cell (not code):
# SyntaxError: invalid syntax (2315883001.py, line 178)