In [1]:
import pandas as pd
from prefect import flow, task, get_run_logger

# Load the raw post, user and view tables from CSV for analysis.
post = pd.read_csv("data/posts.csv")
user = pd.read_csv("data/users.csv")
view = pd.read_csv("data/views.csv")

# Posts without a category are bucketed under the placeholder label "random".
post["category"] = post["category"].fillna("random")

# Map every raw "a|b|c" category string to the list of its individual labels.
cat = {raw: raw.split("|") for raw in post["category"]}

# One record per (post, label) pair. Every post is expanded, including posts
# that share the same raw category string; looking posts up by category string
# and taking only the first match would silently drop the others (and, through
# the inner merge below, all of their views).
updated_data = [
    {"_id": post_id, "title": title, "category": label, " post_type": post_type}
    for post_id, title, raw, post_type in zip(
        post["_id"], post["title"], post["category"], post[" post_type"]
    )
    for label in cat[raw]
]
# Explicit columns keep the frame well-formed even when there are no posts.
# Note: the source column really is named ' post_type' (leading space).
post1 = pd.DataFrame(
    updated_data, columns=["_id", "title", "category", " post_type"]
).rename(columns={"_id": "post_id"})

# NOTE(review): renaming the user table's 'user_id' column to 'post_id' looks
# unintended, and `user` is not used again in this cell — confirm before relying on it.
user.rename(columns={"user_id": "post_id"}, inplace=True)

# Natural (inner) join of views with the exploded posts on their shared columns.
main = pd.merge(view, post1)
users = list(main["user_id"].unique())
categories = list(main["category"].unique())

# User x category view-count matrix (original author's note: 88 x 235 for this dataset).
# Row i corresponds to users[i] and column j to categories[j]; crosstab sorts its
# labels, so reindex restores the first-seen order used everywhere else.
from scipy.sparse import csr_matrix
counts = pd.crosstab(main["user_id"], main["category"]).reindex(
    index=users, columns=categories, fill_value=0
)
user_mat = csr_matrix(counts.to_numpy())

In [11]:
# Preview the first rows of the merged view/post table.
main.head()

Unnamed: 0,user_id,post_id,timestamp,title,category,post_type
0,5df49b32cc709107827fb3c7,5ec821ddec493f4a2655889e,2020-06-01T10:46:45.131Z,Save Earth.,Visual Arts,artwork
1,5df49b32cc709107827fb3c7,5ec821ddec493f4a2655889e,2020-06-01T10:46:45.131Z,Save Earth.,Graphic Design,artwork
2,5df49b32cc709107827fb3c7,5ec821ddec493f4a2655889e,2020-06-01T10:46:45.131Z,Save Earth.,Artistic design,artwork
3,5df49b32cc709107827fb3c7,5ec821ddec493f4a2655889e,2020-06-01T10:46:45.131Z,Save Earth.,Graphic,artwork
4,5df49b32cc709107827fb3c7,5ec821ddec493f4a2655889e,2020-06-01T10:46:45.131Z,Save Earth.,Illustration,artwork


In [13]:
import pickle
import scipy.sparse as sp
# Serialize the `users` list to users.pkl (binary pickle).
with open('users.pkl', 'wb') as users_file:
    pickle.dump(users, users_file)



In [14]:
# Read the pickled user-id list back from users.pkl.
# Only unpickle files this notebook produced: pickle can execute arbitrary code.
with open('users.pkl', 'rb') as users_file:
    id_list = pickle.load(users_file)

In [16]:
import pandas as pd

# Write the merged table to main.csv, leaving out the row index.
main.to_csv(path_or_buf='main.csv', index=False)

In [17]:
# Read the merged table back from main.csv into a new frame.
df = pd.read_csv(filepath_or_buffer='main.csv')

In [5]:
import scipy.sparse as sp
# Store the sparse user x category matrix in SciPy's .npz format.
sp.save_npz(file='matrix.npz', matrix=user_mat)

In [3]:
# Restore the sparse matrix previously written to matrix.npz.
matrix = sp.load_npz(file='matrix.npz')

In [None]:
# Pickle the (dv, lr) pair into models/lin_reg.bin.
# NOTE(review): `dv` and `lr` are not defined anywhere in the visible notebook and
# this cell has never been executed — it looks like a leftover; confirm or remove.
with open('models/lin_reg.bin', 'wb') as model_file:
    pickle.dump((dv, lr), model_file)

In [None]:
import mlflow
# Point the MLflow client at a tracking server on localhost port 5000.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
# Identifier of the MLflow run whose logged model is loaded below.
RUN_ID='fa350b60cf564ff1a1d3341e2ac38cbe'
# Alternative artifact location (a different artifact path), kept for reference:
#logged_model = f'mlflow-artifacts:/728115650857226939/eb5ec70b031246f3a1336ba5ea0afe76/artifacts/model'
logged_model=f'mlflow-artifacts:/773732190913986377/{RUN_ID}/artifacts/model'
# Alternative: address the model through the run URI instead of the artifact path.
#logged_model = f'runs:/{RUN_ID}/model'
# Load the model through MLflow's scikit-learn flavour; requires the tracking
# server configured above to be reachable.
model = mlflow.sklearn.load_model(logged_model)
from flask import Flask, request, jsonify
import data_helper

# Rebind `main` and `users` to the objects exposed by data_helper; this replaces
# any same-named variables created by earlier cells.
main=data_helper.main
users=data_helper.users
def recommender(user_id, data=data_helper.user_mat, model=model, n_neighbors=15):
    """Recommend categories viewed by similar users but not yet by ``user_id``.

    Parameters
    ----------
    user_id : value present in the module-level ``users`` list.
    data : 2-D matrix whose row ``i`` describes ``users[i]``
        (defaults to ``data_helper.user_mat``).
    model : estimator exposing ``fit`` and ``kneighbors``.
    n_neighbors : maximum number of nearest users to consult (default 15).
        Clamped to the number of rows in ``data`` so small datasets do not
        make ``kneighbors`` raise.

    Returns
    -------
    list
        Category labels in the order they were first encountered while walking
        the neighbours as returned by ``kneighbors``. Each label appears at
        most once.

    Raises
    ------
    ValueError
        If ``user_id`` is not in ``users`` (raised by ``list.index``).
    """
    # NOTE(review): the model is refitted on every call, which is wasteful when
    # `data` does not change between calls; kept so callers passing their own
    # `data`/`model` still get a model fitted on that data.
    model.fit(data)
    index = users.index(user_id)

    # Categories the requesting user has already viewed; computed once.
    already_seen = set(main.loc[main['user_id'] == user_id, 'category'])

    k = min(n_neighbors, data.shape[0])
    distances, indices = model.kneighbors(data[index], n_neighbors=k)

    recommendations = []
    suggested = set()  # guards against the same category coming from several neighbours
    for neighbour_idx in indices[0]:
        neighbour_categories = main.loc[
            main['user_id'] == users[neighbour_idx], 'category'
        ].unique()
        for category in neighbour_categories:
            if category in already_seen or category in suggested:
                continue
            suggested.add(category)
            recommendations.append(category)
    return recommendations
#     print(indices)
#print(recommender('5df49b32cc709107827fb3c7')[:10])
#recommender(users[0])[:10]
app = Flask('recommender-prediction')


@app.route('/predict', methods=['POST'])
def predict_endpoint():
    """Handle POST /predict.

    The JSON request body is used as the user id itself. On success the
    response is a JSON object with the echoed ``id``, up to ten
    ``recommendations`` and the ``model_version`` (the MLflow run id).
    An id that is not in ``users`` yields a JSON error with HTTP 404 instead
    of an unhandled ``ValueError`` (HTTP 500) from the recommender.
    """
    rec = request.get_json()

    # recommender() looks the id up with users.index(), which raises for
    # anything not in the list (unknown ids, missing or malformed bodies).
    if rec not in users:
        return jsonify({"id": rec, "error": "unknown user id"}), 404

    pred = recommender(rec)[:10]

    result = {
        "id": rec,
        'recommendations': pred,
        'model_version': RUN_ID
    }

    return jsonify(result)


if __name__ == "__main__":
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser, so
    # it must not be on by default while listening on every interface (0.0.0.0).
    # Opt in explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(debug=debug_enabled, host='0.0.0.0', port=9696)