In [18]:
from google.colab import drive
import sys
import os
drive.mount('/content/gdrive')

sys.path.append(r'/content/gdrive/My Drive/iss/irs/PracticeModule/data/Recommendation Systems')
print("All the system paths:")
sys.path

os.chdir(r'/content/gdrive/My Drive/iss/irs/PracticeModule/data/Recommendation Systems')
print("Current working directory:")
os.getcwd()

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
All the system paths:
Current working directory:


'/content/gdrive/My Drive/iss/irs/PracticeModule/data/Recommendation Systems'

## **Import Libraries**



In [19]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import csv
import math
import heapq
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer, TfidfTransformer
import spacy


## **Load the data**

In [20]:
data = pd.read_csv('newdata_2.csv')
# print(tfidf['Name'])
print(data.loc[0])
# print(data.shape)

corpus = list(data['Corpus'])
print(corpus)

Unnamed: 0        0
Dog_No          0.0
Name          Adora
Gender            0
Age               3
              ...  
tfidf1765       0.0
tfidf1766       0.0
tfidf1767       0.0
tfidf1768       0.0
tfidf1769       0.0
Name: 0, Length: 1777, dtype: object
[' absolute darling food take medicine food aggression starve point confident trust independent separation anxiety company story able smiley face', ' tip tiny bit smart unfamiliar pretend calm fine cool pup mesmerize pup puppy inside', ' sweet absolutely companion hesitate affection gentle giant reactive single training recommend ideally', ' anxious afraid affection trust wriggle seek', ' recall post baby display change temperament bitey take poope indoor gorgeous quirk good responsible patient walk till today bumpy ensure quirk remain committed chapter kid elderly walk outdoors timid phase sensitive willing engage professional trainer confident playful good patient warm sterilize vaccinated healthy chip food motivate work leash walk

## **Build Up the Language Processor**

In [21]:
def my_preprocessing(raw_sentence):
    nlp_tool = spacy.load('en_core_web_sm')
    token_sentence = nlp_tool(raw_sentence.lower())
    with open('./irrelevant_words.txt') as file:
        irrelevantlist = [stopword.replace('\n', '').lower() for stopword in file.readlines()]
#     new_sentence = [word for word in token_sentence if word not in irrelevantlist]
    
    preprocessed_sentence = []
    
    for token in token_sentence:
        if token.pos_ == "PUNCT" or token.is_stop == True or token.is_alpha == False or token.pos_ == "SYM":
            continue

        elif token.lemma_ in irrelevantlist or len(token)==1:
            continue
            
        else:
            preprocessed_sentence.append(token.lemma_)
            
    return preprocessed_sentence


## **Content-Based Recommendation**

In [22]:
# Calculate the cosine similarity between two dogs
def cosine_similarity(dog_x,dog_y,only_description):
  numerator = 0
  denominator_x = 0
  denominator_y = 0
  for i in range(1729):
    numerator += dog_x['tfidf'+str(i)]*dog_y['tfidf'+str(i)]
    denominator_x += dog_x['tfidf'+str(i)]**2
    denominator_y += dog_y['tfidf'+str(i)]**2
  if only_description == False:
    numerator += dog_x['Gender']*dog_y['Gender']+dog_x['HDB']*dog_y['HDB']
    denominator_x += dog_x['Gender']**2+dog_x['HDB']**2
    denominator_y += dog_y['Gender']**2+dog_y['HDB']**2

  denominator_x = math.sqrt(denominator_x)
  denominator_y = math.sqrt(denominator_y)  

  return (numerator/(denominator_x*denominator_y))

# Find similar dogs
def get_similar_dogs(inputdog,target_num,only_description):
  similarity = []
  for i in range(335):
    similarity.append(cosine_similarity(inputdog,data.loc[i],only_description))
  max_index = list(map(similarity.index, heapq.nlargest(target_num+1,similarity)))
  # max_index.pop(0)
  max_value = heapq.nlargest(target_num+1,similarity)   
  # max_value.pop(0)
  return [[max_index],[max_value]]

# Build a user_reviewed_dog according to his/her previous selections
def user_reviewed_dog(dogs_selected,gender,hdb):
  dog_profile = []
  dog_profile.append(999)
  dog_profile.append('DreamDog')
  dog_profile.append(gender)
  dog_profile.append(hdb)
  dog_description = ''
  for dog in dogs_selected:
    # print(dog['Corpus'])
    dog_description+=dog['Corpus']
  # print(dog_description)
  dog_profile.append(dog_description)
  new_corpus = []
  for i in corpus:
    new_corpus.append(i)
  new_corpus.append(dog_description)
  # print(len(new_corpus))
  vectorizer = CountVectorizer()
  word_vec = vectorizer.fit_transform(new_corpus)
  transformer = TfidfTransformer()
  tfidf = transformer.fit_transform(word_vec)
  tfidf_matrix = tfidf.toarray()
  # print(tfidf_matrix.shape)
  # print(tfidf_matrix.shape[0])
  for i in range(tfidf_matrix.shape[1]):
    dog_profile.append(tfidf_matrix[335][i])
  dtf = DataFrame()
  dtf['Dog_No'] = [dog_profile[0]]
  dtf['Name'] = [dog_profile[1]]
  dtf['Gender'] = [dog_profile[2]]
  dtf['HDB'] = [dog_profile[3]]
  dtf['Corpus'] = [dog_profile[4]]
  for i in range(tfidf_matrix.shape[1]):
    dtf['tfidf'+str(i)] = [dog_profile[5+i]]
  return dtf.loc[0]


In [23]:
print(get_similar_dogs(data.loc[12],5,only_description=True))

[[[12, 20, 123, 146, 28, 33]], [[1.0, 0.24389064661054394, 0.17461004105717134, 0.1722439908977591, 0.16243838142141387, 0.1350135131165665]]]


In [24]:
dogs_selected = [data.loc[12],data.loc[32],data.loc[234],data.loc[43]]
dream_dog = user_reviewed_dog(dogs_selected,1,1)
print(dream_dog)

get_similar_dogs(dream_dog,10,True)



Dog_No                                                     999
Name                                                  DreamDog
Gender                                                       1
HDB                                                          1
Corpus        bambi bertha bradley brandon stick power grid...
                                   ...                        
tfidf1765                                                  0.0
tfidf1766                                                  0.0
tfidf1767                                                  0.0
tfidf1768                                                  0.0
tfidf1769                                                  0.0
Name: 0, Length: 1775, dtype: object


[[[12, 234, 234, 43, 32, 20, 123, 28, 33, 146, 36]],
 [[0.707775921817555,
   0.6028611981913166,
   0.6028611981913166,
   0.3594546679771134,
   0.24096019534387375,
   0.1966222899614445,
   0.16370515518623865,
   0.14688475676236012,
   0.1298192438406726,
   0.11946013237599962,
   0.10965764210907147]]]

## **Recommender for new users**

In [None]:
# class user():
#   def __init__(self,userid,dog_gender,dog_hdb,dog_age,dog_description):
#     self.userid = userid
#     self.dog_gender = dog_gender
#     self.dog_hdb = dog_hdb
#     self.dog_age = dog_age
#     self.dog_description = dog_description

In [25]:
# class new_recommender():
#   def __init__(self,corpus):
#     self.corpus = corpus



def filter(gender,age,hdb):
  filtered = []
  for i in range(335):
    if data.loc[i,'Gender'] in gender:
      if data.loc[i,'Age'] in age:
        if data.loc[i,'HDB'] in hdb:
          filtered.append(data.loc[i])
  return filtered

def generateNewCorpus(dog_filter, dog_description):
  print(len(dog_filter))
  # print(dog_filter[3])

  user_dog_des = my_preprocessing(dog_description)
  print(user_dog_des)
  user_dog_des_str = ''
  for word in user_dog_des:
    user_dog_des_str += word
    user_dog_des_str += ' '
  corpus_new = []
  corpus_new.append(user_dog_des_str)
  for dog in dog_filter:
    corpus_new.append(dog['Corpus'])
  print(len(corpus_new))
  # print(len(corpus))
  print(corpus_new)
  return corpus_new



In [35]:
def getTargetDogs(dog_description):  
  dog_filter = filter([0],[1,3],[1])
  vectorizer = CountVectorizer()
  corpus_new = generateNewCorpus(dog_filter, dog_description)
  word_vec = vectorizer.fit_transform(corpus_new)
  # print(word_vec.toarray())
  # print(vectorizer.get_feature_names())
  transformer = TfidfTransformer()
  tfidf = transformer.fit_transform(word_vec)
  tfidf_matrix = tfidf.toarray()
  print(tfidf_matrix.shape)
  # print(tfidf_matrix)
  # print(tfidf_matrix[71][1])
  cosim = []
  denominator_user = 0
  for j in range(tfidf_matrix.shape[1]):
    denominator_user += tfidf_matrix[0][j]**2
  denominator_user = math.sqrt(denominator_user)
  for i in range(1,tfidf_matrix.shape[0]):
    numerator = 0
    denominator_target = 0
    for j in range(tfidf_matrix.shape[1]):
      numerator += tfidf_matrix[0][j]*tfidf_matrix[i][j]
      denominator_target += tfidf_matrix[i][j]**2
    denominator_target = math.sqrt(denominator_target)
    cosim.append(numerator/(denominator_user*denominator_target))

  target_num = 5
  max_index = list(map(cosim.index, heapq.nlargest(target_num,cosim)))
  max_value = heapq.nlargest(target_num,cosim)
  print(max_index)
  print(max_value)

  for index in max_index:
    print(dog_filter[index]['Corpus'])
  return max_index

dog_description = "I like a smart dog"
target = getTargetDogs(dog_description)
print('target:', target)

71
['smart']
72
['smart ', ' absolute darling food take medicine food aggression starve point confident trust independent separation anxiety company story able smiley face', ' tip tiny bit smart unfamiliar pretend calm fine cool pup mesmerize pup puppy inside', ' eat handfeed meal eat fearful take trust cheer confidence', ' ash young mum hard gentle nurture soul shine quiet thoughtful smooch happily trick caring compassionate confidence understand bit shy ideal child pet perfect companion quiet friendly pet good manner ash pup bail pound save euthanize petite size perfect ash great compassion shy young mum proud irresistibly adorable pup nervous doggie timid nature encounter warm low maintenance gentle opportunity explore perfect companion furry chill petite size sharp feature narrow bridge large pointy ash shiny beauty throw curveball shyness noticed easily forget long special true affectionate beauty lie beauty lie', ' long aggressive threaten attack fear change enjoy walk enjoy affe

## **Import Flask libraries**

In [1]:
!pip install flask-ngrok
!pip install flask_cors

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flask_cors
  Downloading Flask_Cors-3.0.10-py2.py3-none-any.whl (14 kB)
Installing collected packages: flask-cors
Successfully installed flask-cors-3.0.10


In [2]:
from flask import Flask, jsonify, request, make_response
from flask_cors import CORS, cross_origin
from flask_ngrok import run_with_ngrok
from unicodedata import name
from flask import Blueprint, jsonify, request
import sqlite3
import sys
import json

## **Initialize the database**

In [5]:
con = sqlite3.connect('database.db')
with open('schema.sql') as f:
    con.executescript(f.read())
cursor = con.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print(cursor.fetchall())
cursor.close()
con.close()

[('sqlite_sequence',), ('adopter',)]


## **Database service**

In [6]:
class Adopter:
    adopter_name = ''
    password = ''
    accomodation = -1 # -1: unknown, 1: hdb, 2: condo
    prefer_age_group = -1 # -1: unknown, 1: puppy, 10:middle, 100: old
    prefer_gender = -1 # -1: unknown, 1: male, 10: female 
    personality_preference = "" # TEXT description
    recommend_dog_index = '' # 2|3|45

In [54]:
def insert_adopter_to_db(adopter):
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute("INSERT INTO adopter (adopter_name, password, accomodation, prefer_age_group, prefer_gender, personality_preference) VALUES (?, ?, ?, ?, ?, ?)",
                   (adopter.adopter_name, adopter.password, adopter.accomodation,adopter.prefer_age_group, adopter.prefer_gender, adopter.personality_preference,))
    conn.commit()
    cursor.close()
    conn.close() 

def update_preference_to_db(adopter):
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute("UPDATE adopter SET accomodation = ?, prefer_age_group = ?, prefer_gender = ?, personality_preference = ?, recommend_dog_index = ? WHERE adopter_name = ?",
                         (adopter.accomodation, adopter.prefer_age_group, adopter.prefer_gender, adopter.personality_preference, adopter.recommend_dog_index, adopter.adopter_name, ))
    conn.commit()
    cursor.close()
    conn.close() 

def is_adopter_in_db(adopter_name, password):
    conn = get_db_connection()
    cursor = conn.cursor()
    query_result = []
    adopter = cursor.execute("SELECT * FROM adopter WHERE adopter_name = ? AND password = ?",
                         (adopter_name,password,)).fetchall()
    for row in adopter:
        query_result.append(list(row))
    rowcount = len(adopter)
    print ('query_result:', query_result," adopter.count:", rowcount)
    conn.commit()
    cursor.close()
    conn.close()
    return rowcount == 1

def query_adopter_from_db(adopter_name):
    conn = get_db_connection()
    cursor = conn.cursor()
    query_result = []
    execute_result = cursor.execute("SELECT * FROM adopter WHERE adopter_name = ?",
                         (adopter_name,)).fetchall()           
    query_result = json.dumps([tuple(row) for row in execute_result])
    rowcount = cursor.rowcount
    cursor.close()
    conn.close()
    print ('query_result:', query_result," adopter count:", len(execute_result))
    return query_result

def get_db_connection():
    connection = sqlite3.connect('database.db')
    connection.row_factory = sqlite3.Row
    return connection

## **Run the web application**

In [33]:
sys.path.append(r'./')
api = Blueprint('api', __name__)

@api.route('/hello/<string:name>/')
@cross_origin()
def say_hello(name):
    response = { 'msg': "Hello {}".format(name) }
    return jsonify(response)

@api.route('/recommend/signin', methods=['POST'])
@cross_origin(origin='*')
def signin():
    print('request.method:',request.method)
   
    data = request.get_data()
    request_json = json.loads(data)
    print("request_json:", request_json)
    adopter_name = request_json['adopter_name']
    password = request_json['password']
    print ("adopter_name:", adopter_name)
    if(is_adopter_in_db(adopter_name, password)):  
        data = query_adopter_from_db(adopter_name)
        print ("query data:", data)
        result = {'code': 'SU001','data': data}
        return jsonify(result)
    else:
    # The adopter does not exist, return 
        result = {'code': 'ER001', 'msg': 'Invalid account or password!'}
        print (adopter_name, result)
        return jsonify(result) 

@api.after_app_request
def after_app_request(response):
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Access-Control-Allow-Methods'] = '*'
    print('add headers', response)
    
    return response
        
@api.route('/recommend/signup', methods=['POST'])
def signup():
    request_json = request.json
    print("request_json:", request_json)
    adopter_name = request_json['adopter_name']
    password = request_json['password']
    if(is_adopter_in_db(adopter_name, password)):
    # Dulplicate adopter name is not allowed
        print ("in")
    else:
        print ('not in')
        adopter = Adopter()
        adopter.adopter_name = adopter_name
        adopter.password = password
        insert_adopter_to_db(adopter)
    response = jsonify({"adopter_name": adopter_name, "password": password})
    
    print('response', response)
    return response

@api.route('/recommend/update_preference', methods=['POST'])
@cross_origin()
def update_preference():
    request_json = request.json
    adopter = Adopter()
    adopter.adopter_name = request_json['adopter_name']
    adopter.accomodation = request_json['accomodation']
    adopter.prefer_age_group = request_json['prefer_age_group']
    adopter.prefer_gender = request_json['prefer_gender']
    adopter.personality_preference = request_json['personality_preference']
    # invoke Hu Hang's processing
    adopter.recommend_dog_index = ' '.join(getTargetDogs(adopter.personality_preference))
    print("adopter.recommend_dog_index:", adopter.recommend_dog_index)
    result = update_preference_to_db(adopter)
    return jsonify(result)

def create_app(app_name='SURVEY_API'):
    app = Flask(app_name)
    app.register_blueprint(api, url_prefix="/api")
   # cors = CORS(app, resources={r"/api/*": {"origins": "http://localhost:5173"}})
    cors = CORS(app, resources={r"/api/*": {"origins": "*"}})
    run_with_ngrok(app)
    return app

if __name__ == '__main__':
    app = create_app()
    app.run()


 * Serving Flask app "SURVEY_API" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://9ed5-35-222-97-85.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


In [None]:
# (venv) $ python
# import requests
# headers = {'Content-type': 'application/json'}
# body = {'adopter_name': 'Tom', 'password': '123456'}
# response = requests.options('http://88f1-35-239-175-96.ngrok.io/api/recommend/signin', json = body, headers=headers)

## **Delete the adopter database**

In [None]:
con = sqlite3.connect('database.db')
cursor = con.cursor()
cursor.execute("DROP TABLE adopter;")
con.close()

## **Test database**

In [28]:
adopter = Adopter()
adopter.adopter_name = 'Tom3'
adopter.password = '123456'
adopter.accomodation = '0'
adopter.prefer_age_group = 11
adopter.prefer_gender = 11
adopter.personality_preference = "I like a friendly dog"
insert_adopter_to_db(adopter)

In [55]:
result  = query_adopter_from_db('Tom3')
print (result)

query_result: [[3, "Tom3", "123456", 0, 11, 11, "I like a smart dog", "69 1 11 34 56"]]  adopter count: 1
[[3, "Tom3", "123456", 0, 11, 11, "I like a smart dog", "69 1 11 34 56"]]


In [None]:
result  = is_adopter_in_db('Tom2', '123456')
print( result )

In [46]:
adopter.personality_preference = "I like a smart dog"
tartget_dogs = getTargetDogs(adopter.personality_preference)

adopter.recommend_dog_index = ' '.join(map(str, tartget_dogs))
print("adopter.recommend_dog_index:", adopter.recommend_dog_index)
update_preference_to_db(adopter)

71
['smart']
72
['smart ', ' absolute darling food take medicine food aggression starve point confident trust independent separation anxiety company story able smiley face', ' tip tiny bit smart unfamiliar pretend calm fine cool pup mesmerize pup puppy inside', ' eat handfeed meal eat fearful take trust cheer confidence', ' ash young mum hard gentle nurture soul shine quiet thoughtful smooch happily trick caring compassionate confidence understand bit shy ideal child pet perfect companion quiet friendly pet good manner ash pup bail pound save euthanize petite size perfect ash great compassion shy young mum proud irresistibly adorable pup nervous doggie timid nature encounter warm low maintenance gentle opportunity explore perfect companion furry chill petite size sharp feature narrow bridge large pointy ash shiny beauty throw curveball shyness noticed easily forget long special true affectionate beauty lie beauty lie', ' long aggressive threaten attack fear change enjoy walk enjoy affe