In [None]:
!pip install sentence-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import numpy as np
import re
import pickle
import random
import torch
import nltk
nltk.download('stopwords')

from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Knowledge Base: Data Pre-processing

In [34]:
# Load the raw FAQ knowledge base into a DataFrame.
# NOTE(review): the absolute '/content/...' path looks Colab-specific — adjust
# it when running the notebook elsewhere.
df = pd.read_csv('/content/Mental_Health_FAQ.csv')

In [35]:
# Preview the first rows of the freshly loaded data.
df.head()

Unnamed: 0,Question_ID,Questions,Answers
0,1590140,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,2110618,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,6361820,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,9434130,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,7657263,Can people with mental illness recover?,"When healing from mental illness, early identi..."


In [36]:
# Discard the Question_ID column; every other column is kept unchanged.
df = df.drop(columns=['Question_ID'])

In [37]:
# Normalise the questions: lower-case them, then replace every character that
# is not a letter, digit or whitespace with a space.
# The vectorised .str accessor is used instead of .apply(re.sub) so that a
# missing question stays missing rather than raising a TypeError inside re.sub.
df['Questions'] = (
    df['Questions']
    .str.lower()
    .str.replace(r'[^A-Za-z0-9\s]', ' ', regex=True)
)

In [38]:
# Check the effect of the normalisation on the first rows.
df.head()

Unnamed: 0,Questions,Answers
0,what does it mean to have a mental illness,Mental illnesses are health conditions that di...
1,who does mental illness affect,It is estimated that mental illness affects 1 ...
2,what causes mental illness,It is estimated that mental illness affects 1 ...
3,what are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,can people with mental illness recover,"When healing from mental illness, early identi..."


In [39]:
# Export the processed knowledge base (without the index column).
# hr_response() reads 'cleaned_df.csv' back to look up the answer text.
df.to_csv('cleaned_df.csv', index=False)

### Model Development: Multi-qa-mpnet-base

In [40]:
# Initiate the sentence-embedding model identified by the name below.
# hr_response() creates its encoder from the same model name.
model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

In [41]:
# Encode the normalised questions to get their embeddings.
# convert_to_tensor=True asks encode() for a tensor result. hr_response()
# uses the row position of the best match to index back into the answers, so
# the order of the questions passed here matters.
qn_embeddings = model.encode(df['Questions'].to_list(), convert_to_tensor=True)

In [42]:
# Persist the question (sentence) embeddings so hr_response() can reload them.
# The tensor is moved to the CPU first so the pickle can also be loaded on a
# machine without a GPU, and `with` guarantees the file is flushed and closed.
with open('stsb-embedding.pkl', 'wb') as embedding_file:
    pickle.dump(qn_embeddings.cpu(), embedding_file)

In [43]:
# Encoder instances keyed by model name, so repeated hr_response() calls reuse
# the already-loaded model instead of re-initialising it for every question.
_HR_ENCODERS = {}


def _get_hr_encoder(model_name='sentence-transformers/multi-qa-mpnet-base-cos-v1'):
    """Return the SentenceTransformer for ``model_name``, loading it on first use."""
    if model_name not in _HR_ENCODERS:
        _HR_ENCODERS[model_name] = SentenceTransformer(model_name)
    return _HR_ENCODERS[model_name]


def hr_response(req, threshold=0.70):
    """Answer a user question from the exported FAQ knowledge base.

    The question is normalised like the stored questions, embedded, and
    compared with the pickled question embeddings using cosine similarity.
    The best score is printed before the decision is made.

    Parameters
    ----------
    req : str
        The user's question.
    threshold : float, optional
        Score the best match has to exceed for its answer to be returned.
        Defaults to 0.70.

    Returns
    -------
    The 'Answers' entry of the best-matching question, or None (after printing
    a request to elaborate) when the question is empty after normalisation or
    no stored question scores above ``threshold``.
    """
    fallback = "Could you please elaborate your situation more? I don't really understand."

    # Pre-processing: same pattern as used for the knowledge-base questions.
    # (A doubled backslash inside the raw string would put a literal backslash
    # in the character class instead of the whitespace class.)
    req = re.sub(r'[^A-Za-z0-9\s]', ' ', req.lower())
    if not req.strip():
        # Nothing left to match against; skip the model entirely.
        print(fallback)
        return None

    # Encode the text to get a (1, dim) embedding
    req_embeddings = _get_hr_encoder().encode(req).reshape(1, -1)

    # NOTE: pickle.load can execute arbitrary code — only load an embeddings
    # file that was written by this notebook.
    with open('stsb-embedding.pkl', 'rb') as embedding_file:
        qn_embeddings = pickle.load(embedding_file)
    # The embeddings may have been pickled as a (possibly GPU-resident) torch
    # tensor; scikit-learn needs a host-side NumPy array.
    if torch.is_tensor(qn_embeddings):
        qn_embeddings = qn_embeddings.detach().cpu().numpy()

    # Compute similarity of every stored question to the request and pick the
    # best one (argmax returns the first index on ties, like a stable sort).
    scores = cosine_similarity(qn_embeddings, req_embeddings).ravel()
    qn_indice = int(np.argmax(scores))
    top_score = scores[qn_indice]

    print(top_score)

    # Return response of the top most similar question
    if top_score > threshold:
        df = pd.read_csv('cleaned_df.csv')
        return df['Answers'].iloc[qn_indice]

    print(fallback)
    return None

In [None]:
# Try the retrieval on a misspelled query.
hr_response('Does people with mental health probkems recover?')