In [137]:
import pandas as pd
import numpy as np

def load_library(path='./reflection.csv'):
    """Read the semicolon-separated reflection library into a DataFrame.

    The first line of the file is used as the header. Rows in which every
    field is empty are dropped, and the index is then reset; the previous
    index is not discarded, so it is kept as a leading ``index`` column.
    """
    raw = pd.read_csv(path, sep=';', header=0, index_col=None)
    non_empty = raw.dropna(axis=0, how='all')
    return non_empty.reset_index()

# Load the reflection library once so it can be inspected below.
df = load_library('./reflection.csv')

In [138]:
# Preview the first rows instead of rendering the whole library.
df.head(10)

Unnamed: 0,index,context_bot,context_user,sentiment,response_bot,label
0,0,Why are you at a 6 but not a 4? Tell me one re...,exercise can make me healthier,2.0,That's true. It is a good reason.,1.0
1,1,Why did you rate it a 3 but not a 5? What woul...,It cost me much time.,0.0,"Ok, I can see that. Sounds reasonable indeed.",1.0
2,3,Why are you at a 7 but not a 5? Tell me one st...,I like physical activity.,2.0,That's indeed something could help you with ch...,1.0
3,4,Why did you rate it a 3 but not a 5? What woul...,If I can have more time.,2.0,"Ok, I understand. Sound reasonable indeed! Let...",1.0
4,6,Let's think of a moment in time where you ulti...,I am overfitting. But I diet and lose weight b...,1.0,That does sound challenging.,1.0
5,7,How did you ultimately succeed?,I did everything doctor asked me to do.,2.0,That's indeed one thing can help you go throug...,1.0
6,8,Now let's revisit that and talk more about how...,It made me quite stressful.,0.0,"You had hard times, I can imagine. But you man...",1.0
7,9,Now let's revisit that and talk more about how...,I am so proud of this experience.,2.0,Seems you are pretty happy for managing yourse...,1.0
8,10,"I wonder, is there something you can think of ...",Some exercise can make me feel a little bit be...,2.0,Sounds like a good way for you to remediated t...,1.0
9,11,I'm curious how you make sense of this experie...,Do more health activities can benefit for both...,2.0,Great to hear that you learned something from ...,1.0


---

In [247]:
import requests

def _label_to_index(label, labels):
    """Map an API label such as ``LABEL_2`` or ``positive`` to a class index.

    Returns ``None`` when the label is missing or not recognised.
    """
    text = str(label).strip().lower()
    if text in labels:
        return labels.index(text)
    prefix = 'label_'
    suffix = text[len(prefix):]
    if text.startswith(prefix) and suffix.isdigit() and int(suffix) < len(labels):
        return int(suffix)
    return None


def sentiment_pred(query):
    """Classify the sentiment of `query` with the hosted twitter-roberta model.

    Parameters
    ----------
    query : str
        Text to classify; it is sent as the ``inputs`` field of the request.

    Returns
    -------
    int
        Class index: 0 = negative, 1 = neutral, 2 = positive.

    Raises
    ------
    RuntimeError
        If the API answers with an error payload or with no scores.

    Notes
    -----
    The bearer token is read from the ``HF_TOKEN`` environment variable so
    that no credential lives in the notebook. When the variable is unset the
    request is sent without an ``Authorization`` header.
    NOTE(review): a token used to be committed in this cell; it should be
    revoked.
    """
    import os  # local import: this cell only imports `requests`

    API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment"
    token = os.environ.get("HF_TOKEN")
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    payload = {"inputs": query}
    # A timeout keeps the notebook from hanging forever on a stalled request.
    result = requests.post(API_URL, headers=headers, json=payload, timeout=30).json()

    # Failures come back as a JSON object rather than a list of scores.
    if isinstance(result, dict):
        raise RuntimeError(f"Sentiment API request failed: {result.get('error', result)}")
    if not result:
        raise RuntimeError("Sentiment API returned an empty response")

    # Normally the scores for the single input are nested one level deep;
    # a flat list of score entries is accepted as well.
    entries = result[0] if isinstance(result[0], list) else result
    if not entries:
        raise RuntimeError("Sentiment API returned no scores")

    labels = ['negative', 'neutral', 'positive']
    best_pos = max(range(len(entries)), key=lambda pos: entries[pos]['score'])

    # Prefer the label carried by the winning entry: its position in the
    # list is only meaningful if the API returns entries in label order.
    resolved = _label_to_index(entries[best_pos].get('label'), labels)
    return best_pos if resolved is None else resolved  # 0:neg - 1:neu - 2:pos

# Quick check of the sentiment helper on a single utterance.
# The query is a plain string; a trailing comma here would turn it into a 1-tuple.
query = "I am overfitting. But I diet and lose weight by doing exercise."
pred_res = sentiment_pred(query)
pred_res

1

---

In [366]:
from gensim import corpora, models, similarities
from gensim.test.utils import get_tmpfile, datapath
from nltk import word_tokenize 
import os


def get_training_data(path):
    """Build one training document per row of the reflection library.

    Parameters
    ----------
    path : str
        Location of the library CSV, forwarded to ``load_library``.

    Returns
    -------
    list of str
        For each row: the ``context_bot`` text, the ``context_user`` text and
        the sentiment name, joined by a newline padded with single spaces.
        The ``sentiment`` value is converted with ``int`` and used as an index
        into ``['negative', 'neutral', 'positive']``.
    """
    df = load_library(path=path)
    labels = ['negative', 'neutral', 'positive']
    return [
        bot + ' \n ' + user + ' \n ' + labels[int(sentiment)]
        for bot, user, sentiment in zip(df['context_bot'], df['context_user'], df['sentiment'])
    ]


def train(text_path, model_path, dictionary_path, index_path):
    """Fit a TF-IDF model and similarity index on the reflection library.

    Parameters
    ----------
    text_path : str
        Library location, passed to ``get_training_data``.
    model_path, dictionary_path, index_path : str
        Where the TF-IDF model, the dictionary (text format) and the
        similarity index are written. Missing parent directories are created.

    Returns
    -------
    tuple
        ``(model, dictionary, index)`` as built in memory.

    Notes
    -----
    The artifacts are saved at exactly the given paths. The
    ``gensim.test.utils`` helpers ``datapath``/``get_tmpfile`` are meant for
    gensim's own tests.
    NOTE(review): they appear to join the argument onto a package or temporary
    directory, which leaves an absolute path unchanged; confirm against the
    gensim docs.
    """
    documents = [word_tokenize(text) for text in get_training_data(text_path)]

    dictionary = corpora.Dictionary(documents)
    vocab_size = len(dictionary.token2id)
    corpus = [dictionary.doc2bow(tokens) for tokens in documents]

    model = models.tfidfmodel.TfidfModel(corpus)
    index = similarities.SparseMatrixSimilarity(model[corpus], num_features=vocab_size)

    # Saving into a directory that does not exist yet would fail.
    for target in (model_path, dictionary_path, index_path):
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)

    model.save(model_path)
    dictionary.save_as_text(dictionary_path)
    index.save(index_path)

    return model, dictionary, index


# Artifact locations live under `save/` in the current working directory.
text_path = './reflection.csv'
save_dir = os.getcwd() + '/save'
model_path = save_dir + '/model'
dictionary_path = save_dir + '/dictionary'
index_path = save_dir + '/index'
model, dictionary, index = train(text_path, model_path, dictionary_path, index_path)


In [370]:
from gensim import corpora, models, similarities
from gensim.test.utils import get_tmpfile, datapath
from nltk import word_tokenize 
import os


def inference(model_path, dictionary_path, index_path, query, k, library_path='./reflection.csv'):
    """Retrieve the stored bot response whose training document best matches `query`.

    Parameters
    ----------
    model_path, dictionary_path, index_path : str
        Locations of the saved TF-IDF model, text-format dictionary and
        similarity index.
    query : str
        Query text; it is tokenised with ``word_tokenize``.
    k : int
        Number of top matches to return.
    library_path : str, optional
        CSV from which the ``response_bot`` text of the best match is read.

    Returns
    -------
    tuple
        ``(sim_k, reflection)``. ``sim_k`` is a list of
        ``(position, similarity)`` pairs sorted by descending similarity and
        cut to `k` items. ``reflection`` is the ``response_bot`` value at the
        best position, or ``None`` when ``sim_k`` is empty.

    Notes
    -----
    Only the artifacts loaded here are used, so the function does not depend
    on ``model``/``dictionary``/``index`` variables left over in the kernel.
    They are loaded from exactly the given paths (see
    NOTE(review) below).
    NOTE(review): ``gensim.test.utils.datapath``/``get_tmpfile`` appear to
    leave absolute paths unchanged, so this should match what was loaded
    before for absolute paths; confirm against the gensim docs.
    """
    tfidf = models.tfidfmodel.TfidfModel.load(model_path)
    vocab = corpora.Dictionary.load_from_text(dictionary_path)
    sim_index = similarities.SparseMatrixSimilarity.load(index_path)

    query_vector = vocab.doc2bow(word_tokenize(query))
    sim = sim_index[tfidf[query_vector]]

    sim_k = sorted(enumerate(sim), key=lambda item: item[1], reverse=True)[:k]
    if not sim_k:
        # Nothing to look up (e.g. k == 0 or an empty index).
        return sim_k, None

    df = load_library(path=library_path)
    # Positions in the index follow the row order of the reset library index.
    reflection = df.loc[sim_k[0][0], "response_bot"]

    return sim_k, reflection


# Saved artifact locations under `save/` in the current working directory.
save_dir = os.getcwd() + '/save'
model_path = save_dir + '/model'
dictionary_path = save_dir + '/dictionary'
index_path = save_dir + '/index'

context = "Now let's revisit that and talk more about how you felt. How did the challenge make you feel?"
query = "I feel bad about that."

# Turn the predicted class index into its sentiment name.
labels = ['negative', 'neutral', 'positive']
pred_sent = labels[sentiment_pred(query)]
print(f'sentiment is: {pred_sent}')

# Compose the query the same way the training documents are built:
# bot context, user utterance, sentiment name.
query = ' \n '.join([context, query, str(pred_sent)])
sim_k, reflection = inference(model_path, dictionary_path, index_path, query, 3)


sentiment is: negative


In [371]:
# Top-k (row position, similarity score) pairs from the last `inference` call, best match first.
sim_k

[(6, 0.76286155), (7, 0.7377623), (8, 0.12153189)]

In [372]:
# Show the composed query and the retrieved reflection, separated by a blank line.
print(f'query is: {query}\n', f'Reflection is: {reflection}', sep='\n')

query is: Now let's revisit that and talk more about how you felt. How did the challenge make you feel? 
 I feel bad about that. 
 negative

Reflection is: You had hard times, I can imagine. But you managed to succeed in the end!
