# Read synthetic fashion reviews

In [1]:
import pandas as pd

# Load the synthetic fashion reviews from CSV (path is relative to the notebook's
# working directory). Later cells read the 'Review', 'Polarity' and 'Topic' columns.
reviews = pd.read_csv('../Syntetic_reviews/sample_reviews_fashion.csv')

# Useful classes and functions

In [16]:
import numpy as np

def cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
    """Return the cosine similarity of two 1-D vectors.

    ``scipy.spatial.distance.cosine`` computes the cosine *distance*
    (``1 - similarity``), so its result is converted back to a similarity:
    identical directions give ``1.0``, orthogonal vectors ``0.0`` and
    opposite directions ``-1.0``.

    Args:
        v1: First vector.
        v2: Second vector, same length as ``v1``.

    Returns:
        The cosine similarity of ``v1`` and ``v2``.
    """
    # Imported locally so this helper does not rely on an import that lives
    # in a different notebook cell.
    from scipy.spatial import distance

    return 1.0 - distance.cosine(v1, v2)

class VectorDatabase:
    """In-memory store of sentence embeddings used to tag text with topics.

    Every stored entry keeps the sentence text, its polarity, its topic label
    (under the ``'type'`` key) and its embedding. A query is embedded with the
    same model and compared to every stored embedding with
    ``scipy.spatial.distance.cosine`` (a distance: smaller means closer); the
    topics that dominate the nearby entries are returned.

    ``pd`` (pandas) and ``distance`` (``scipy.spatial.distance``) must be
    available in the enclosing namespace when ``search`` is called.
    """

    # Number of nearest entries that are considered even if they are not
    # within CLOSE_DISTANCE.
    TOP_K = 10
    # Entries closer than this are considered regardless of their rank.
    CLOSE_DISTANCE = 0.5
    # Hard cutoff: entries at or beyond this distance are never considered.
    MAX_DISTANCE = 0.6
    # A topic is reported only if it holds more than this share of the
    # considered entries ...
    MIN_TOPIC_SHARE = 0.4
    # ... and is backed by at least this many of them.
    MIN_TOPIC_COUNT = 4

    def __init__(self, nlp, model):
        """Create an empty database.

        Args:
            nlp: Callable text pipeline invoked as ``nlp(text, disable=["ner"])``;
                its tokens must expose ``dep_``, ``i``, ``text`` and ``subtree``.
            model: Encoder exposing ``encode(list_of_str)`` that returns one
                embedding per input string.
        """
        self.vectors = {}
        self.nlp = nlp
        self.model = model

    def split_sentences(self, text):
        """Split ``text`` into one lower-cased segment per parse ROOT token.

        For every token whose dependency label is ``"ROOT"`` the texts of the
        tokens in its subtree are joined with single spaces, in document
        order, then lower-cased and stripped.

        Args:
            text: Raw text to split.

        Returns:
            List of segment strings, one per ROOT token.
        """
        doc = self.nlp(text, disable=["ner"])

        segments = []
        for root in (token for token in doc if token.dep_ == "ROOT"):
            # Unique subtree positions, restored to document order.
            positions = sorted({token.i for token in root.subtree})
            segment = ' '.join(doc[i].text for i in positions)
            segments.append(segment.lower().strip())
        return segments

    def insert(self, sentence: str, polarity: int, type: str) -> None:
        """Embed ``sentence`` and store it with its polarity and topic label.

        Entries are keyed by a running integer starting at 1.

        Args:
            sentence: Text to embed and store.
            polarity: Polarity value stored alongside the text.
            type: Topic label stored under the ``'type'`` key.
        """
        embedding = list(self.model.encode([sentence])[0])
        key = len(self.vectors) + 1
        self.vectors[key] = {'text': sentence,
                             'polarity': polarity,
                             'type': type,
                             'vector': embedding}

    def search(self, query: str):
        """Return the topics that dominate the stored entries nearest to ``query``.

        Stored entries are ranked by cosine distance to the query. An entry is
        considered when it is closer than ``MAX_DISTANCE`` and is either among
        the ``TOP_K`` nearest or closer than ``CLOSE_DISTANCE``. A topic is
        returned when it accounts for more than ``MIN_TOPIC_SHARE`` of the
        considered entries and at least ``MIN_TOPIC_COUNT`` of them.

        Args:
            query: Text to look up.

        Returns:
            List of topic labels (sorted by label); empty when the database is
            empty or no topic qualifies.
        """
        # Nothing stored: there is nothing to rank and no topic can qualify.
        if not self.vectors:
            return []

        query_vector = self.model.encode([query])[0]
        entries = list(self.vectors.values())
        candidates = pd.DataFrame({
            'distance': [distance.cosine(query_vector, entry['vector']) for entry in entries],
            'topic': [entry['type'] for entry in entries],
        })

        # Rank by distance first so that TOP_K refers to the nearest entries
        # rather than to the order in which entries were inserted.
        ranked = candidates.sort_values('distance', kind='stable').reset_index(drop=True)
        rank = ranked.index.to_series()

        considered = (
            ((rank < self.TOP_K) | (ranked['distance'] < self.CLOSE_DISTANCE))
            & (ranked['distance'] < self.MAX_DISTANCE)
        )

        # Entries per topic among the considered ones (groupby sorts by label).
        counts = ranked.loc[considered].groupby('topic').size()
        share = counts / counts.sum()

        dominant = counts[(share > self.MIN_TOPIC_SHARE) & (counts >= self.MIN_TOPIC_COUNT)]
        return dominant.index.tolist()

    def long_search(self, query: str):
        """Run ``search`` on every segment of ``query`` and merge the topics.

        Args:
            query: Text that may contain several sentences.

        Returns:
            De-duplicated list of topic labels, in order of first appearance.
        """
        topics = []
        for sentence in self.split_sentences(query):
            topics.extend(self.search(sentence))
        # dict.fromkeys removes duplicates while keeping a deterministic order.
        return list(dict.fromkeys(topics))




# Create the vector database

In [17]:
import numpy as np
from scipy.spatial import distance
from collections import defaultdict
from typing import List, Tuple
import spacy
from sentence_transformers import SentenceTransformer
# Sentence splitter (spaCy pipeline) and sentence encoder used by the database.
nlp = spacy.load("en_core_web_lg")
model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')

vector_db = VectorDatabase(nlp, model)

# Index every synthetic review together with its polarity and topic label.
for review_text, polarity, topic in zip(reviews['Review'], reviews['Polarity'], reviews['Topic']):
    vector_db.insert(review_text, polarity, topic)



# Test the vector database

In [18]:
# Multi-sentence sample review used to smoke-test topic detection.
# Bound to `sample_review` (not `str`) so the builtin `str` stays usable in
# every cell that runs afterwards in the same kernel.
sample_review = '''
These shorts are very flattering and perfect for running in. 
I really loved them, until I put them through the wash once. 
I hung dry them, but the edges have already started deteriorating. 
I can tell these shorts will have a short lifespan, and I've never had this type of issue with any other shorts I bought.
'''

aux = vector_db.long_search(sample_review)
aux

['longevity', 'Material and Quality']

# Test VDB with some actual reviews from our dataset

In [5]:

from google.cloud import bigquery
import os

# Point the Google client library at a local credentials file (presumably a
# service-account key - confirm). Set here, before the client below is built.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../sa.json"


client = bigquery.Client()


# Query for the reviewText, overall and asin columns of one ASIN in
# `factored.raw_reviews`.
sql = '''

SELECT reviewText,overall,asin
from `factored.raw_reviews`
WHERE asin = 'B017HXIFV8'
'''

# Run the query, wait for it to finish and load the result into a DataFrame.
df = client.query(sql).result().to_dataframe()


#aux = vector_db.long_search(str)
#aux

In [19]:
# Tag each fetched review and print it next to the topics detected for it.
for review in df['reviewText']:
    aux = vector_db.long_search(review)
    print(review, aux)
    print('-------------------------------')

This jacket has a lovely hourglass taper, with tailored bust, slimmed waist and flaired hips. It's not extreme - just a flattering fit. I would say it would be better for someone 5'7" or under, as the sleeves aren't super long. Adequate, but not great for a taller girl. Love the contrast, and the stitching. Very attractive jacket. ['Fit and Comfort', 'Material and Quality']
-------------------------------
CHARLES RIVER APPAREL WOMEN'S LITHIUM QUILTED JACKET

WOW!  This jacket is very, very nice.  Believe me when I tell you that the picture does not do this jacket any justice.  My jacket is grey with pink highlights on the zipper and pockets.  The jacket has a quilted design.  We live in Ohio, and it is definitely winter here.  This jacket is warm, wind and water resistant, and while it is doing all of that, it is looking good.  It is also very light weight.  The jacket is tapered at the waist and kind of has a slimming effect.  Looks great!

There is a dropped tail hem for extra warmth