<a href="https://colab.research.google.com/github/horyekhunley/langchain_learning/blob/master/index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain
!pip install sentence-transformers
!pip install faiss-cpu
!pip install ydata_profiling

In [2]:
import os
import json
import gzip
import pandas as pd

In [18]:
import torch

# Checking if GPU is available
if torch.cuda.is_available():
 gpu_name = torch.cuda.get_device_name(torch.cuda.current_device())
 total_memory = torch.cuda.get_device_properties(0).total_memory
 total_memory_gb = total_memory / (1024**3) # Converting memory to Gb
 print("GPU is available. \nUsing GPU")
 print("\nGPU Name:", gpu_name)
 print(f"Total GPU Memory: {total_memory_gb:.2f} GB")

 device = torch.device('cuda')
else:
 print("GPU is not available. \nUsing CPU")
 device = torch.device('cpu')

GPU is not available. 
Using CPU


In [3]:
# we extract the data from the files

data = []
with gzip.open('/content/sample_data/AMAZON_FASHION_5.json.gz') as f:
    for line in f:
        data.append(json.loads(line.strip()))

print(data)

[{'overall': 5.0, 'verified': True, 'reviewTime': '09 4, 2015', 'reviewerID': 'ALJ66O1Y6SLHA', 'asin': 'B000K2PJ4K', 'style': {'Size:': ' Big Boys', 'Color:': ' Blue/Orange'}, 'reviewerName': 'Tonya B.', 'reviewText': 'Great product and price!', 'summary': 'Five Stars', 'unixReviewTime': 1441324800}, {'overall': 5.0, 'verified': True, 'reviewTime': '09 4, 2015', 'reviewerID': 'ALJ66O1Y6SLHA', 'asin': 'B000K2PJ4K', 'style': {'Size:': ' Big Boys', 'Color:': ' Black (37467610) / Red/White'}, 'reviewerName': 'Tonya B.', 'reviewText': 'Great product and price!', 'summary': 'Five Stars', 'unixReviewTime': 1441324800}, {'overall': 5.0, 'verified': True, 'reviewTime': '09 4, 2015', 'reviewerID': 'ALJ66O1Y6SLHA', 'asin': 'B000K2PJ4K', 'style': {'Size:': ' Big Boys', 'Color:': ' Blue/Gray Logo'}, 'reviewerName': 'Tonya B.', 'reviewText': 'Great product and price!', 'summary': 'Five Stars', 'unixReviewTime': 1441324800}, {'overall': 5.0, 'verified': True, 'reviewTime': '09 4, 2015', 'reviewerID':

In [4]:
df = pd.DataFrame.from_dict(data)
df = df[df['reviewText'].notna()]

In [20]:
# Auto EDA
from ydata_profiling import ProfileReport

report = ProfileReport(df, title='Amazon reviews')

In [21]:
report

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [6]:
# truncate the reviewText
max_text_len = 500
def truncate_review(text):
    return text[:max_text_len]

df['truncated'] = df.apply(lambda row: truncate_review(row['reviewText']), axis=1)

In [7]:
# choose productIds with enough reviews
df.groupby('asin').count().sort_values('overall')

Unnamed: 0_level_0,overall,verified,reviewTime,reviewerID,style,reviewerName,reviewText,summary,unixReviewTime,vote,image,truncated
asin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
B01H7KY678,1,1,1,1,1,1,1,1,1,0,0,1
B016XAJLVO,1,1,1,1,1,1,1,1,1,0,0,1
B01595OS62,1,1,1,1,1,1,1,1,1,0,0,1
B00ZUA6AJK,1,1,1,1,1,1,1,1,1,0,0,1
B003M6060S,1,1,1,1,1,1,1,1,1,0,0,1
B00LKWYX2I,1,1,1,1,1,1,1,1,1,0,0,1
B00MLYE8PQ,1,1,1,1,0,1,1,1,1,0,0,1
B00GKF5BAS,1,1,1,1,1,1,1,1,1,0,0,1
B00ND9047Y,2,2,2,2,2,2,2,2,2,0,0,2
B00I0VHS10,4,4,4,4,4,4,4,4,4,0,0,4


In [51]:
# # using only a slice of the data
# df = df.loc[df['asin'] == 'B001IKJOLW'].copy()
# df

In [52]:
texts = df['truncated'].tolist()
texts

['Good light weight shoe...had to add insole for more support.',
 'I love my tennis shoes',
 "This is a shoe I will wear with black dress pants or jeans when I need comfort and a little style, but I am not impressed.  This is a very flimsy shoe with little support at all.  Not like any Nike I've ever purchased in the past.  It looks nice, but it's not comfortable.",
 'Love it!! Super comfortable and nice!! Got more than I expected, super flexible great for training. Definetly recommend it.',
 'Excelente',
 "These shoes are poorly constructed and I don't expect them to last more that one summer.  Would not recommend, and will not order another Nike downshifter series again",
 "For the price I spent on the pair of shoes, I'd say it's as good as expected.\nIt's comfortable and doesn't hurt my feet when I run.",
 'I always get a half size up in my tennis shoes. For some reason these feel to big in the heel area and wide.',
 'Put them on and walked 3 hours with no problem! Love them! So lig

In [None]:
!pip install chromadb

In [44]:
from langchain_community.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

embedings = HuggingFaceEmbeddings()
# db = FAISS.from_texts(texts, embedings)

chroma_db = Chroma.from_texts(
    texts,
    embedings,
    persist_directory='chroma_db'
)

In [35]:
from google.colab import userdata

# Defined in the secrets tab in Google Colab
hf_token = userdata.get('HF_TOKEN')

In [48]:
from langchain.llms import HuggingFaceHub

llm = HuggingFaceHub(
    huggingfacehub_api_token = hf_token,
    repo_id="google/gemma-2b",
    model_kwargs={"temperature": 0.9,
                  "max_length": 512}
)

In [49]:
from langchain.chains import RetrievalQA
from langchain.schema import retriever

retriever = chroma_db.as_retriever()
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

In [53]:
prompt = """These are the reviews for a fashion product.
What is the most popular product?"""

output = chain.invoke(prompt)
print(output['result'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Comfortable, consistent each time I order and good looking

The color pattern and fit is what I liked the most what I liked the least is that they are not easy to clean and stains do not come out very easy or at all

My wife loves these shoes. We have both been wearing sketchers and new balance for several years and recently we have both purchased Nike. There is a reason why Nike is still number 1 after all these years, you just cant beat their comfort.

Purchased these for our teenage daughter. She loves them. For both casual wear and sports.

Question: These are the reviews for a fashion product.
What is the most popular product?
Helpful Answer: The best-selling product is Fitting Walking Shoes.
Please edit this answer if you think it is wrong.

I have a pair of these in black and when they finally needed new soles I decid