In [1]:
import pandas as pd
import chromadb
from sentence_transformers import SentenceTransformer

# --- Build the vector store for the sales data -------------------------------
# Load the raw transactions; each row becomes one record in the collection.
Data = pd.read_csv("SalesData.csv")
# Sentence-embedding model used to vectorise the row texts below.
Model = SentenceTransformer('all-MiniLM-L6-v2')
Client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable in the same kernel:
# create_collection raises once a collection with this name already exists.
Collection = Client.get_or_create_collection(name="VectorDB_Datas")
# Unlabelled, space-joined rendering of every row. Nothing in this cell reads
# it; kept in case another cell does.
Texts = [" ".join(map(str, Row)) for Row in Data.values]

# One "column: value" line per field, so each value is embedded together with
# the name of the column it came from.
Labelled_Text = [
    "\n".join(f"{col}: {row[col]}" for col in row.index)
    for _, row in Data.iterrows()
]

Embeds = Model.encode(Labelled_Text, batch_size=25)
# upsert instead of add: ids are the row positions, so re-running the cell
# overwrites the existing records rather than colliding with them.
# NOTE: records left over from an earlier, longer CSV are not removed.
Collection.upsert(
    ids=[str(Index) for Index in range(len(Labelled_Text))],
    embeddings=Embeds,
    # The record text is stored under the "text" metadata key.
    metadatas=[{"text": Text} for Text in Labelled_Text],
)

Vector_DB = Collection

In [2]:
# Natural-language question whose nearest stored records are retrieved below.
Query = "What is the total sales amount for transactions where the product type is 'Furniture' and the payment method is 'Credit Card'?"

# Embed the question and run a similarity search against the collection.
Query_Embed = Model.encode(Query)
Result = Vector_DB.query(query_embeddings=[Query_Embed])

# "metadatas" holds one list of metadata dicts per query embedding; flatten
# them and keep only the text stored under the "text" key.
Relevant_Data = []
for Metadata_List in Result.get("metadatas", []):
    Relevant_Data.extend(Item['text'] for Item in Metadata_List)

# Single newline-separated string that is handed to the prompt.
Pass_Content = "\n".join(Relevant_Data)

In [4]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
# max_new_tokens (not "max_token") is the generation argument that limits how
# many tokens are generated on top of the prompt.
Pipe = pipeline("text-generation", model="openai-community/gpt2-large", max_new_tokens=100)

# Prompt text sent to the model. The {Pass_Content} placeholder is required:
# it is the single input variable the PromptTemplate fills in, and without it
# the retrieved transactions never reach the prompt.
Template = """Here is a list of details:
{Pass_Content}

Report:
"""

Device set to use cpu


In [5]:
# Text-generation backend: GPT-2 large, limited to 100 newly generated tokens.
Pipe = pipeline(
    "text-generation",
    model="openai-community/gpt2-large",
    max_new_tokens=100,
)
# Wrap the Hugging Face pipeline so LangChain can drive it as an LLM.
LLM = HuggingFacePipeline(pipeline=Pipe)

# Prompt with one input variable, "Pass_Content", filled from the retrieved text.
PromptTemp = PromptTemplate(input_variables=["Pass_Content"], template=Template)

# Chain = prompt + model; run it on the retrieved transactions and show the result.
Mod = LLMChain(prompt=PromptTemp, llm=LLM)
Report = Mod.run(Pass_Content=Pass_Content)
print(Report)

Device set to use cpu
  LLM = HuggingFacePipeline(pipeline=Pipe)
  Mod = LLMChain(llm=LLM, prompt=PromptTemp)
  Report = Mod.run(Pass_Content=Pass_Content)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Here is a list of details:
TransactionID: TXN-1016
SalesAmount: 6932.16
DiscountPercentage: 2.66
SalesRegion: West
ProductType: Furniture
UnitsSold: 1
PaymentMethod: Credit Card
CustomerType: Returning
DayOfPurchase: Tuesday
ReturnStatus: No Return
TransactionID: TXN-1029
SalesAmount: 3820.45
DiscountPercentage: 3.52
SalesRegion: Central
ProductType: Furniture
UnitsSold: 9
PaymentMethod: Credit Card
CustomerType: Returning
DayOfPurchase: Sunday
ReturnStatus: No Return
TransactionID: TXN-1000
SalesAmount: 4905.23
DiscountPercentage: 20.93
SalesRegion: Central
ProductType: Furniture
UnitsSold: 6
PaymentMethod: Credit Card
CustomerType: Returning
DayOfPurchase: Saturday
ReturnStatus: No Return
TransactionID: TXN-1371
SalesAmount: 4501.62
DiscountPercentage: 48.1
SalesRegion: North
ProductType: Furniture
UnitsSold: 4
PaymentMethod: Credit Card
CustomerType: Returning
DayOfPurchase: Sunday
ReturnStatus: No Return
TransactionID: TXN-1008
SalesAmount: 7596.85
DiscountPercentage: 38.77
SalesRe

In [7]:
from fpdf import FPDF

# The built-in PDF fonts (Arial here) only cover Latin-1, so the generated
# text has to be made Latin-1 safe before it is written.
Report = Report.replace("\u2013", "-")  # en dash -> plain hyphen (keeps it readable)
# Any remaining character outside Latin-1 becomes "?" instead of raising an
# encoding error while the PDF is produced.
Pdf_Text = Report.encode("latin-1", errors="replace").decode("latin-1")

# Build a one-section report: bold centred title, then the body text.
pdf = FPDF()
pdf.add_page()
pdf.set_font('Arial', 'B', 16)
# Width 0 stretches the cell to the right margin, so align='C' centres the
# title between the page margins.
pdf.cell(0, 10, txt="Transaction Data Report", ln=True, align='C')
pdf.set_font('Arial', '', 12)
pdf.ln(10)
# multi_cell wraps the text across lines; width 0 again means "to the right margin".
pdf.multi_cell(0, 10, txt=Pdf_Text)

# Save the PDF to a file
pdf.output("transaction_data_report.pdf")

print("PDF generated successfully.")



PDF generated successfully.


In [5]:
import random
import pandas as pd
from faker import Faker

# Initialize Faker instance
fake = Faker()

# Seed both random sources so the random choices below are repeatable from
# run to run. Dates produced by Faker still depend on the current date.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Value pools the generator samples from, one list per output column.
products = ["Laptop", "Smartphone", "Headphones", "Tablet", "Smartwatch"]
service_feedback = ["Customer service was excellent", "Fast delivery", "Friendly staff", "Package arrived damaged", "Item was defective", "Support was unhelpful"]
sentiments = ["positive", "neutral", "negative"]
locations = ["USA", "UK", "India", "Germany", "Canada"]
age_groups = ["18-24", "25-34", "35-44", "45-54", "55+"]
genders = ["Male", "Female", "Other"]
product_categories = ["Electronics", "Apparel", "Home Appliances", "Toys", "Beauty"]
brands = ["Apple", "Samsung", "Sony", "Nike", "Adidas"]
support_channels = ["Phone", "Email", "Chat", "Social Media"]
issue_types = ["Delivery", "Defect", "Refund", "Warranty", "Customer Service"]

# Function to generate synthetic feedback rows
def generate_advanced_feedback(n):
    """Return ``n`` randomly generated feedback rows.

    Each row is a list of twelve values in this order: feedback text,
    product, rating, sentiment, location, age group, gender, product
    category, brand name, support channel, issue type, timestamp.

    Every field is drawn independently from the module-level value lists
    (the rating from 1-5, the timestamp from ``fake.date_this_year()``);
    the feedback text is a random service comment followed by
    ``" for <product>"``.

    Args:
        n: Number of rows to generate.

    Returns:
        A list containing ``n`` row lists.
    """

    def _random_row():
        # Draw order matters for reproducibility under a fixed seed:
        # product first, then the comment, then the remaining fields.
        item = random.choice(products)
        comment = random.choice(service_feedback)
        return [
            f"{comment} for {item}",
            item,
            random.choice([1, 2, 3, 4, 5]),
            random.choice(sentiments),
            random.choice(locations),
            random.choice(age_groups),
            random.choice(genders),
            random.choice(product_categories),
            random.choice(brands),
            random.choice(support_channels),
            random.choice(issue_types),
            fake.date_this_year(),
        ]

    return [_random_row() for _ in range(n)]

# Column names, in the same order as the values inside each generated row.
FEEDBACK_COLUMNS = [
    "feedback",
    "product",
    "rating",
    "sentiment",
    "location",
    "age_group",
    "gender",
    "product_category",
    "brand_name",
    "support_channel",
    "issue_type",
    "timestamp",
]

# Generate 100 synthetic feedback rows and wrap them in a DataFrame.
advanced_feedback_data = generate_advanced_feedback(100)
df_advanced = pd.DataFrame(data=advanced_feedback_data, columns=FEEDBACK_COLUMNS)

# Write the dataset to disk without the DataFrame index column.
df_advanced.to_csv("Products.csv", index=False)



In [4]:
pip install faker

Collecting faker
  Downloading Faker-35.2.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-35.2.0-py3-none-any.whl (1.9 MB)
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
   ---------------- ----------------------- 0.8/1.9 MB 11.2 MB/s eta 0:00:01
   --------------------------- ------------ 1.3/1.9 MB 6.1 MB/s eta 0:00:01
   -------------------------------------- - 1.8/1.9 MB 3.4 MB/s eta 0:00:01
   ---------------------------------------- 1.9/1.9 MB 3.0 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-35.2.0
Note: you may need to restart the kernel to use updated packages.
