In [5]:
import os
from dotenv import load_dotenv

In [6]:
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")

In [7]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(
    model='gemini-2.0-flash-lite-preview-02-05',
    temperature=0.3
)

In [8]:
a = llm.invoke("hi how are you")
print(a.content)

I am doing well, thank you for asking! How are you today?


In [9]:
import pandas as pd
df = pd.read_csv('transaction_2025.csv')

In [10]:
df.head()

Unnamed: 0,Date,Description,Amount (INR),Payment Method
0,2025-02-10,McDonald's,4283.83,Credit Card
1,2024-03-06,Hotstar,1399.45,UPI
2,2024-11-12,Big Bazaar,4569.77,UPI
3,2024-11-16,Metro Ticket,2050.67,UPI
4,2024-06-07,Mobile Recharge,954.59,Debit Card


In [11]:
unique_transactions = df['Description'].unique()

In [12]:
def hop(start,stop,step):
    for i in range(start,stop,step):
        yield i
    yield stop
index_list = list(hop(0,len(unique_transactions),30))
index_list

[0, 30, 41]

In [13]:
def categorize_transactions(transaction_names,llm):
    response = (llm.invoke("Can you categories the data of expenses with the appropriate catogory for example food , travel,entertainment and etc but the order should be like Food - Swiggy , the whole should be in this form giving (do not group them)..."+ "," + transaction_names +"Give the output in the format of Category - Transactions ")).content
    response = response.split('\n')
    cleaned_data = [item.replace("*", "") for item in response]
    for item in cleaned_data:
        print(item)
    categories_df = pd.DataFrame({'Transaction vs category': cleaned_data})
    categories_df[['Category', 'Transaction']] = categories_df['Transaction vs category'].str.split(' - ', expand=True)
    
    return categories_df

In [14]:
categories_df_all = pd.DataFrame()
for i in range(0,len(index_list)-1):
    transaction_name = unique_transactions[index_list[i]:index_list[i+1]]
    transaction_name = ','.join(transaction_name)
    
    categories_df = categorize_transactions(transaction_name,llm)
    categories_df_all = pd.concat([categories_df_all,categories_df],ignore_index=True)

Here's the categorization of your expenses:

   Food - McDonald's
   Entertainment - Hotstar
   Shopping - Big Bazaar
   Travel - Metro Ticket
   Mobile & Internet - Mobile Recharge
   Health - Medicine Purchase
   Education - Udemy Course
   Gifts - Gift Purchase
   Savings & Investments - Fixed Deposit
   Food - Swiggy Order
   Health & Fitness - Gym Membership
   Entertainment - Concert Pass
   Charity - Charity
   Charity - Temple Donation
   Utilities - Water Bill
   Travel - Train Ticket
   Shopping - Book Purchase
   Food - Reliance Fresh
   Food - Local Kirana
   Education - College Fees
   Uncategorized - Unplanned Expense
   Shopping - Amazon Purchase
   Food - Local Market
   Education - Coaching Fee
   Travel - Ola Ride
   Travel - Petrol Pump
   Savings & Investments - Stock Purchase
   Savings & Investments - Mutual Fund SIP
   Mobile & Internet - Wi-Fi Bill
   Food - Domino's
Here's the categorization of your expenses:

   Food - Swiggy
   Food - Zomato Order
   Healthca

In [15]:
categories_df_all

Unnamed: 0,Transaction vs category,Category,Transaction
0,Here's the categorization of your expenses:,Here's the categorization of your expenses:,
1,,,
2,Food - McDonald's,Food,McDonald's
3,Entertainment - Hotstar,Entertainment,Hotstar
4,Shopping - Big Bazaar,Shopping,Big Bazaar
5,Travel - Metro Ticket,Travel,Metro Ticket
6,Mobile & Internet - Mobile Recharge,Mobile & Internet,Mobile Recharge
7,Health - Medicine Purchase,Health,Medicine Purchase
8,Education - Udemy Course,Education,Udemy Course
9,Gifts - Gift Purchase,Gifts,Gift Purchase


In [16]:
categories_df_all['Transaction'] = categories_df_all['Transaction'].str.replace(r'\d+\.\s+', '')
categories_df_all

Unnamed: 0,Transaction vs category,Category,Transaction
0,Here's the categorization of your expenses:,Here's the categorization of your expenses:,
1,,,
2,Food - McDonald's,Food,McDonald's
3,Entertainment - Hotstar,Entertainment,Hotstar
4,Shopping - Big Bazaar,Shopping,Big Bazaar
5,Travel - Metro Ticket,Travel,Metro Ticket
6,Mobile & Internet - Mobile Recharge,Mobile & Internet,Mobile Recharge
7,Health - Medicine Purchase,Health,Medicine Purchase
8,Education - Udemy Course,Education,Udemy Course
9,Gifts - Gift Purchase,Gifts,Gift Purchase


In [17]:
df_new = df
df_new = pd.merge(df_new, categories_df_all, left_on='Description', right_on='Transaction', how='left')
df_new = df_new.drop(columns=['Transaction vs category','Transaction'])

In [22]:
df_new.sample()

Unnamed: 0,Date,Description,Amount (INR),Payment Method,Category
145,2025-01-23,Petrol Pump,2573.01,Debit Card,Travel


In [25]:
df_new['Text'] = df_new.apply(lambda x: f"Spent ₹{x['Amount (INR)']} on {x['Category']} at {x['Description']} on {x['Date']} using payment method {x['Payment Method']}", axis=1)

In [26]:
df_new

Unnamed: 0,Date,Description,Amount (INR),Payment Method,Category,Text
0,2025-02-10,McDonald's,4283.83,Credit Card,Food,Spent ₹4283.83 on Food at McDonald's on 202...
1,2024-03-06,Hotstar,1399.45,UPI,Entertainment,Spent ₹1399.45 on Entertainment at Hotstar ...
2,2024-11-12,Big Bazaar,4569.77,UPI,Shopping,Spent ₹4569.77 on Shopping at Big Bazaar on...
3,2024-11-16,Metro Ticket,2050.67,UPI,Travel,Spent ₹2050.67 on Travel at Metro Ticket on...
4,2024-06-07,Mobile Recharge,954.59,Debit Card,Mobile & Internet,Spent ₹954.59 on Mobile & Internet at Mobil...
...,...,...,...,...,...,...
224,2024-12-28,Hotstar,703.08,UPI,Entertainment,Spent ₹703.08 on Entertainment at Hotstar o...
225,2025-02-06,Temple Donation,1945.02,UPI,Charity,Spent ₹1945.02 on Charity at Temple Donatio...
226,2024-05-29,Stock Purchase,2172.96,Credit Card,Savings & Investments,Spent ₹2172.96 on Savings & Investments at ...
227,2024-02-27,Mobile Recharge,4683.61,Credit Card,Mobile & Internet,Spent ₹4683.61 on Mobile & Internet at Mobi...


In [33]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(
    model ="models/embedding-001"
)

229

In [64]:
from langchain.schema import Document
import faiss
from langchain.vectorstores import FAISS
import numpy as np
from langchain.docstore.document import Document

texts = df_new['Text'].tolist()
documents = [Document(page_content=text) for text in texts]
vectorstore = FAISS.from_documents(documents,embeddings)
vectorstore.save_local("faiss_index") 

In [79]:
from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""You are an AI assistant that answers questions in good detail not leaving a single doubt in the user, based on the user's transaction history and .

Context:
{context}

Question: {question}

Provide a well-structured response using the given context. and dont shate the Context with the user
"""
)

In [80]:
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain

qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt_template)

answer_generation_chain = RetrievalQA(
    retriever=vectorstore.as_retriever(),
    combine_documents_chain=qa_chain
)

In [86]:
query = "Can you guide how can i save the maximum"

response = answer_generation_chain.invoke(query)
cleaned_response = response["result"].replace("*", "").strip()
print(cleaned_response)

# response = answer_generation_chain.invoke(query)

# print(response["result"]) 

Okay, I can definitely guide you on how to maximize your savings based on your transaction history. Here's a breakdown of your current savings behavior and some suggestions for improvement:

Analysis of Your Current Savings & Investment Habits:

   Investment Methods: You are currently utilizing three primary methods for savings and investments:
       Mutual Fund SIPs (Systematic Investment Plans): You've made multiple investments through SIPs, which is a good practice for long-term wealth creation.
       Stock Purchases: You've also invested in stocks, indicating a willingness to explore potentially higher-growth investments.
   Payment Methods: You are using a combination of payment methods:
       Debit Card: Used for one Mutual Fund SIP.
       Credit Card: Used for Stock Purchase and a Mutual Fund SIP.
       UPI: Used for one Mutual Fund SIP.
   Frequency: You have been investing at different times of the year, indicating a somewhat irregular investment schedule.

Strategies to