In [1]:
# import torch

# if torch.cuda.is_available():
#     print(f"✅ GPU is available: {torch.cuda.get_device_name(0)}")
# else:
#     print("❌ No GPU detected, running on CPU")


In [2]:
import pandas as pd

file_path = "./data/Updated_Novel_Team_Dataset.xlsx"
xls = pd.ExcelFile(file_path)

print("Detected Sheets:", xls.sheet_names)

sheets_data = {sheet: pd.read_excel(xls, sheet_name=sheet) for sheet in xls.sheet_names}

for sheet_name, df in sheets_data.items():
    print(f"\nSheet: {sheet_name}")
    print(df.head())


Detected Sheets: ['Customer Profile (Org)', 'Customer Profile (Individual)', 'Social Media Sentiment', 'Transaction History']

Sheet: Customer Profile (Org)
                       Industry  \
0           E-commerce & Retail   
1         Entertainment & Media   
2  Healthcare & Pharmaceuticals   
3          Consumer Electronics   
4           E-commerce & Retail   

                                     Financial Needs  \
0          Crowdfunding, Venture Capital, R&D Grants   
1          Crowdfunding, Venture Capital, R&D Grants   
2  Supply Chain Financing, Inventory Loans, Retai...   
3  Supply Chain Financing, Inventory Loans, Retai...   
4       Agriculture Loans, Green Loans, Microfinance   

                                         Preferences Revenue (in dollars)  \
0  Online Banking, Mobile App, Digital Transforma...            330M-147M   
1  Farm Equipment, Distribution Channels, Global ...            595M-812M   
2  Regulatory Compliance, Risk Management, Blockc...            

In [3]:
file_path = "./data/Updated_Novel_Team_Dataset.xlsx"
xls = pd.ExcelFile(file_path)

customer_profile = pd.read_excel(xls, sheet_name="Customer Profile (Individual)")

customer_profile["Co-Borrower"] = customer_profile["Co-Borrower"].fillna(
    customer_profile["Family Member Name"]
)

customer_profile.to_csv("./data/Customer_Profile_Updated.csv", index=False)

In [4]:
import pandas as pd

file_path = "./data/Updated_Novel_Team_Dataset.xlsx"
xls = pd.ExcelFile(file_path)
print(xls.sheet_names)

if "Social Media Sentiment" in xls.sheet_names:
    sentiment_data = pd.read_excel(xls, sheet_name="Social Media Sentiment")
    print("✅ Social Media Sentiment Data Loaded Successfully!")
else:
    print("❌ ERROR: 'Social Media Sentiment' sheet not found!")

print(sentiment_data.head())


['Customer Profile (Org)', 'Customer Profile (Individual)', 'Social Media Sentiment', 'Transaction History']
✅ Social Media Sentiment Data Loaded Successfully!
   Customer_id Post_id   Platform  \
0  CUST_IND_54  POST_1   LinkedIn   
1  CUST_IND_17  POST_2   LinkedIn   
2  CUST_IND_14  POST_3  Instagram   
3  CUST_IND_35  POST_4   LinkedIn   
4   CUST_IND_3  POST_5   LinkedIn   

                                             Content               Timestamp  \
0  Navigating fluctuations raw material prices!! ... 2024-12-13 09:57:46.795   
1         Loving the new fashion trends this season! 2024-10-26 09:57:46.795   
2  Just finished a 5K run! Need new running shoes... 2024-11-18 09:57:46.795   
3  Exciting collaborations coming soon!! Guess wh... 2024-11-17 09:57:46.795   
4  Need to start saving more. Thinking of opening... 2025-02-16 09:57:46.795   

   Sentiment_Score     Intent  
0                0  Complaint  
1                1     Praise  
2                1    Inquiry  
3       

In [8]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from scipy.special import softmax

MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME).to("cuda" if torch.cuda.is_available() else "cpu")

def predict_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to("cuda" if torch.cuda.is_available() else "cpu")
    with torch.no_grad():
        outputs = model(**inputs)
    scores = softmax(outputs.logits.cpu().numpy())[0]
    return scores[2]  # Positive sentiment score

sentiment_data["Sentiment Score"] = sentiment_data["Content"].apply(lambda x: predict_sentiment(str(x)))

sentiment_data.to_csv("./data/Sentiment_Segmentation.csv", index=False)

print("✅ Sentiment Analysis Completed! Data Saved.")


✅ Sentiment Analysis Completed! Data Saved.


In [14]:
def categorize_sentiment(score):
    if score >= 0.7:
        return "Positive"
    elif score >= 0.4:
        return "Neutral"
    else:
        return "Negative"

sentiment_data["Sentiment Category"] = sentiment_data["Sentiment Score"].apply(categorize_sentiment)

sentiment_data.to_csv("./data/Sentiment_Categorized.csv", index=False)

print("✅ Sentiment Segmentation Completed! Data saved.")


✅ Sentiment Segmentation Completed! Data saved.


In [19]:
bank_services = {
    "Loans": ["Personal Loan", "Home Loan (Mortgage)", "Auto Loan", "Student Loan", "Business Loan"],
    "Credit Cards": ["Basic Credit Card", "Premium Credit Card", "Secured Credit Card", "Business Credit Card"],
    "Investments": ["High-Yield Savings Account", "Certificate of Deposit (CD)", "Retirement Plan (401k, IRA)", "Stock Market Investment"],
    "Banking Services": ["Debt Management Plan", "Overdraft Protection", "Joint Account", "Wealth Management"]
}

bank_services_df = pd.DataFrame([(category, service) for category, services in bank_services.items() for service in services],
                                columns=["Category", "Service"])

bank_services_df.to_csv("./data/Bank_Services.csv", index=False)

print("✅ Bank Services List Created and Saved!")


✅ Bank Services List Created and Saved!


Reccommendation engine

In [24]:
import pandas as pd

sentiment_data = pd.read_csv("./data/Sentiment_Categorized.csv")

file_path = "./data/Updated_Novel_Team_Dataset.xlsx"
xls = pd.ExcelFile(file_path)

if "Transaction History" in xls.sheet_names:
    transaction_data = pd.read_excel(xls, sheet_name="Transaction History")
    print("✅ Transaction History Loaded!")
else:
    raise ValueError("❌ ERROR: 'Transaction History' sheet not found!")

bank_services = pd.read_csv("./data/Bank_Services.csv")

transaction_data.rename(columns={"Customer ID": "Customer_id"}, inplace=True)

merged_data = transaction_data.merge(sentiment_data[['Customer_id', 'Sentiment Category']], on='Customer_id', how='left')

merged_data["Sentiment Category"].fillna("Neutral", inplace=True)

print("✅ Data Merged Successfully!")

def recommend_services(row):
    recommendations = []

    if row["Sentiment Category"] == "Positive" and row["Amount (In Dollars)"] > 5000:
        recommendations.append("Premium Credit Card")
        recommendations.append("Investment Plan")

    elif row["Sentiment Category"] == "Neutral" and "loan" in str(row.get("Search_History", "")).lower():
        recommendations.append("Personal Loan")

    elif row["Sentiment Category"] == "Negative" and row["Amount (In Dollars)"] > 3000:
        recommendations.append("Low-Interest Credit Card")
        recommendations.append("Debt Management Consultation")

    if not recommendations:
        recommendations.append("Savings Account Upgrade")

    return ", ".join(recommendations)

merged_data["Recommendations"] = merged_data.apply(recommend_services, axis=1)

merged_data.to_csv("./data/Recommendations.csv", index=False)

print("✅ Recommendations Generated and Saved!")


✅ Transaction History Loaded!
✅ Data Merged Successfully!
✅ Recommendations Generated and Saved!


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged_data["Sentiment Category"].fillna("Neutral", inplace=True)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


