In [2]:
from pymongo import MongoClient
import pandas as pd
import numpy as np
import datetime as dt
import time
from dateutil.relativedelta import relativedelta


In [4]:
# Monthly loan export: for every month in [current_date, final_date], pull the
# Loan documents whose `dateTime` falls in that month, clean them and write one
# CSV per month (loans_<mon>_<year>.csv) in the working directory.

# NOTE(review): the connection URI is hard-coded; consider reading it from
# configuration or an environment variable before sharing this notebook.
# MongoClient connects lazily, so the message below confirms that the client
# object was created, not that the server is reachable.
Client = MongoClient("mongodb://3.20.189.28:2036/momoKashBd?readPreference=secondary&directConnection=true")
db_loan = Client["momoKashBd"]
Loans = db_loan["Loan"]
print("MongoDB connection established with success")

# Fields requested from each Loan document (inclusion projection).
loan_projection = {
    "_id": 1,
    "loanAmount": 1,
    "amountPaid": 1,
    "dueDate": 1,
    "loanDate": 1,
    "dueAmount": 1,
    "amountwithInterest": 1,
    "loanStatus": 1,
    "interest_rate": 1,
    "clientID": 1,
    "bonusAmount": 1,
    # Spelled to match the `penaltyAmount` column cleaned below; a misspelled
    # key here leaves that column filled with zeros.
    # NOTE(review): confirm this is the field name stored in the collection.
    "penaltyAmount": 1,
    "amountDisbursed": 1,
    "totalDueAmount": 1,
    "dateTime": 1,
}

# Column groups used by the cleaning step.
date_cols = ["loanDate", "dueDate"]
req_fields = ["dueAmount", "penaltyAmount"]
num_cols = ["loanAmount", "amountPaid", "amountwithInterest", "interest_rate",
            "is_complete", "bonusAmount", "amountDisbursed", "totalDueAmount"]
text_cols = ["loanStatus", "clientID"]


def _clean_loans(loans_raw):
    """Return a cleaned copy of the raw loans frame.

    Steps, in order:
      * parse the columns in ``date_cols`` as day-first dates; unparseable
        values become NaT, and a column that is absent is created as NaT;
      * make sure every column in ``req_fields`` exists, filling gaps with 0;
      * for the columns in ``num_cols`` that are present, replace missing
        values with 0 and clamp negative values to 0;
      * cast the columns in ``text_cols`` that are present to stripped strings;
      * sort by ``loanDate`` then ``dateTime`` and renumber the index from 0.

    The input frame is not modified.
    """
    loans = loans_raw.copy()

    for col in date_cols:
        if col in loans.columns:
            loans[col] = pd.to_datetime(loans[col], dayfirst=True, errors="coerce")
        else:
            # No fetched document carried this field.
            loans[col] = pd.NaT

    for col in req_fields:
        loans[col] = loans[col].fillna(0) if col in loans.columns else 0

    for col in num_cols:
        if col in loans.columns:
            # to_numeric raises on non-numeric values instead of silently
            # dropping them; clip() is the vectorised form of max(x, 0).
            loans[col] = pd.to_numeric(loans[col].fillna(0)).clip(lower=0)

    for col in text_cols:
        if col in loans.columns:
            loans[col] = loans[col].astype(str).str.strip()

    # Sort once, with dateTime as a tie-breaker so the row order is
    # deterministic, then renumber so the index follows the final order.
    return loans.sort_values(["loanDate", "dateTime"], ascending=True).reset_index(drop=True)


# Define start and end of iteration
current_date = dt.datetime(2025, 10, 1)
final_date = dt.datetime(2025, 10, 31)

while current_date <= final_date:
    date_start = current_date
    next_month_start = date_start + relativedelta(months=1)
    # Last calendar day of the month (at midnight); kept for reference only.
    date_end = next_month_start - dt.timedelta(days=1)

    print(f"\nProcessing loans from {date_start.strftime('%B %Y')}...")

    start = time.time()

    # Half-open range [month start, next month start): an inclusive upper
    # bound at midnight of the last day would drop every loan recorded later
    # on that day.
    cursor = Loans.find(
        {"dateTime": {"$gte": date_start, "$lt": next_month_start}},
        loan_projection,
    )

    loans_df = pd.DataFrame(list(cursor))

    if loans_df.empty:
        print("No loans were found for this period.")
    else:
        loans_df = _clean_loans(loans_df)

        print("Sample of fully cleaned loans:")
        print(loans_df.head(3))

        # Export CSV
        filename = f"loans_{date_start.strftime('%b_%Y').lower()}.csv"
        loans_df.to_csv(filename, index=False, sep=",")
        print(f"CSV export of {filename} complete")

    end = time.time()
    print(f"Fetching time: {round((end-start)/60, 2)} minutes")

    # Move to next month
    current_date += relativedelta(months=1)


MongoDB connection established with success

Processing loans from October 2025...


KeyboardInterrupt: 

In [6]:
# Look up the MongoDB `_id` of every loan listed in merged_loans_all.csv and
# write the rows back out with an extra `mongo_id` column.

# NOTE(review): the connection URI is hard-coded; consider reading it from
# configuration or an environment variable before sharing this notebook.
Client = MongoClient("mongodb://3.20.189.28:2036/momoKashBd?readPreference=secondary&directConnection=true")
db_loan = Client["momoKashBd"]
Loans = db_loan["Loan"]
print("MongoDB connection established with success")

df = pd.read_csv('merged_loans_all.csv')

# Columns that together identify a loan document.
# Add other fields here for more accurate matches if necessary.
match_cols = ["clientID", "loanDate", "amountDisbursed"]
missing_cols = [col for col in match_cols if col not in df.columns]
if missing_cols:
    # Fail before issuing any query rather than part-way through the loop.
    raise KeyError(f"merged_loans_all.csv is missing required columns: {missing_cols}")

# NOTE(review): values are matched exactly as read from the CSV (for example
# `loanDate` as text); confirm they have the same type and format as the
# stored fields, otherwise no document will match.
mongo_ids = []
id_cache = {}  # key tuple -> str(_id) or None, so repeated keys are queried once

for key in df[match_cols].itertuples(index=False, name=None):
    if key not in id_cache:
        query = dict(zip(match_cols, key))
        # Only the _id is needed, so do not fetch the whole document.
        doc = Loans.find_one(query, {"_id": 1})
        # Convert ObjectId to string; None marks "not found".
        id_cache[key] = str(doc["_id"]) if doc else None
    mongo_ids.append(id_cache[key])

# Add this as a new column to the DataFrame
df["mongo_id"] = mongo_ids

# The target is a .csv file, so it must be written with to_csv; to_excel
# cannot pick a writer engine for the .csv extension.
df.to_csv('merged_loans_with_id.csv', index=False)



MongoDB connection established with success



KeyboardInterrupt

