In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import FeatureUnion
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
import joblib
import re
from imblearn.over_sampling import SMOTE

In [2]:
# 2. Load and clean the dataset
df_1 = pd.read_excel("C:/Users/HP/Desktop/Hogist/Data For DM.xlsx", sheet_name="Sheet 1")

In [3]:
# Load Excel file
excel_file = pd.ExcelFile("C:/Users/HP/Desktop/Hogist/Data For DM.xlsx")

In [4]:

# Get sheet names (optional)
print(excel_file.sheet_names)

['Sheet 1', 'Sheet 2', 'Sheet 3', 'Sheet 4', 'Sheet 5', 'sheet 6', 'Sheet 7', 'Sheet 8', 'Sheet 9']


In [5]:
# Load specific sheets
df1 = excel_file.parse("Sheet 1")
df2 = excel_file.parse("Sheet 2")
# Load specific sheets
df3 = excel_file.parse("Sheet 3")
df4 = excel_file.parse("Sheet 4")

In [6]:
# Combine the sheets (assuming they have the same columns)
combined_df = pd.concat([df1, df2, df3, df4], ignore_index=True)

In [7]:
# Optional: drop duplicates or reset index
combined_df = combined_df.drop_duplicates().reset_index(drop=True)

In [8]:
# Preview
print(combined_df.head())

   SL NO                 DATE LEAD STATUS  LEADS FROM  \
0    1.0  2023-01-08 00:00:00         COLD     BIVINA   
1    2.0  2023-02-08 00:00:00         COLD      AYONA   
2    3.0  2023-07-09 00:00:00         COLD      AYONA   
3    4.0  2023-07-09 00:00:00         COLD     BIVINA   
4    5.0           28-08-2023         COLD      AYONA   

                     LOCATION  \
0                     Noombal   
1      AMBATTUR & GOPALAPURAM   
2                Arumbakkam\n   
3  RMZ Millenia Tech park\n\n   
4                    Teynapet   

                                        COMPANY NAME  \
0                                KAG India pvt ltd\n   
1                        ALCRAFT THERMO TECHNOLOGIES   
2                  Aymarkz industrial tech pvt ltd\n   
3                                    Athena Health\n   
4  Co chartered accountants, standard chartered (...   

             CONTACT PERSON   PHONE NUMBER  \
0  Hari( Genaral Manager)\n  9840400678 \n   
1                    PALANI  

In [9]:
combined_df.columns

Index([                   'SL NO',                     'DATE',
                   'LEAD STATUS ',               'LEADS FROM',
                       'LOCATION',             'COMPANY NAME',
                 'CONTACT PERSON',             'PHONE NUMBER',
                        'MAIL ID',               'REQUIRMENT',
                           'TYPE',            'REQUIRED DATE',
                     'HEAD COUNT',       'NO OF WORKING DAYS',
                  'CLIENT BUDGET',            'VENDOR PRICE ',
                           'MENU',                  'REMARKS',
              2023-10-08 00:00:00,        2023-11-08 00:00:00,
                      '16/8/2023',                '17/8/2023',
                      '21/8/2023',                '22/8/2023',
              2023-01-09 00:00:00,        2023-05-09 00:00:00,
              2023-06-09 00:00:00,                '29/9/2023',
              2013-03-10 00:00:00,        2023-12-10 00:00:00,
                     '16-10-2023',               '17-10

In [10]:
# Assuming 'combined_df' is your final merged DataFrame
combined_df.to_excel("C:/Users/HP/Desktop/Hogist/Combined_Leads.xlsx", index=False)

In [11]:
# Drop unnamed and date-based columns (keep only string-named columns)
df = combined_df.loc[:, combined_df.columns.map(lambda x: isinstance(x, str))]

In [12]:
df['LOCATION'].unique()

array(['Noombal', 'AMBATTUR & GOPALAPURAM', ' Arumbakkam\n',
       'RMZ Millenia Tech park\n\n', 'Teynapet', 'Ambattur',
       ' Savitha dental college poonamallee high road.',
       'Kandanchavadi\n', ' ambattur\n', 'Tharamani',
       'Padi Chennai Tamil Nadu 600058\n', ' Ambattur',
       'Sidco industrial estate thirumudivakkam', ' Puthulakkam.',
       'Kundrathur,AR edaimedai,Thirumudipakkam',
       ' Manikam lane, guindy. ', 'Industrial estate Ambattur.',
       'No:A-31, MEPZ-SEZ, MEPZ, WEST TAMBARAM,\n',
       'Fayola Towers, 3A, 200 Feet Radial Rd, Pallikaranai',
       'IRUNGATTU KOTTAI\n\n', 'AMBATTUR & MYLAPORE', 'GUINTY',
       ' Industrial EstateAmbattur\n\n', 'Industrial Estate ambattur',
       'SIDCO industrial estate poonamalle.\n', 'Ponneri\n', 'Coimbatore',
       'OMR Nehru Nagar Kottivakkam',
       'Dowlath Towers 8th floor,Taylors Rd, Kilpauk, Chennai,\n',
       'Riveira Park, 4th Main Rd Ext, Adyar House, Kotturpuram',
       'Sipcot Irungattu kottai',


In [13]:
df.columns

Index(['SL NO', 'DATE', 'LEAD STATUS ', 'LEADS FROM', 'LOCATION',
       'COMPANY NAME', 'CONTACT PERSON', 'PHONE NUMBER', 'MAIL ID',
       'REQUIRMENT', 'TYPE', 'REQUIRED DATE', 'HEAD COUNT',
       'NO OF WORKING DAYS', 'CLIENT BUDGET', 'VENDOR PRICE ', 'MENU',
       'REMARKS', '16/8/2023', '17/8/2023', '21/8/2023', '22/8/2023',
       '29/9/2023', '16-10-2023', '17-10-2023', '18-10-2023', '30-10-2024',
       'Unnamed: 34', 'FOLLOW UP BY', 'Vendor price',
       'OTHER CHARGES FOR HOGIST', 'Unnamed: 28', 'Unnamed: 29', '31-10-2023',
       'BUDGET', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20',
       'Unnamed: 21', 'Unnamed: 22', 'Date of Meeting', 'Company Name',
       'Location', 'Company Verticle', 'Contact Person', 'Designation',
       'Contact Number', 'EMP Strength', 'Current Strength', 'Requirment',
       'COP/CC', 'Remarks', 'Next F/up date', 'Order Status',
       'Sourcing Person'],
      dtype='object')

In [14]:
# Clean column names
df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]

In [15]:
# Add a dummy target column (converted: 1 or 0)
df["converted"] = np.random.choice([0, 1], size=len(df))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["converted"] = np.random.choice([0, 1], size=len(df))


In [16]:
def clean_text(text):
    text = re.sub(r'\d+', '', text)  # remove numbers
    text = re.sub(r'\s+', ' ', text)  # remove extra spaces
    text = text.strip().lower()
    return text

In [17]:
df['remarks']

Unnamed: 0,remarks,remarks.1
0,Meeting Done,
1,Customer in dilemma on menu and price.,
2,Profile sent,
3,Meeting Done -failed,
4,They will employees bring food from home.Once ...,
...,...,...
215,,Order Confirmed (Trail service starts from 02/...
216,,Waiting for price confirmation from the client.
217,,Waiting for vendor price
218,,Waiting for the pricing confirmation from the ...


In [18]:
# Normalize all column headers
df.columns = [col.strip().lower().replace(" ", "_") for col in df.columns]

In [19]:
print(df.columns.tolist())

['sl_no', 'date', 'lead_status', 'leads_from', 'location', 'company_name', 'contact_person', 'phone_number', 'mail_id', 'requirment', 'type', 'required_date', 'head_count', 'no_of_working_days', 'client_budget', 'vendor_price', 'menu', 'remarks', '16/8/2023', '17/8/2023', '21/8/2023', '22/8/2023', '29/9/2023', '16-10-2023', '17-10-2023', '18-10-2023', '30-10-2024', 'unnamed:_34', 'follow_up_by', 'vendor_price', 'other_charges_for_hogist', 'unnamed:_28', 'unnamed:_29', '31-10-2023', 'budget', 'unnamed:_17', 'unnamed:_18', 'unnamed:_19', 'unnamed:_20', 'unnamed:_21', 'unnamed:_22', 'date_of_meeting', 'company_name', 'location', 'company_verticle', 'contact_person', 'designation', 'contact_number', 'emp_strength', 'current_strength', 'requirment', 'cop/cc', 'remarks', 'next_f/up_date', 'order_status', 'sourcing_person', 'converted']


In [20]:
print(df.columns[df.columns.duplicated()])

Index(['vendor_price', 'company_name', 'location', 'contact_person',
       'requirment', 'remarks'],
      dtype='object')


In [21]:
df = df.loc[:, ~df.columns.duplicated()]

In [22]:
df.columns

Index(['sl_no', 'date', 'lead_status', 'leads_from', 'location',
       'company_name', 'contact_person', 'phone_number', 'mail_id',
       'requirment', 'type', 'required_date', 'head_count',
       'no_of_working_days', 'client_budget', 'vendor_price', 'menu',
       'remarks', '16/8/2023', '17/8/2023', '21/8/2023', '22/8/2023',
       '29/9/2023', '16-10-2023', '17-10-2023', '18-10-2023', '30-10-2024',
       'unnamed:_34', 'follow_up_by', 'other_charges_for_hogist',
       'unnamed:_28', 'unnamed:_29', '31-10-2023', 'budget', 'unnamed:_17',
       'unnamed:_18', 'unnamed:_19', 'unnamed:_20', 'unnamed:_21',
       'unnamed:_22', 'date_of_meeting', 'company_verticle', 'designation',
       'contact_number', 'emp_strength', 'current_strength', 'cop/cc',
       'next_f/up_date', 'order_status', 'sourcing_person', 'converted'],
      dtype='object')

In [23]:
df['location'].unique()

array(['Noombal', 'AMBATTUR & GOPALAPURAM', ' Arumbakkam\n',
       'RMZ Millenia Tech park\n\n', 'Teynapet', 'Ambattur',
       ' Savitha dental college poonamallee high road.',
       'Kandanchavadi\n', ' ambattur\n', 'Tharamani',
       'Padi Chennai Tamil Nadu 600058\n', ' Ambattur',
       'Sidco industrial estate thirumudivakkam', ' Puthulakkam.',
       'Kundrathur,AR edaimedai,Thirumudipakkam',
       ' Manikam lane, guindy. ', 'Industrial estate Ambattur.',
       'No:A-31, MEPZ-SEZ, MEPZ, WEST TAMBARAM,\n',
       'Fayola Towers, 3A, 200 Feet Radial Rd, Pallikaranai',
       'IRUNGATTU KOTTAI\n\n', 'AMBATTUR & MYLAPORE', 'GUINTY',
       ' Industrial EstateAmbattur\n\n', 'Industrial Estate ambattur',
       'SIDCO industrial estate poonamalle.\n', 'Ponneri\n', 'Coimbatore',
       'OMR Nehru Nagar Kottivakkam',
       'Dowlath Towers 8th floor,Taylors Rd, Kilpauk, Chennai,\n',
       'Riveira Park, 4th Main Rd Ext, Adyar House, Kotturpuram',
       'Sipcot Irungattu kottai',


In [24]:
# Fill missing values

df["remarks"] = df["remarks"].fillna("none").apply(lambda x: clean_text(str(x)))
df["requirment"] = df["requirment"].fillna("unknown")
df["company_name"] = df["company_name"].fillna("unknown")
df["location"] = df["location"].fillna("unknown")
df["lead_status"] = df["lead_status"].fillna("unknown")
df["head_count"] = pd.to_numeric(df["head_count"], errors='coerce').fillna(0).astype(int)
df["client_budget"] = pd.to_numeric(df["client_budget"], errors='coerce').fillna(0)


In [25]:
print(df["lead_status"].unique())


['COLD' 'Not interested ' 'WON' 'Hot ' 'Hot' 'cOLD' 'WARM' 'HOT'
 'NOT INTERESTED' 'NOT INTERESTED ' 'WON ' 'Won' 'won' 'unknown']


In [26]:
# Normalize and map all statuses to cleaned categories
def map_lead_status(status):
    status = status.strip().lower()
    if status in ["hot", "won"]:
        return "converted"
    elif status == "warm":
        return "in_progress"
    elif "not interested" in status:
        return "not_interested"
    else:
        return "new"

# Apply mapping
df["converted"] = df["lead_status"].apply(map_lead_status)


In [27]:
# Map string labels to numerical classes
status_to_num = {
    "not_interested": 0,
    "converted": 1,
    "in_progress": 2,
    "new": 3
}
df["converted_num"] = df["converted"].map(status_to_num)
y = df["converted_num"]


In [28]:
#df["converted"] = df["lead_status"].str.lower().apply(
    #lambda x: 1 if x in ["hot", "cold", "won", "warm", "not interested"] else 0
#)

In [29]:
# 3. Feature selection
features = [
    "location", "company_name", "remarks", "lead_status",
    "head_count", "client_budget"
]
target = "converted_num"

X = df[features]
y = df[target]

In [30]:
# 4. Preprocessing pipeline
text_features = ["remarks","requirment"]
categorical_features = ["location", "company_name", "lead_status"]
numeric_features = ["head_count", "client_budget"]

preprocessor = ColumnTransformer(
    transformers=[
        ("text", TfidfVectorizer(), "remarks"),
        ("cat", ColumnTransformer(
            [("label_enc", LabelEncoder(), col) for col in categorical_features], remainder="drop"), categorical_features),
        ("num", StandardScaler(), numeric_features)
    ]
)

In [31]:
for col in categorical_features:
    le = LabelEncoder()
    if isinstance(X[col], pd.DataFrame):
        X[col] = le.fit_transform(X[col].iloc[:, 0])  # if it's accidentally a DataFrame, get the first column
    else:
        X[col] = le.fit_transform(X[col].astype(str))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col].astype(str))


In [32]:
# TF-IDF for remarks
tfidf = TfidfVectorizer(stop_words="english",
    max_features=100,
    ngram_range=(1, 2))
tfidf_matrix = tfidf.fit_transform(df["remarks"])  # shape: (220, 100) ✅

In [33]:
def clean_location(text):
    return text.strip().lower().replace("\n", "").replace(".", "").replace(",", "")

def clean_company_name(text):
    return text.strip().lower().replace("\n", "").replace(".", "").replace(",", "")

def clean_lead_status(text):
    return text.strip().lower().replace("\n", "").replace(".", "").replace(",", "")

In [34]:
company_encoder = LabelEncoder()
df["company_name_encoder"] = df["company_name"].astype(str).apply(clean_company_name)
company_encoder.fit(df["company_name_encoder"])

# Save it
joblib.dump(company_encoder, "company_encoder.pkl")

location_encoder = LabelEncoder()
df["location_name_encoder"] = df["location"].astype(str).apply(clean_location)
location_encoder.fit_transform(df["location_name_encoder"])

joblib.dump(location_encoder, "location_encoder.pkl")

status_encoder = LabelEncoder()
df["lead_status_encoder"] = df["lead_status"].astype(str).apply(clean_lead_status)
status_encoder.fit(df["lead_status_encoder"])

# Save it
joblib.dump(status_encoder, "lead_status_encoder.pkl")

['lead_status_encoder.pkl']

In [35]:
df["company_name_encoder"].unique()

array(['kag india pvt ltd', 'alcraft thermo technologies',
       'aymarkz industrial tech pvt ltd', 'athena health',
       'co chartered accountants standard chartered ( there are 4 branches',
       'on load gears', 'cross manufacturing company',
       'dun & bradstreet technologies and data services',
       'electro flux equipments pvt ltd', 'jeevan pack systems',
       'datatracks services private limited', 'metallic components',
       'bharath coal chemicals ltdd', 'eucare pharmacuticals(p)ltd',
       'softgel healthcare pvt ltd', 'fine tech enterprises',
       'mashiv moulds  dies', 'quadsel system pvt ltd(software company)',
       'zymass auto systems', 'quintessence fragrances (p) ltd',
       'technerds solutions private limited', 'esga pharma',
       'gourmet popcornica llp', 'htl ltd', 'kun aerospace pvt ltd',
       'vortex engineering pvt ltd', 'ostberg india pvt ltd',
       'nissei electric india p ltd',
       'mektron manufacturing india pvt ltd',
       'cele

In [36]:
df["location_name_encoder"].unique()

array(['noombal', 'ambattur & gopalapuram', 'arumbakkam',
       'rmz millenia tech park', 'teynapet', 'ambattur',
       'savitha dental college poonamallee high road', 'kandanchavadi',
       'tharamani', 'padi chennai tamil nadu 600058',
       'sidco industrial estate thirumudivakkam', 'puthulakkam',
       'kundrathurar edaimedaithirumudipakkam', 'manikam lane guindy',
       'industrial estate ambattur',
       'no:a-31 mepz-sez mepz west tambaram',
       'fayola towers 3a 200 feet radial rd pallikaranai',
       'irungattu kottai', 'ambattur & mylapore', 'guinty',
       'industrial estateambattur', 'sidco industrial estate poonamalle',
       'ponneri', 'coimbatore', 'omr nehru nagar kottivakkam',
       'dowlath towers 8th floortaylors rd kilpauk chennai',
       'riveira park 4th main rd ext adyar house kotturpuram',
       'sipcot irungattu kottai',
       'ambedkar roadsubbaraya nagarkodambakkam',
       'orakkadu post sholavaram block sekkanjerired hillskaranodai',
      

In [37]:
# Combine features
X_combined = np.hstack([
    tfidf_matrix.toarray(),
    X[categorical_features + numeric_features].values
])


In [38]:
# 5. Split the data
X_train, X_test, y_train, y_test = train_test_split(X_combined, y, test_size=0.2, random_state=42)

In [39]:
# 6. Train a RandomForest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [40]:
# 7. Evaluate
preds = model.predict(X_test)
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      0.83      0.91        12
           2       0.50      1.00      0.67         1
           3       0.90      0.97      0.93        29

    accuracy                           0.89        44
   macro avg       0.60      0.70      0.63        44
weighted avg       0.88      0.89      0.88        44



In [41]:
def safe_label_transform(encoder, value):
    value = value.lower().strip()
    if value in encoder.classes_:
        return encoder.transform([value])[0]
    else:
        print(f"⚠️ Unseen value in encoder: {value}")
        return -1  # Or len(encoder.classes_) if you handled that in training

In [42]:
# 8. Save model and vectorizers
joblib.dump(model, "lead_conversion_model.pkl")
joblib.dump(tfidf, "tfidf_vectorizer.pkl")
print("✅ Model trained and saved for conversion prediction!")


✅ Model trained and saved for conversion prediction!


In [43]:
# Load encoders
company_encoder = joblib.load("company_encoder.pkl")
location_encoder = joblib.load("location_encoder.pkl")
status_encoder = joblib.load("lead_status_encoder.pkl")

# Sample input lead
lead = {
    "location": "Ambattur",
    "company_name": "cts",
    "lead_status": "warm",
    "remarks": "waiting for start date confirmation",
    "head_count": 300,
    "client_budget": 200,
    "contacted_before": 1,
    "meeting_done": 1
}

# Use actual values from the lead
location = safe_label_transform(location_encoder, lead["location"])
company = safe_label_transform(company_encoder, lead["company_name"])
lead_status = safe_label_transform(status_encoder, lead["lead_status"])
# Transform remarks
remarks_vector = tfidf.transform([lead["remarks"]]).toarray()


# Combine features
feature_vector = np.hstack([
    remarks_vector,
    [[
        location,
        company,
        lead_status,
        lead["head_count"],
        lead["client_budget"],
        lead["contacted_before"],
        lead["meeting_done"]
    ]]
])


⚠️ Unseen value in encoder: cts


In [44]:
print(location_encoder.classes_)

['151 sidco 12 main road'
 '67 bazullah rd t nagar chennai tamil nadu 600017'
 '7th floor iifl tower 143 perungudi chennai 600096'
 'alagappa roadvalsarakkam' 'alandur' 'ambattur' 'ambattur & gopalapuram'
 'ambattur & mylapore' 'ambedkar roadsubbaraya nagarkodambakkam'
 'aminjikarai' 'arumbakkam' 'ascendas 12th floortaramani' 'chetpet'
 'coddissia coimbatore' 'coimbatore' 'dlframapuramchennai'
 'dowlath towers 8th floortaylors rd kilpauk chennai' 'ecr' 'egmore'
 'ennore' 'ennore express highway ernavoor chennai - 600057'
 'fayola towers 3a 200 feet radial rd pallikaranai' 'gst road alanthur'
 'guindy' 'guinty'
 'hamid building 2nd floor anna street thousand lights chennai'
 'iitm taramani' 'industrial estate ambattur' 'industrial estateambattur'
 'irungattu kottai' 'irungatukottai' 'je' 'kandanchavadi' 'karapakkam'
 'kodambakkam' 'kootturpuram' 'kuduvancherry'
 'kundrathurar edaimedaithirumudipakkam'
 'mahindra world city chengalpattu sengundram'
 'mahindra world city thirutheri rfchen

In [45]:
print("Feature vector shape:", feature_vector.shape)
print("TF-IDF top words:", tfidf.get_feature_names_out()[:10])


Feature vector shape: (1, 107)
TF-IDF top words: ['ask' 'ask send' 'asked' 'asked menu' 'asked shared' 'budget' 'card'
 'carry' 'cash' 'cash carry']


In [46]:
print(df["converted_num"].value_counts())

converted_num
3    145
1     52
2     12
0     11
Name: count, dtype: int64


In [47]:
print(set(y))

{0, 1, 2, 3}


In [None]:
probs = model.predict_proba(feature_vector)[0]
# Class label mapping (match your training label encoding!)
label_map = {
    1: "✅ Converted (80:20)",
    2: "🕗 In Progress (60:40)",
    3: "🧊 New (30:70)",
    0: "❌ Not Interested (20:80)"
}

# Print results in a sorted order (optional: most likely first)
sorted_indices = np.argsort(probs)[::-1]
print("🔍 Lead Prediction Breakdown:")
for idx in sorted_indices:
    label = label_map.get(idx, f"Class {idx}")
    print(f"{label} ➤ {probs[idx] * 100:.1f}%")

In [None]:
final_class = np.argmax(probs)
final_label = label_map[final_class]
print(f"\n🎯 Final Predicted Status: {final_label}")


#B2C Lead

In [None]:
# Load once
lead_model = joblib.load("lead_conversion_model.pkl")
company_encoder = joblib.load("company_encoder.pkl")
location_encoder = joblib.load("location_encoder.pkl")
status_encoder = joblib.load("lead_status_encoder.pkl")
tfidf = joblib.load("tfidf_vectorizer.pkl")

def safe_label_transform(encoder, value):
    if value in encoder.classes_:
        return encoder.transform([value])[0]
    return -1

def calculate_lead_score(lead):
    try:
        location = safe_label_transform(location_encoder, lead.delivery_location)
        company = safe_label_transform(company_encoder, lead.company_name)
        lead_status = safe_label_transform(status_encoder, lead.lead_status)
        remarks_vector = tfidf.transform([lead.remarks or ""]).toarray()

        # Combine features
        feature_vector = np.hstack([
            remarks_vector,
            [[
                location,
                company,
                lead_status,
                lead.count or 0,
                lead.prefered_menu_budget or 0,
            ]]
        ])

        score = lead_model.predict_proba(feature_vector)[0][1]
        return round(score * 100, 2)
    except Exception as e:
        print(f"⚠️ Failed to calculate lead score for lead {lead.id}: {str(e)}")
        return 0.0