In [45]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sklearn.linear_model import LinearRegression

In [46]:
# Raw CRM activity export; the relative path is resolved against the
# notebook's working directory.
ACTIVITIES_CSV = 'crm_activities_100_voc.csv'
df = pd.read_csv(ACTIVITIES_CSV)

In [47]:
# Map the upper-case export headers onto the snake_case names used by the
# rest of the notebook. Headers missing from the frame are ignored by rename.
COLUMN_RENAMES = {
    "POTENTIALID": "deal_id",
    "ACTIVITY_TIMESTAMP": "timestamp",
    "ACTIVITY": "activity_type",
    "CONTACTID": "contact_id",
    "ACCOUNTID": "account_id",
    "CALL_VOC": "call_voc",
    "EMAIL_VOC": "email_voc",
    "EVENT_VOC": "event_voc",
    "INVOICE_VOC": "invoice_voc",
    "QUOTE_VOC": "quote_voc",
    "SALESORDER_VOC": "salesorder_voc",
    "DEALSTAGE": "deal_stage",
    "CLASS_TAG": "class_tag",
    "INVOICEGRANDTOTAL": "deal_amount",
}

# Rename, then parse the activity timestamps so that `.dt` accessors and
# time differences work in the feature-engineering cell.
df = (
    df
    .rename(columns=COLUMN_RENAMES)
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
)

In [48]:
# Weight contributed by each activity type to a contact's engagement score.
# Activity types not listed here map to NaN and are skipped by the sum.
engagement_weights = {"call": 1, "mail": 0.5, "meeting": 1}
contact_features = []

# Build one feature record per contact from all of that contact's activity rows.
for contact_id, activities in df.groupby("contact_id"):
    # Deal-level attributes are read from the first row seen for each deal.
    one_row_per_deal = activities.drop_duplicates("deal_id")

    # Whole-day gaps between consecutive activities, in chronological order.
    day_gaps = activities["timestamp"].sort_values().diff().dt.days.dropna()

    contact_features.append({
        "contact_id": contact_id,
        "num_deals": activities["deal_id"].nunique(),
        "sum_deal_amount": one_row_per_deal["deal_amount"].sum(),
        "deal_stages": one_row_per_deal["deal_stage"].nunique(),
        "activity_count": len(activities),
        # With fewer than two timestamps there is no gap to average; fall back to 30 days.
        "activity_frequency": 30 if day_gaps.empty else day_gaps.mean(),
        "engagement_score": activities["activity_type"].map(engagement_weights).sum(),
    })

features_df = pd.DataFrame(contact_features)

In [49]:
# Per-feature thresholds (min / avg / max over all contacts, rounded to two
# decimals) that bin_score later compares individual values against.
binning_rules = {}

feature_columns = [
    "num_deals",
    "sum_deal_amount",
    "deal_stages",
    "activity_count",
    "activity_frequency",
    "engagement_score",
]

for column in feature_columns:
    series = features_df[column]
    lowest, mean, highest = (
        round(stat, 2) for stat in (series.min(), series.mean(), series.max())
    )
    # activity_frequency stores the same three statistics, only listed
    # max-first; every other feature lists them min-first.
    if column == "activity_frequency":
        binning_rules[column] = {"max": highest, "avg": mean, "min": lowest}
    else:
        binning_rules[column] = {"min": lowest, "avg": mean, "max": highest}

In [50]:
def bin_score(value, thresholds, reverse=False):
    """Map a raw feature value onto one of three score bands.

    Args:
        value: The feature value to score.
        thresholds: Mapping with at least ``"min"`` and ``"avg"`` entries;
            a ``"max"`` entry may be present but is not consulted.
        reverse: When truthy, smaller values earn the higher score.

    Returns:
        ``0.2``, ``0.5`` or ``1.0``. In the default direction a value at or
        below ``min`` scores 0.2, at or below ``avg`` scores 0.5, and anything
        larger scores 1.0; with ``reverse`` the 0.2 and 1.0 bands are swapped.
    """
    # Scores for the (<= min, <= avg, > avg) bands, in that order.
    at_or_below_min, at_or_below_avg, above_avg = (
        (1.0, 0.5, 0.2) if reverse else (0.2, 0.5, 1.0)
    )

    if value <= thresholds["min"]:
        return at_or_below_min
    if value <= thresholds["avg"]:
        return at_or_below_avg
    return above_avg

In [51]:
def compute_contact_persona_score(contact_features, weights, binning_rules):
    """Score one contact as the weighted mean of its binned feature scores.

    Every feature named in ``binning_rules`` is binned with ``bin_score``;
    ``activity_frequency`` is binned in reverse, so smaller values score higher.

    Args:
        contact_features: Mapping holding ``"contact_id"`` plus one value for
            every key of ``binning_rules``.
        weights: Mapping of feature name to weight. A scored feature that is
            missing here is weighted 1.0; entries for features that are not
            in ``binning_rules`` are ignored.
        binning_rules: Mapping of feature name to the thresholds mapping that
            is handed to ``bin_score``.

    Returns:
        A dict with ``contact_id``, ``final_persona_score`` (rounded to three
        decimals; 0 when the applied weights sum to zero) and
        ``individual_feature_scores`` (feature name -> binned score).

    Raises:
        KeyError: If ``contact_features`` lacks a feature listed in
            ``binning_rules`` or has no ``"contact_id"``.
    """
    individual_scores = {}
    weighted_sum = 0.0
    total_weight = 0.0

    for key, thresholds in binning_rules.items():
        score = bin_score(contact_features[key], thresholds, key == "activity_frequency")
        weight = weights.get(key, 1.0)

        individual_scores[key] = score
        weighted_sum += score * weight
        # Normalise by the weights actually applied above. Summing
        # weights.values() instead would disagree with the numerator whenever
        # the key sets of `weights` and `binning_rules` differ.
        total_weight += weight

    final_score = weighted_sum / total_weight if total_weight else 0

    return {
        "contact_id": contact_features["contact_id"],
        "final_persona_score": round(final_score, 3),
        "individual_feature_scores": individual_scores
    }

In [52]:
# Relative importance of each feature in the final persona score.
# Every feature counts fully except the summed deal amount, which counts half.
weights = dict(
    num_deals=1.0,
    sum_deal_amount=0.5,
    deal_stages=1.0,
    activity_count=1.0,
    activity_frequency=1.0,
    engagement_score=1.0,
)

In [53]:
# Score every contact record built in the feature-engineering cell.
scored_personas = [
    compute_contact_persona_score(feature_row, weights, binning_rules)
    for feature_row in contact_features
]

In [54]:
# Shallow copy of the scored personas for the flattening step below.
# NOTE: this rebinds `df`, which until this cell referred to the activity DataFrame.
df = list(scored_personas)

In [55]:
# Flatten each scored persona into a single-level record: the identifier and
# final score first, followed by the per-feature scores lifted out of the
# nested "individual_feature_scores" dict.
flat_data = [
    {
        'contact_id': persona['contact_id'],
        'final_persona_score': persona['final_persona_score'],
        **persona['individual_feature_scores'],
    }
    for persona in df
]

# One row per contact, one column per score.
df = pd.DataFrame(flat_data)

In [56]:
# Persist the flattened scores; the row index is not written to the file.
SCORED_PERSONAS_CSV = "scored_personas_v1.csv"
df.to_csv(SCORED_PERSONAS_CSV, index=False)

In [57]:
def score_new_contact(contact_id, deal_records, activity_records, weights, binning_rules, engagement_weights):
    """Build the feature record for one unseen contact and score it.

    Activities are inner-joined to deals on ``deal_id``, so only deals that
    have at least one activity (and activities whose deal is listed)
    contribute to the features.

    Args:
        contact_id: Identifier echoed back in the result.
        deal_records: Records accepted by ``pd.DataFrame`` with ``deal_id``,
            ``deal_amount`` and ``deal_stage`` fields.
        activity_records: Records accepted by ``pd.DataFrame`` with
            ``deal_id``, ``activity_type`` and ``timestamp`` fields. Timestamps
            may be datetimes or anything ``pd.to_datetime`` can parse.
        weights: Feature weights forwarded to ``compute_contact_persona_score``.
        binning_rules: Thresholds forwarded to ``compute_contact_persona_score``.
        engagement_weights: Mapping of activity type to engagement weight;
            unmapped activity types contribute nothing to the engagement score.

    Returns:
        The dict produced by ``compute_contact_persona_score``.
    """
    df_deals = pd.DataFrame(deal_records)
    df_activities = pd.DataFrame(activity_records)

    if df_deals.empty or df_activities.empty:
        # A frame built from no records has no columns, so merging on
        # "deal_id" would raise KeyError. Nothing can match anyway, so use an
        # empty joined frame and let every feature take its zero/default value.
        merged = pd.DataFrame(
            columns=["deal_id", "activity_type", "timestamp", "deal_amount", "deal_stage"]
        )
    else:
        merged = pd.merge(df_activities, df_deals, on="deal_id")

    # Deal-level attributes are read from the first joined row of each deal.
    per_deal = merged.drop_duplicates("deal_id")
    deals = merged["deal_id"].nunique()
    amount = per_deal["deal_amount"].sum()
    stages = per_deal["deal_stage"].nunique()
    activity_count = merged.shape[0]

    # Coerce to datetimes, as the training pipeline does for the CSV, so that
    # string timestamps do not break the `.dt` accessor.
    timestamps = pd.to_datetime(merged["timestamp"]).sort_values()
    gaps = timestamps.diff().dt.days.dropna()
    # With fewer than two activities there is no gap to average; fall back to 30 days.
    activity_frequency = gaps.mean() if not gaps.empty else 30

    engagement_score = merged["activity_type"].map(engagement_weights).sum()

    contact_features = {
        "contact_id": contact_id,
        "num_deals": deals,
        "sum_deal_amount": amount,
        "deal_stages": stages,
        "activity_count": activity_count,
        "activity_frequency": activity_frequency,
        "engagement_score": engagement_score
    }

    return compute_contact_persona_score(contact_features, weights, binning_rules)


In [58]:
# Two sample deals for a hypothetical new contact.
example_deal_records = [
    {"deal_id": deal_id, "deal_amount": deal_amount, "deal_stage": deal_stage}
    for deal_id, deal_amount, deal_stage in (
        ("DX1", 9000, "Proposal"),
        ("DX2", 14000, "Won"),
    )
]

# Three sample activities on those deals, dated 5, 3 and 1 days before now.
example_activity_records = [
    {
        "deal_id": deal_id,
        "activity_type": activity_type,
        "timestamp": datetime.now() - timedelta(days=days_ago),
    }
    for deal_id, activity_type, days_ago in (
        ("DX1", "call", 5),
        ("DX1", "mail", 3),
        ("DX2", "meeting", 1),
    )
]

In [59]:

# Score the example contact against the thresholds and weights defined above,
# then pretty-print the result as JSON.
new_contact_score = score_new_contact(
    "CNew",
    example_deal_records,
    example_activity_records,
    weights,
    binning_rules,
    engagement_weights,
)
score_report = json.dumps(new_contact_score, indent=2)
print("\nScored new contact:")
print(score_report)



Scored new contact:
{
  "contact_id": "CNew",
  "final_persona_score": 0.418,
  "individual_feature_scores": {
    "num_deals": 0.2,
    "sum_deal_amount": 1.0,
    "deal_stages": 1.0,
    "activity_count": 0.2,
    "activity_frequency": 0.2,
    "engagement_score": 0.2
  }
}
