In [85]:
import getpass
import pandas as pd
import requests
import re
import openai

In [2]:
gpt_api_key = getpass.getpass("Enter your OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = gpt_api_key

Enter your OpenAI API key:  ········


In [77]:
def get_chatgpt_prompt(customer_features, feature_importance, example_message, example_intervention_plan):
    """
    Generate a prompt for ChatGPT based on customer details, feature importance, an example message, and an example intervention plan.

    Inputs:
    - customer_features: List of customer feature details (e.g., CustomerID, Tenure, DiscountAmount, etc.).
    - feature_importance: List of important features and their importance scores (e.g., [('Tenure', 0.14), ('DiscountAmount', 0.07), ...]).
    - example_message: String, an example message template to personalize.
    - example_intervention_plan: String, an example intervention plan outline.

    Output:
    - A formatted prompt string for ChatGPT.
    """
    
    # Format the feature list for display in the prompt
    formatted_features = "\n".join([f"{feature}: [Insert {feature}]" for feature in customer_features])

    # Format the feature importance for display in the prompt
    formatted_importance = "\n".join([f"{feature} (~{importance} importance)" for feature, importance in feature_importance])

    # Construct the prompt with placeholders and provided examples
    prompt = f"""You are an AI assistant specializing in customer retention strategies for an e-commerce platform. 
A customer has been predicted to churn based on our predictive model. Your task is to create a customized intervention plan and draft a personalized message to encourage the customer to remain engaged with our platform.

Customer Details (Z-sorce):

{formatted_features}

Important Features Identified by the Model:

{formatted_importance}

Your Tasks:

Analyze the Customer's Profile:

- Examine the customer's data, focusing on the important features listed above.
- Identify possible reasons why the customer is likely to churn.

Develop a Customized Intervention Plan:

- Propose specific actions or offers tailored to the customer's needs and preferences.
- Ensure the plan addresses the key factors contributing to potential churn.

Draft a Personalized Message:

- Craft a message to be sent to the customer that aligns with the intervention plan.
- Use a professional, empathetic, and engaging tone.
- Encourage the customer to continue using our platform.

Guidelines:

Tailor the Intervention:

- Use the customer's specific data to make the intervention relevant.
- Consider offering discounts, personalized product recommendations, or enhanced customer support.

Company Policies and Resources:

- Ensure that the proposed actions are feasible within our company's capabilities.
- Align recommendations with our business objectives and ethical standards.

Communication Style:

- Maintain a tone consistent with our brand.
- Be clear, concise, and persuasive without being intrusive.

Output Format:

Intervention Plan:

- Outline the steps of the proposed intervention.
- Explain how each step addresses the customer's specific needs.

Draft Message to Customer:

- Provide the full text of the message to be sent.
- Personalize it based on the customer's data.

Example:

Intervention Plan:
{example_intervention_plan}

Draft Message to Customer:
{example_message}
"""
    return prompt


In [80]:
# Customer features as per the prompt
customer_features = [
    'Tenure', 'DiscountAmount', 'NumberOfStreamerFollowed', 'MaritalStatus', 
    'SatisfactionScore', 'CouponUsed', 'Complain', 'DaySinceLastOrder', 'WarehouseToHome', 
    'PreferedOrderCat'
]

# Important features and their importance values
feature_importance = [
    ('Tenure', 0.14), 
    ('DiscountAmount', 0.07), 
    ('NumberOfStreamerFollowed', 0.065), 
    ('MaritalStatus', 0.06), 
    ('SatisfactionScore', 0.055), 
    ('CouponUsed', 0.05), 
    ('Complain', 0.045), 
    ('DaySinceLastOrder', 0.04), 
    ('WarehouseToHome', 0.035), 
    ('PreferedOrderCat', 0.03)
]

# Example intervention plan as per the prompt
example_intervention_plan = """- Offer a Special Discount: Since the customer has a high DiscountAmount and hasn't used a Coupon recently, provide a personalized discount code for their preferred category (PreferedOrderCat).
- Engage Through Interests: The customer follows NumberOfStreamerFollowed streamers. Collaborate with those streamers to create targeted content or offers.
- Address Complaints: If the customer has filed a Complain, ensure that the issue has been resolved satisfactorily. Offer additional support if needed.
- Improve Satisfaction Score: Since the SatisfactionScore is low, invite the customer to provide feedback and show that their opinions are valued."""

# Example message as per the prompt
example_message = """Dear Customer,

We hope this message finds you well. We noticed that you haven't shopped with us in a while, and we wanted to reach out personally.

As a valued member of our community since [Tenure] months ago, your satisfaction is our top priority. To show our appreciation, we're offering you an exclusive discount on your favorite products in [PreferedOrderCat].

Use the code [DiscountCode] at checkout to enjoy your special offer. If there's anything we can assist you with or if you have feedback to share, please don't hesitate to let us know.

We're here to make your shopping experience exceptional.

Warm regards, [Company Name] Team"""


In [78]:
def intervention_advice(df, gpt_api_key, customer_features, feature_importance, example_message, example_intervention_plan):
    """
    Generate personalized intervention advice for customers at risk of churn using OpenAI GPT.

    Inputs:
    - df: pandas DataFrame containing customer data with relevant columns for features.
    - gpt_api_key: String, your OpenAI API key.
    - customer_features: List of strings, customer feature names to include in the prompt.
    - feature_importance: List of tuples, each containing a feature name and its importance score.
    - example_message: String, an example message template to personalize.
    - example_intervention_plan: String, an example intervention plan outline.

    Outputs:
    - df: pandas DataFrame with additional columns ['Intervention_Plan', 'Personalized_Message'] generated by GPT.
    """

    # Set up OpenAI API key
    openai.api_key = gpt_api_key

    # Function to generate GPT response for a single customer's intervention advice
    def generate_intervention(row):
        # Generate customer feature strings
        customer_feature_values = [f"{feature}: {row[feature]}" for feature in customer_features]
        
        # Generate the prompt
        prompt = get_chatgpt_prompt(
            customer_features=customer_feature_values,
            feature_importance=feature_importance,
            example_message=example_message,
            example_intervention_plan=example_intervention_plan
        )
        
        # Call the OpenAI API
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Use "gpt-3.5-turbo" or the appropriate model you have access to
            messages=[
                {"role": "system", "content": "You are an assistant specializing in customer retention strategies."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500
        )

        # Extract and parse the response
        reply = response['choices'][0]['message']['content']

        # Split the response into the intervention plan and message sections
        intervention_plan = None
        personalized_message = None
        try:
            # Assuming the response is formatted with "Intervention Plan:" and "Draft Message to Customer:"
            parts = reply.split("Draft Message to Customer:")
            intervention_plan = parts[0].replace("Intervention Plan:", "").strip()
            personalized_message = parts[1].strip() if len(parts) > 1 else None
        except Exception as e:
            print(f"Error parsing response: {e}\nResponse: {reply}")
        
        return pd.Series([intervention_plan, personalized_message])

    # Apply the generate_intervention function to each row of the DataFrame
    df[['Intervention_Plan', 'Personalized_Message']] = df.apply(generate_intervention, axis=1)

    return df

In [83]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Suppress warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


df = pd.read_csv(r'C:\Users\h1785\OneDrive\Desktop\NUS\IS5126_Hands_on_with_Applied_Analytics\Assignment\Final_Group_Project\Project Dataset.csv')
missing_values = df.isnull().sum()
missing_columns = missing_values[missing_values > 0]
print("Columns with missing values:")
print(missing_columns)

Columns with missing values:
Tenure                         264
WarehouseToHome                251
HourSpendOnApp                 255
OrderCount                     258
OrderAmountHikeFromlastYear    265
DaySinceLastOrder              307
CouponUsed                     256
dtype: int64


In [37]:
df = df.dropna()
df_cleaned = df.drop_duplicates()
df_cleaned['PreferredLoginDevice'] = df_cleaned['PreferredLoginDevice'].replace('Phone', 'PC')

In [38]:
print(df.dtypes)

CustomerID                       int64
Churn                            int64
Tenure                         float64
PreferredLoginDevice            object
CityTier                         int64
WarehouseToHome                float64
MaritalStatus                   object
AgeGroup                         int64
Gender                          object
HourSpendOnApp                 float64
OrderCount                     float64
OrderAmountHikeFromlastYear    float64
DaySinceLastOrder              float64
PreferedOrderCat                object
NumberOfStreamerFollowed         int64
SatisfactionScore                int64
Complain                         int64
CouponUsed                     float64
DiscountAmount                   int64
dtype: object


In [84]:
# The Z-score for “CityTier” doesn’t make much sense
df['CityTier'] = df['CityTier'].astype('category')

In [40]:
sampled_df = df.sample(frac=0.01, random_state=42)
print(sampled_df.dtypes)

CustomerID                        int64
Churn                             int64
Tenure                          float64
PreferredLoginDevice             object
CityTier                       category
WarehouseToHome                 float64
MaritalStatus                    object
AgeGroup                          int64
Gender                           object
HourSpendOnApp                  float64
OrderCount                      float64
OrderAmountHikeFromlastYear     float64
DaySinceLastOrder               float64
PreferedOrderCat                 object
NumberOfStreamerFollowed          int64
SatisfactionScore                 int64
Complain                          int64
CouponUsed                      float64
DiscountAmount                    int64
dtype: object


In [41]:
from sklearn.preprocessing import StandardScaler

# Select numeric columns
num_cols = sampled_df.select_dtypes(include=['int64', 'float64']).columns
num_cols = num_cols.drop('CustomerID', errors='ignore')  # Exclude 'customer ID' if it exists


# Standardize numeric columns
scaler = StandardScaler()
sampled_df[num_cols] = scaler.fit_transform(sampled_df[num_cols])

print(sampled_df)

      CustomerID     Churn    Tenure PreferredLoginDevice CityTier  \
178        50179 -0.557086 -0.454791         Mobile Phone        3   
5627       55628 -0.557086 -0.836013         Mobile Phone        1   
4136       54137 -0.557086  0.053505         Mobile Phone        3   
2348       52349 -0.557086 -0.581865                Phone        1   
642        50643 -0.557086  0.688875                Phone        1   
3585       53586 -0.557086 -0.327717         Mobile Phone        1   
4057       54058 -0.557086 -0.836013                Phone        1   
1875       51876 -0.557086 -0.581865         Mobile Phone        1   
5081       55082 -0.557086 -0.708939                  Pad        1   
4163       54164 -0.557086 -0.073569                  Pad        1   
4673       54674  1.795055 -0.836013                Phone        1   
321        50322 -0.557086  0.815949                  Pad        3   
549        50550 -0.557086  3.230354         Mobile Phone        1   
4658       54659 -0.

In [79]:
df_with_advice = intervention_advice(
    sampled_df,
    gpt_api_key,
    customer_features,
    feature_importance,
    example_message,
    example_intervention_plan
)

# Display the new columns
print(df_with_advice[['Intervention_Plan', 'Personalized_Message']])

                                      Intervention_Plan  \
178   ****\n\n1. **Offer Personalized Discount:** Gi...   
5627  ****\n\n1. **Special Discount Offer:** Provide...   
4136  1. Offer a Loyalty Discount: Given the custome...   
2348  1. Offer Personalized Discount: Given the cust...   
642   1. Extend a Personalized Discount: Offer a tai...   
3585  **Customer Analysis:**\n\nBased on the importa...   
4057  1. Offer Personalized Discount: Given the cust...   
1875  ****\n1. **Personalized Discount Offer:** Give...   
5081  ****\n\n1. **Offer Personalized Discount:** Si...   
4163  - Provide a Personalized Discount: Offer a spe...   
4673  ****\n\n1. **Personalized Discount Offer**: Gi...   
321   - Personalized Discount Offer: Given the signi...   
549   1. Offer a Special Discount: Since the custome...   
4658  1. Extend Special Discount: Considering the cu...   
803   Customer Retention \n\n1. Offer a Personalized...   
1360  1. Offer Personalized Discount: Given the high... 

In [81]:
# Check if the new columns have been created successfully
if 'Intervention_Plan' in df_with_advice.columns and 'Personalized_Message' in df_with_advice.columns:
    # Print one example of the intervention and message to verify
    example_row = df_with_advice.iloc[0] 
    print("Intervention Plan:")
    print(example_row['Intervention_Plan'])
    print("\nPersonalized Message:")
    print(example_row['Personalized_Message'])
else:
    print("The columns 'Intervention_Plan' and 'Personalized_Message' do not exist in the DataFrame.")


Intervention Plan:
****

1. **Offer Personalized Discount:** Given the customer's high DiscountAmount and CouponUsed, we will provide a tailored discount on products from their preferred category, Laptop & Accessory.
2. **Enhance Streamer Engagement:** As the customer follows several streamers, we will collaborate with these influencers to create exclusive content or promotions that align with the customer's interests.
3. **Address Complaints:** Since the customer has expressed dissatisfaction (Complain), we will ensure that any outstanding issues are resolved promptly and offer additional support to enhance their experience.
4. **Improve Retention through Engagement:** Considering the low SatisfactionScore, we will engage the customer with personalized recommendations and invite them to participate in surveys or feedback sessions to better understand their needs.

**

Personalized Message:
**

Subject: 🌟 Exclusive Offer Inside! We Miss You at [Company Name] 🛍️

Dear [Customer Name],

