In [1]:
%run config.py

# Load the raw support-ticket export; every later cell reads from `data`.
# `pd` is not imported in this notebook itself -- presumably config.py provides it.
data = pd.read_csv(filepath_or_buffer='Data/customer_support_tickets.csv')

In [5]:
# One LabelEncoder per categorical column. The fitted encoders are kept as
# notebook-level names because the prediction helper reuses them to encode new
# inputs and to recover ticket-type names.
le_gender, le_product, le_ticket_type = LabelEncoder(), LabelEncoder(), LabelEncoder()

# Integer-coded copies are stored next to the original text columns.
data['Customer Gender Encoded'] = le_gender.fit_transform(data['Customer Gender'])
data['Product Purchased Encoded'] = le_product.fit_transform(data['Product Purchased'])
data['Ticket Type Encoded'] = le_ticket_type.fit_transform(data['Ticket Type'])

# Feature matrix (column order matters: the helper builds rows in this order)
# and the encoded ticket type as the target.
X = data[['Product Purchased Encoded', 'Customer Age', 'Customer Gender Encoded']]
y = data['Ticket Type Encoded']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Seeded 100-tree random forest, fitted on the training split only.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Rank ticket types by predicted likelihood for a single customer profile
def predict_rank_issues_simple(product, age, gender):
    """Rank ticket types by the model's predicted probability for one customer.

    Relies on the notebook-level ``model``, ``X``, ``le_product``,
    ``le_gender`` and ``le_ticket_type`` objects.

    Parameters
    ----------
    product : str
        Product name; encoded with ``le_product``.
    age : int or float
        Customer age, passed to the model unchanged.
    gender : str
        Gender label; encoded with ``le_gender``.

    Returns
    -------
    list of tuple
        ``(ticket_type, probability)`` pairs sorted from most to least likely.

    Raises
    ------
    ValueError
        Propagated from ``LabelEncoder.transform`` when ``product`` or
        ``gender`` is a label the corresponding encoder was not fitted on.
    """
    product_encoded = le_product.transform([product])[0]
    gender_encoded = le_gender.transform([gender])[0]

    # Build the query row with the same column names the model was fitted on,
    # so the input matches the training frame instead of being a bare list.
    features = pd.DataFrame(
        [[product_encoded, age, gender_encoded]],
        columns=X.columns,
    )
    probabilities = model.predict_proba(features)[0]

    # predict_proba columns follow model.classes_ (the encoded targets seen
    # during fit), which can be a subset of le_ticket_type.classes_. Decode the
    # model's own class list so each label is paired with its own probability.
    issue_labels = le_ticket_type.inverse_transform(model.classes_)

    # Stable sort, highest probability first.
    return sorted(zip(issue_labels, probabilities), key=lambda pair: pair[1], reverse=True)

# Worked example: one product / age / gender combination.
example_product, example_age, example_gender = "LG Smart TV", 30, "Female"

issue_ranking_example = predict_rank_issues_simple(
    product=example_product,
    age=example_age,
    gender=example_gender,
)

# Bare last expression so the notebook displays the ranked list.
issue_ranking_example


[('Billing inquiry', 0.7040000000000001),
 ('Cancellation request', 0.1625),
 ('Product inquiry', 0.07061904761904762),
 ('Technical issue', 0.03188095238095238),
 ('Refund request', 0.031)]

In [3]:
# Score the held-out split and print per-class precision / recall / F1.
# NOTE(review): the saved report below shows ~0.20 accuracy over five classes,
# i.e. chance level -- treat the per-product likelihoods with caution.
preds = model.predict(X_test)

print(classification_report(y_true=y_test, y_pred=preds))

              precision    recall  f1-score   support

           0       0.20      0.18      0.19       357
           1       0.17      0.18      0.17       327
           2       0.17      0.18      0.18       316
           3       0.20      0.23      0.22       345
           4       0.24      0.22      0.23       349

    accuracy                           0.20      1694
   macro avg       0.20      0.20      0.20      1694
weighted avg       0.20      0.20      0.20      1694



In [9]:
# Fixed customer profile; only the product varies across the report.
example_age = 45
example_gender = "Male"

# Products in order of first appearance in the data.
unique_products = data['Product Purchased'].unique()

# One text entry per product: its name, then every ticket type with its
# predicted likelihood as a whole-number percentage.
output_formatted = []
for product in unique_products:
    ranking = predict_rank_issues_simple(product, example_age, example_gender)
    formatted_ranking = ', '.join(f"{issue}: {prob*100:.0f}%" for issue, prob in ranking)
    output_formatted.append(f"Product: {product}\nLikelihood of Issues: {formatted_ranking}\n")

output_report = "\n".join(output_formatted)

# Only the first 3000 characters are shown, so the last entry may be cut off.
print(output_report[:3000])


Product: GoPro Hero
Likelihood of Issues: Technical issue: 87%, Billing inquiry: 10%, Refund request: 2%, Product inquiry: 1%, Cancellation request: 0%

Product: LG Smart TV
Likelihood of Issues: Technical issue: 67%, Product inquiry: 14%, Billing inquiry: 12%, Cancellation request: 7%, Refund request: 0%

Product: Dell XPS
Likelihood of Issues: Product inquiry: 33%, Refund request: 21%, Cancellation request: 19%, Billing inquiry: 17%, Technical issue: 10%

Product: Microsoft Office
Likelihood of Issues: Billing inquiry: 46%, Refund request: 41%, Product inquiry: 9%, Technical issue: 3%, Cancellation request: 1%

Product: Autodesk AutoCAD
Likelihood of Issues: Cancellation request: 49%, Refund request: 25%, Billing inquiry: 17%, Technical issue: 9%, Product inquiry: 0%

Product: Microsoft Surface
Likelihood of Issues: Product inquiry: 67%, Refund request: 19%, Billing inquiry: 10%, Technical issue: 2%, Cancellation request: 1%

Product: Philips Hue Lights
Likelihood of Issues: Cancella