In [27]:
# Execute the shared setup script; %run leaves the names it defines in this
# notebook's namespace.
# NOTE(review): presumably config.py is what provides the names used below
# without a visible import (pd, LabelEncoder, train_test_split,
# RandomForestClassifier, classification_report) — confirm.
%run config.py

# Load the support-ticket dataset. The path is relative, so it resolves
# against the kernel's current working directory.
data = pd.read_csv('Data/customer_support_tickets.csv')

In [42]:
# --- Label-encode the categorical columns ---------------------------------
# One encoder per column, each kept under its own name: the prediction helper
# defined below reuses them to encode its inputs and to name the classes.
le_gender = LabelEncoder()
le_product = LabelEncoder()
le_ticket_type = LabelEncoder()

column_encoders = {
    'Customer Gender': le_gender,
    'Product Purchased': le_product,
    'Ticket Type': le_ticket_type,
}
for source_column, encoder in column_encoders.items():
    # Adds "<column> Encoded" next to the original column in `data`.
    data[f'{source_column} Encoded'] = encoder.fit_transform(data[source_column])

# --- Feature matrix and target --------------------------------------------
feature_columns = ['Product Purchased Encoded', 'Customer Age', 'Customer Gender Encoded']
X = data[feature_columns]
y = data['Ticket Type Encoded']

# --- Hold out 20% of the rows for evaluation (seeded for repeatability) ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Fit a 100-tree random forest on the training split -------------------
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Define a simplified function for predicting and ranking issue types
def predict_rank_issues_simple(product, age, gender):
    """Rank ticket types by predicted likelihood for one customer profile.

    Relies on the notebook-level ``le_product``, ``le_gender``,
    ``le_ticket_type`` encoders and the fitted ``model``.

    Parameters
    ----------
    product : str
        Product name; must be a label ``le_product`` was fitted on.
    age : int or float
        Customer age, passed to the model unchanged.
    gender : str
        Gender label; must be a label ``le_gender`` was fitted on.

    Returns
    -------
    list of (label, probability) tuples
        One entry per class the model was trained on, sorted from most to
        least likely.

    Raises
    ------
    ValueError
        Raised by ``LabelEncoder.transform`` when ``product`` or ``gender``
        was not seen during fitting.
    """
    product_encoded = le_product.transform([product])[0]
    gender_encoded = le_gender.transform([gender])[0]

    # Pass a single-row DataFrame with the training column names, in training
    # order. A bare nested list makes scikit-learn warn about missing feature
    # names and skips its column-order check.
    features = pd.DataFrame(
        [[product_encoded, age, gender_encoded]],
        columns=['Product Purchased Encoded', 'Customer Age', 'Customer Gender Encoded'],
    )
    probabilities = model.predict_proba(features)[0]

    # predict_proba columns follow model.classes_ (the encoded classes present
    # in the training split), which may be a subset of le_ticket_type.classes_.
    # Decode from model.classes_ so every probability gets its own label.
    issue_labels = le_ticket_type.inverse_transform(model.classes_)

    return sorted(zip(issue_labels, probabilities), key=lambda pair: pair[1], reverse=True)

# Demo call: rank the likely issue types for one hand-picked customer profile.
# The example_age / example_gender values are also reused by the per-product
# report loop later in the notebook.
# NOTE(review): an age of 3 looks like a placeholder — confirm it falls inside
# the range of 'Customer Age' the model was trained on.
example_product, example_age, example_gender = "LG Smart TV", 3, "Female"
issue_ranking_example = predict_rank_issues_simple(
    product=example_product,
    age=example_age,
    gender=example_gender,
)

issue_ranking_example


[('Technical issue', 0.49233333333333335),
 ('Product inquiry', 0.4626666666666667),
 ('Billing inquiry', 0.035),
 ('Cancellation request', 0.01),
 ('Refund request', 0.0)]

In [43]:
# Score the held-out split and print per-class precision / recall / F1.
# Rows of the report are keyed by the encoded 'Ticket Type' integers.
# NOTE(review): the recorded run below shows 0.20 accuracy across five
# classes — read the predicted likelihoods elsewhere in the notebook with
# that in mind.
preds = model.predict(X_test)

report_text = classification_report(y_true=y_test, y_pred=preds)
print(report_text)

              precision    recall  f1-score   support

           0       0.20      0.18      0.19       357
           1       0.17      0.18      0.17       327
           2       0.17      0.18      0.18       316
           3       0.20      0.23      0.22       345
           4       0.24      0.22      0.23       349

    accuracy                           0.20      1694
   macro avg       0.20      0.20      0.20      1694
weighted avg       0.20      0.20      0.20      1694



In [39]:
# Build a per-product report of ranked issue likelihoods, holding age and
# gender fixed at the example_age / example_gender values defined earlier.
# NOTE(review): this cell's recorded execution count (39) is lower than the
# training cell's (42); re-run the notebook top to bottom so the report
# reflects the current model.
PREVIEW_CHAR_BUDGET = 3000  # upper bound on how much of the report is printed

unique_products = data['Product Purchased'].unique()

output_formatted = []
for product in unique_products:
    ranking = predict_rank_issues_simple(product, example_age, example_gender)
    formatted_ranking = ', '.join(f"{issue}: {prob*100:.0f}%" for issue, prob in ranking)
    output_formatted.append(f"Product: {product}\nLikelihood of Issues: {formatted_ranking}\n")

# Full report: one entry per product, separated by a blank line.
output_report = "\n".join(output_formatted)

# Preview only whole entries that fit in the budget. Slicing the joined string
# by character count cut the last product off mid-line.
preview_entries = []
preview_length = 0
for entry in output_formatted:
    separator_length = 1 if preview_entries else 0
    # Always show at least one entry, even if it alone exceeds the budget.
    if preview_entries and preview_length + separator_length + len(entry) > PREVIEW_CHAR_BUDGET:
        break
    preview_entries.append(entry)
    preview_length += separator_length + len(entry)

print("\n".join(preview_entries))


Product: GoPro Hero
Likelihood of Issues: Billing inquiry: 46%, Product inquiry: 28%, Refund request: 25%, Technical issue: 2%, Cancellation request: 0%

Product: LG Smart TV
Likelihood of Issues: Technical issue: 49%, Product inquiry: 46%, Billing inquiry: 4%, Cancellation request: 1%, Refund request: 0%

Product: Dell XPS
Likelihood of Issues: Billing inquiry: 88%, Refund request: 6%, Technical issue: 3%, Product inquiry: 3%, Cancellation request: 0%

Product: Microsoft Office
Likelihood of Issues: Product inquiry: 45%, Technical issue: 45%, Cancellation request: 5%, Billing inquiry: 3%, Refund request: 2%

Product: Autodesk AutoCAD
Likelihood of Issues: Technical issue: 48%, Cancellation request: 26%, Billing inquiry: 25%, Product inquiry: 0%, Refund request: 0%

Product: Microsoft Surface
Likelihood of Issues: Product inquiry: 57%, Technical issue: 19%, Cancellation request: 17%, Refund request: 4%, Billing inquiry: 3%

Product: Philips Hue Lights
Likelihood of Issues: Billing inqu