In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [None]:
# Function to clean and normalise text
def text_cleaning(text):
    """Return a lowercase, alphanumeric-only version of ``text``.

    Strings are trimmed and lowercased; hyphens are removed (joining the two
    halves of a hyphenated word), every remaining character that is not
    ``a-z``, ``0-9`` or whitespace is dropped, and whitespace runs collapse to
    a single space. Any non-string value yields ``np.nan``.
    """
    # Guard clause: anything that is not a string is treated as missing
    if not isinstance(text, str):
        return np.nan

    cleaned = text.strip().lower()

    # (pattern, replacement) pairs, applied in this exact order
    substitutions = (
        (r'[-]', ''),            # drop hyphens
        (r'[^a-z0-9\s]', ''),    # drop any other special character
        (r'\s+', ' '),           # collapse whitespace runs
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Removing characters can expose new leading/trailing spaces
    return cleaned.strip()

In [None]:
# Function for text formatting
def capitalize_text(text):
    """Capitalize the first letter of every space-separated word.

    Each word is passed through ``str.capitalize``, so its first character is
    upper-cased and the rest lower-cased. Words are split on a single space,
    so runs of spaces are preserved as-is.

    Parameters
    ----------
    text : str or any
        The text to format. Non-string values (e.g. ``NaN`` or ``None``
        standing in for a missing entry) are returned unchanged instead of
        raising ``AttributeError``.

    Returns
    -------
    str or any
        The formatted string, or the original value when it is not a string.
    """
    # Missing values arrive as float NaN / None; there is nothing to format
    if not isinstance(text, str):
        return text

    return ' '.join(word.capitalize() for word in text.split(' '))

In [None]:
# Read the raw dataset CSV into `df`
df = pd.read_csv(
    filepath_or_buffer=r'/content/drive/MyDrive/united2/RAw/data.csv',
)

In [None]:
# Keep only the relevant features; `.copy()` so edits below never touch `df`
df2 = df[['call_id', 'average_sentiment', 'call_hour', 'AHT', 'AST', 'primary_call_reason']].copy()

# Pre-process the text data: normalise the call-reason strings
# (text_cleaning turns every non-string entry into NaN)
df2['primary_call_reason'] = df2['primary_call_reason'].apply(text_cleaning)

# Re-capitalise the cleaned reasons. `na_action='ignore'` leaves NaN entries
# untouched, so capitalize_text is only ever called with real strings.
df2['primary_call_reason'] = df2['primary_call_reason'].map(capitalize_text, na_action='ignore')



In [4]:
# Import necessary libraries
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Encode the target labels (primary call reason) as integer class ids
label_encoder = LabelEncoder()
df2['primary_call_reason_encoded'] = label_encoder.fit_transform(df2['primary_call_reason'])

# Define features (X) and target (y)
# NOTE(review): 'call_id' looks like a row identifier rather than a predictive
# signal. It is kept here because the prediction cell passes the same five
# columns to the fitted model; if it is dropped, drop it in both places.
X = df2[['call_id', 'average_sentiment', 'call_hour', 'AHT', 'AST']]
y = df2['primary_call_reason_encoded']

# Split the data into training (80%) and test (20%) sets; fixed seed for
# reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the Random Forest model
rf_model = RandomForestClassifier(random_state=42)

# Define hyperparameters to tune.
# 'auto' is deliberately not listed under max_features: for classifiers it was
# only an alias of 'sqrt', it was deprecated in scikit-learn 1.1 and removed in
# 1.3, so keeping it either duplicates the 'sqrt' fits or makes them fail.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Perform an exhaustive grid search with 5-fold cross-validation, scored on
# accuracy and parallelised over all available cores (n_jobs=-1)
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid,
                           scoring='accuracy', cv=5, verbose=2, n_jobs=-1)

# Fit the model on the training data
grid_search.fit(X_train, y_train)

# Get the best model from grid search (refit on the whole training split)
best_rf_model = grid_search.best_estimator_

# Make predictions on the held-out test split
y_pred = best_rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

# Convert predicted class ids back to their original text form
predicted_labels = label_encoder.inverse_transform(y_pred)

Model Accuracy: 40.00%


In [None]:
# Load the test data
test_df = pd.read_csv(r'/content/drive/MyDrive/united2/RAw/test.csv')

# Columns passed to the model: the same names, in the same order, as the
# feature frame used when fitting (this includes 'call_id')
feature_columns = ['call_id', 'average_sentiment', 'call_hour', 'AHT', 'AST']

# Extract the call_id column from the test data
test_ids = test_df[['call_id']]

# Merge the test ids with df2 to get the other features.
# df2 is de-duplicated on call_id first: a left merge emits one output row per
# matching right-hand row, so a repeated call_id in df2 would make the merged
# frame longer than test_df and break the column assignment further down.
# NOTE(review): call_ids absent from df2 end up with NaN features — confirm
# every test call is present in data.csv.
feature_lookup = df2.drop_duplicates(subset='call_id')
test_data = pd.merge(test_ids, feature_lookup, on='call_id', how='left')

# Select the features for prediction
test_X = test_data[feature_columns]

# Make predictions using the best model
y_pred_test = best_rf_model.predict(test_X)

# Convert predicted labels back to their original text form
predicted_labels_test = label_encoder.inverse_transform(y_pred_test)

# Add the predicted labels to the test dataframe (one prediction per test row,
# in the original row order, which the left merge preserves)
test_df['predicted_primary_call_reason'] = predicted_labels_test

# Save the results to a new CSV file
test_df.to_csv('/content/drive/MyDrive/united2/RAw/test_orfolks.csv', index=False)


## Signing Off
Team OR Folks