In [None]:
import os
import re

import gradio as gr
import joblib
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [None]:
# word_tokenize() needs the Punkt tokenizer data. Older NLTK releases load it
# from the 'punkt' package; newer releases (3.8.2 / 3.9 onward) load it from
# 'punkt_tab' and raise LookupError when only 'punkt' is installed, so both
# packages are fetched here.
nltk.download('punkt')
nltk.download('punkt_tab')
# English stop-word list used to filter tokens during preprocessing.
nltk.download('stopwords')
# WordNetLemmatizer uses WordNet as its dictionary to reduce words to a base
# form. With the default (noun) part of speech this maps e.g. "cars" → "car";
# verb forms such as "running" are only reduced when pos='v' is passed.
nltk.download('wordnet')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Jatin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Jatin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Jatin\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Shared preprocessing resources, built once and reused by preprocess().
lemmatizer = WordNetLemmatizer()
english_stopwords = stopwords.words('english')
stop_words = set(english_stopwords)  # a set, for cheap membership tests per token

In [4]:
def preprocess(text):
    """Normalise raw ticket text into a space-joined string of lemmatised tokens.

    The text is lower-cased, every character that is not an ASCII letter,
    digit or whitespace is replaced by a space, the result is tokenised with
    ``nltk.word_tokenize``, tokens found in ``stop_words`` are dropped and the
    remaining tokens are passed through ``lemmatizer.lemmatize``.

    Args:
        text: Raw ticket text. Any non-string value (e.g. ``None`` or a NaN
            float coming from pandas) is treated as an empty ticket.

    Returns:
        str: The cleaned tokens joined by single spaces; ``''`` when the input
        is not a string or no tokens survive.
    """
    if not isinstance(text, str):
        return ''
    text = text.lower()
    # Substitute a space rather than the empty string: deleting punctuation
    # outright fuses neighbouring words ("error/crash" -> "errorcrash") and
    # turns contractions into tokens the stop-word list misses ("don't" -> "dont").
    text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    tokens = nltk.word_tokenize(text)
    return ' '.join(lemmatizer.lemmatize(t) for t in tokens if t not in stop_words)


In [7]:
# Load the tickets, keep only rows that have text and both labels, and add
# the preprocessed text column in one chained expression (no in-place edits).
df = (
    pd.read_csv("csv_data/ticket.csv")
    .dropna(subset=['ticket_text', 'issue_type', 'urgency_level'])
    .assign(cleaned_text=lambda frame: frame['ticket_text'].apply(preprocess))
)


In [8]:
# One encoder per target so predictions can later be mapped back to the
# original label strings with inverse_transform().
issue_enc = LabelEncoder().fit(df['issue_type'])
urgency_enc = LabelEncoder().fit(df['urgency_level'])

df['issue_encoded'] = issue_enc.transform(df['issue_type'])
df['urgency_encoded'] = urgency_enc.transform(df['urgency_level'])


In [9]:
# TF-IDF features over the cleaned ticket text, capped at 1000 terms.
# NOTE(review): the vectorizer is fit on every row before the train/test
# splits in the following cells, so held-out rows influence the vocabulary
# and IDF weights. Confirm this is acceptable if the test splits are ever
# used for evaluation.
cleaned_corpus = df['cleaned_text']
vectorizer = TfidfVectorizer(max_features=1000).fit(cleaned_corpus)
X = vectorizer.transform(cleaned_corpus)


In [10]:
# Hold out 20% of the rows for the issue-type classifier, stratified on the
# encoded issue label so both splits keep the same class proportions.
issue_labels = df['issue_encoded']
X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(
    X,
    issue_labels,
    test_size=0.2,
    stratify=issue_labels,
    random_state=42,
)


In [12]:
# Hold out 20% of the rows for the urgency classifier, stratified on the
# encoded urgency label. This is a separate split from the issue-type one.
urgency_labels = df['urgency_encoded']
X_train_u, X_test_u, y_train_u, y_test_u = train_test_split(
    X,
    urgency_labels,
    test_size=0.2,
    stratify=urgency_labels,
    random_state=42,
)


In [13]:
# Two independent logistic-regression classifiers over the same TF-IDF
# features: one for issue type, one for urgency. fit() returns the estimator.
issue_model = LogisticRegression(max_iter=1000).fit(X_train_i, y_train_i)
urgency_model = LogisticRegression(max_iter=1000).fit(X_train_u, y_train_u)

# Last expression: display the fitted urgency model as the cell output.
urgency_model


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [14]:
# joblib.dump does not create missing folders, so make sure the target exists.
os.makedirs("models", exist_ok=True)

# The classifiers can only be used together with the fitted TF-IDF vectorizer
# and the label encoders (predict() needs all of them), so those artefacts
# are persisted alongside the models.
joblib.dump(vectorizer, "models/vectorizer.pkl")
joblib.dump(issue_enc, "models/issue_encoder.pkl")
joblib.dump(urgency_enc, "models/urgency_encoder.pkl")
joblib.dump(issue_model, "models/issue_model.pkl")
# Kept last so the cell still displays the urgency model's output path.
joblib.dump(urgency_model, "models/urgency_model.pkl")



['models/urgency_model.pkl']

In [15]:
# Product names that extract_entities looks for (compared case-insensitively).
PRODUCT_LIST = [
    'SmartWatch V2',
    'UltraClean Vacuum',
    'SoundWave 300',
    'EcoBreeze AC',
    'PhotoSnap Cam',
    'Vision LED TV',
    'RoboChef Blender',
    'FitRun Treadmill',
    'PowerMax Battery',
]

# Lower-case complaint words/phrases searched for in the lower-cased ticket text.
KEYWORDS = [
    'broken',
    'late',
    'error',
    'malfunction',
    'lost',
    'issue',
    'not working',
    'no response',
]


In [16]:
def extract_entities(text, products=None, keywords=None):
    """Find known product names and complaint keywords mentioned in a ticket.

    Matching is case-insensitive. Product names are matched as plain
    substrings. Complaint keywords must begin at a word boundary, so that
    "late" is found in "arrived late" or "lately" but not inside unrelated
    words such as "related" or "template"; inflected forms ("issues",
    "malfunctioning") still match.

    Args:
        text: Raw ticket text. ``None`` or an empty string yields no matches.
        products: Optional iterable of product names to look for; defaults to
            the module-level ``PRODUCT_LIST``.
        keywords: Optional iterable of complaint words/phrases to look for;
            defaults to the module-level ``KEYWORDS``.

    Returns:
        dict: ``{'products': [...], 'complaints': [...]}`` with the matched
        entries in the same order as the lists they were taken from.
    """
    if products is None:
        products = PRODUCT_LIST
    if keywords is None:
        keywords = KEYWORDS

    # Lower-case the text once instead of once per candidate.
    lowered = (text or '').lower()

    return {
        'products': [p for p in products if p.lower() in lowered],
        'complaints': [
            k for k in keywords
            # Leading \b rejects hits that start in the middle of another word.
            if re.search(r'\b' + re.escape(k.lower()), lowered)
        ],
    }


In [17]:
def predict(text):
    """Classify one support ticket and extract rule-based entities from it.

    Args:
        text: Raw ticket text.

    Returns:
        tuple: ``(issue, urgency, entities)`` where ``issue`` and ``urgency``
        are the label values decoded by ``issue_enc`` / ``urgency_enc`` and
        ``entities`` is the dict returned by ``extract_entities`` for the
        raw (un-preprocessed) text.
    """
    # Same preprocessing + TF-IDF transform that was applied to the training data.
    features = vectorizer.transform([preprocess(text)])

    issue_code = issue_model.predict(features)[0]
    urgency_code = urgency_model.predict(features)[0]

    # Map the integer class codes back to their original label values.
    issue = issue_enc.inverse_transform([issue_code])[0]
    urgency = urgency_enc.inverse_transform([urgency_code])[0]

    return issue, urgency, extract_entities(text)


In [19]:
import gradio as gr

# Input widget: a multi-line box for the raw ticket text.
ticket_input = gr.Textbox(lines=5, label="Enter Ticket Text")

# Output widgets, in the same order as the tuple returned by predict().
prediction_outputs = [
    gr.Text(label="Issue Type"),
    gr.Text(label="Urgency Level"),
    gr.JSON(label="Entities"),
]

iface = gr.Interface(
    fn=predict,
    inputs=ticket_input,
    outputs=prediction_outputs,
    title="Simple Ticket Classifier",
    description="Predict issue type and urgency from support ticket and extract key entities.",
)

# Start the web UI only when this code runs as the main module.
if __name__ == "__main__":
    iface.launch()


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.
