In [261]:
# Import packages
import pandas as pd
import seaborn as sns
import plotly.express as px
%matplotlib inline
import matplotlib.pyplot as plt

from dash import Dash, html, dash_table, dcc
from jupyter_dash import JupyterDash
from dash.dependencies import Input, Output

sns.set_theme(style="darkgrid")

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_info_columns', 200)
pd.set_option('display.max_colwidth', None)

# Suppress SettingWithCopyWarning
pd.options.mode.chained_assignment = None

# Initialize the app
app = Dash()

## Read in CSV File

In [262]:
file_path = '/Users/alihushyar/Documents/Development/Python/NaaS/incidents_2024-08-31_L90.csv'
df_inc = pd.read_csv(file_path, low_memory=False, index_col=0)

# Get number of rows and columns in dataset
df_inc.shape

(76251, 94)

In [263]:
#df_inc.info()

## Clean data

In [264]:
# Drop any column that is completely empty
df_inc.dropna(axis=1, how='all', inplace=True)
df_inc.shape

(76251, 84)

In [265]:
cols = ['number',  'priority', 'state',  'incCreatedAt', 'case_number__pk', 'case_state', 'resolved_at', 'alert_id', 'alert_last_occurred_at', 'ai_category', 'ai_sub_category', 'issue_type', 'sub_issue_type', 'shortDescription', 'ticketClassification', 'device_name', 'configurationItem', 'assetSerialNum', 'account_id', 'account_name', 'sub_site_id', 'sub_site_name', 'incCreatedBy', 'assignmentGroup', 'resolved_by']
cols1 = ['number',  'priority', 'state',  'alert_last_occurred_at', 'incCreatedAt', 'resolved_at', 'alert_id', 'case_number__pk', 'case_state', 'ai_category', 'ai_sub_category', 'issue_type', 'sub_issue_type', 'device_name', 'account_id', 'incCreatedBy']
df_incf = df_inc[cols1]
#df_incf.head()

In [266]:
# Rename priority column values
df_incf.loc[df_incf['priority'] == '1 - Critical', 'priority'] = 'P1'
df_incf.loc[df_incf['priority'] == '2 - High', 'priority'] = 'P2'
df_incf.loc[df_incf['priority'] == '3 - Moderate', 'priority'] = 'P3'
df_incf.loc[df_incf['priority'] == '4 - Low', 'priority'] = 'P4'

In [267]:
# Replace values
df_incf['ai_category'] = df_incf['ai_category'].fillna('NOTASSIGNED')
df_incf['ai_sub_category'] = df_incf['ai_sub_category'].fillna('NOTASSIGNED')

In [268]:
# Filter out New and In Progress tickets from state column
ticket_state = ['New', 'In Progress']
df_incf = df_incf.query('state not in @ticket_state').reset_index(drop=True)
print(f'# Records after filter on \'ticket state\' is {df_incf.shape[0]}')

# Records after filter on 'ticket state' is 75612


In [269]:
df_incf = df_incf[df_incf['incCreatedBy'] == 'serviceinsights']


In [270]:
df_incf.info()

<class 'pandas.core.frame.DataFrame'>
Index: 75442 entries, 0 to 75611
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   number                  75442 non-null  object
 1   priority                75442 non-null  object
 2   state                   75442 non-null  object
 3   alert_last_occurred_at  46783 non-null  object
 4   incCreatedAt            75442 non-null  object
 5   resolved_at             44795 non-null  object
 6   alert_id                67806 non-null  object
 7   case_number__pk         21089 non-null  object
 8   case_state              21089 non-null  object
 9   ai_category             75442 non-null  object
 10  ai_sub_category         75442 non-null  object
 11  issue_type              52991 non-null  object
 12  sub_issue_type          35190 non-null  object
 13  device_name             47198 non-null  object
 14  account_id              75442 non-null  object
 15  incCrea

In [271]:
#df_incf.head()

In [287]:
#df_incf['ai_category'].value_counts().reset_index()

In [273]:
#df_incf['issue_type'].value_counts().reset_index()

In [274]:
#df_incf['device_name'].value_counts().reset_index()

In [275]:
#df_incf['account_id'].value_counts().reset_index()

In [276]:
#df_incf['incCreatedBy'].value_counts().reset_index()

In [277]:
df_incf.drop(columns=['incCreatedBy'], inplace=True)

In [278]:
#df_incf.head()

In [279]:
df_incf['state'].value_counts().reset_index()

Unnamed: 0,state,count
0,Resolved,43924
1,Cancelled,21261
2,On Hold,9386
3,Closed,871


In [280]:
cols2 = ['number',  'priority', 'ai_category', 'issue_type', 'device_name', 'account_id', 'state']
df_class = df_inc[cols2]
#df_class.head()

In [281]:
df_class.set_index('number', inplace=True)
#df_class.head()

In [282]:
df_class.info()

<class 'pandas.core.frame.DataFrame'>
Index: 76251 entries, INC1024863 to INC0032354
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   priority     76251 non-null  object
 1   ai_category  72816 non-null  object
 2   issue_type   53295 non-null  object
 3   device_name  47725 non-null  object
 4   account_id   76251 non-null  object
 5   state        76251 non-null  object
dtypes: object(6)
memory usage: 4.1+ MB


In [283]:
df_class['device_name'].fillna('unknown', inplace=True)
df_class['issue_type'].fillna('Other', inplace=True)
df_class['ai_category'].fillna('NOTASSIGNED', inplace=True)

df_class.info()

<class 'pandas.core.frame.DataFrame'>
Index: 76251 entries, INC1024863 to INC0032354
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   priority     76251 non-null  object
 1   ai_category  76251 non-null  object
 2   issue_type   76251 non-null  object
 3   device_name  76251 non-null  object
 4   account_id   76251 non-null  object
 5   state        76251 non-null  object
dtypes: object(6)
memory usage: 4.1+ MB



A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A

In [290]:
df_class['account_id'] = pd.factorize(df_class['account_id'])[0]
df_class.reset_index(drop=True, inplace=True)


In [291]:
df_class.head()

Unnamed: 0,priority,ai_category,issue_type,device_name,account_id,state
0,2 - High,Security,Other,switch,0,New
1,2 - High,Security,Other,switch,1,New
2,1 - Critical,Switch,Wired Connectivity,switch,1,On Hold
3,3 - Moderate,IAP,Other,iap,2,New
4,2 - High,Security,Management,switch,1,On Hold


In [292]:
df_class['state'].value_counts().reset_index()

Unnamed: 0,state,count
0,Resolved,44061
1,Cancelled,21272
2,On Hold,9400
3,Closed,879
4,New,382
5,In Progress,257


Baseline multi-class logistic regression on state

In [293]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Assume df_class is your DataFrame
df = df_class.copy()

# One-hot encode categorical features (excluding target column 'state')
categorical_features = ['priority', 'ai_category', 'issue_type', 'device_name', 'account_id']
df_encoded = pd.get_dummies(df, columns=categorical_features, drop_first=True)

# Encode the target variable
label_encoder = LabelEncoder()
df_encoded['state'] = label_encoder.fit_transform(df['state'])

# Split into features (X) and target (y)
X = df_encoded.drop(columns=['state'])
y = df_encoded['state']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=500)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")






Accuracy: 0.9025
