In [40]:
# Step 1: Install Required Libraries
# !pip install pandas nltk scikit-learn

# Step 2: Import Libraries
import pandas as pd
import nltk
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [41]:
# Step 3: Download Opinion Lexicon
nltk.download('opinion_lexicon')
nltk.download('punkt')

[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     /Users/abyte/nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/abyte/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [42]:
# Step 4: Load Your Dataset
df = pd.read_csv('bugs_Calendar.csv')
bugs_df =  df.head(1000)


In [43]:
bugs_df

Unnamed: 0,Bug ID,Product,Component,Assignee,Status,Resolution,Summary,Change Request,Priority,Severity,Type
0,176202,Calendar,Sunbird Only,ajmalvs@netscape.net,RESOLVED,INVALID,Testing the bug entry,---,P1,normal,enhancement
1,1727061,Calendar,Provider: CalDAV,ben.bucksch@beonex.com,RESOLVED,FIXED,(MS) Microsoft Teams meeting description is ma...,---,P1,S1,defect
2,1685007,Calendar,Calendar Frontend,bugzilla2007@duellmann24.net,RESOLVED,FIXED,Double-clicking an event now shows an unwanted...,---,P1,S3,enhancement
3,458828,Calendar,Provider: CalDAV,dbo.moz@boelzle.org,RESOLVED,FIXED,Many errors when setting up zimbra calendar,---,P1,normal,defect
4,462326,Calendar,General,dbo.moz@boelzle.org,RESOLVED,FIXED,Thunderbird with Lightning leaks on Shutdown.,---,P1,normal,defect
...,...,...,...,...,...,...,...,...,...,...,...
995,287953,Calendar,Internal Components,dmosedale@mozilla.com,RESOLVED,FIXED,calICalendarProvider interface,---,--,normal,defect
996,294443,Calendar,Provider: CalDAV,dmosedale@mozilla.com,RESOLVED,INVALID,mail crashed,---,--,normal,defect
997,295775,Calendar,Internal Components,dmosedale@mozilla.com,RESOLVED,FIXED,calICalendar.modifyItem should require caller ...,---,--,normal,defect
998,297109,Calendar,Internal Components,dmosedale@mozilla.com,RESOLVED,DUPLICATE,change calICalendar.modifyItems to take aOldIt...,---,--,normal,defect


In [44]:

bugs_df = bugs_df[bugs_df["Severity"].str.contains("--")==False].reset_index()

#Dropped rows with Type "Enhancement" and "Task" because they are not a bug but a new feature
indexSevere = bugs_df[ (bugs_df['Type'] == 'enhancement') & (bugs_df['Type'] == 'enhancement') ].index
bugs_df.drop(indexSevere , inplace=True)

indexSevere = bugs_df[ (bugs_df['Type'] == 'task') & (bugs_df['Type'] == 'task') ].index
bugs_df.drop(indexSevere , inplace=True)


#Catagorise the severity level into a Severe and Non Severe to make it a binary problem
bugs_df.loc[bugs_df["Severity"] == "blocker", "Severity"] = 'Severe'
bugs_df.loc[bugs_df["Severity"] == "critical", "Severity"] = 'Severe'
bugs_df.loc[bugs_df["Severity"] == "major", "Severity"] = 'Severe'
bugs_df.loc[bugs_df["Severity"] == "S1", "Severity"] = 'Severe'
bugs_df.loc[bugs_df["Severity"] == "S2", "Severity"] = 'Severe'
bugs_df.loc[bugs_df["Severity"] == "S3", "Severity"] = 'NonSevere'
bugs_df.loc[bugs_df["Severity"] == "normal", "Severity"] = 'NonSevere'
bugs_df.loc[bugs_df["Severity"] == "minor", "Severity"] = 'NonSevere'
bugs_df.loc[bugs_df["Severity"] == "trivial", "Severity"] = 'NonSevere'
bugs_df.loc[bugs_df["Severity"] == "S4", "Severity"] = 'NonSevere'

In [45]:
# Step 5: Preprocess the Data
def calculate_sentiment(text):
    tokens = word_tokenize(text.lower())
    pos_words = set(opinion_lexicon.positive())
    neg_words = set(opinion_lexicon.negative())
    
    pos_score = sum(1 for word in tokens if word in pos_words)
    neg_score = sum(1 for word in tokens if word in neg_words)
    
    return pos_score - neg_score

In [46]:
bugs_df['sentiment_score'] = bugs_df['Summary'].apply(calculate_sentiment)


In [47]:
# Step 6: Prepare Data for Classification
X = bugs_df[['sentiment_score']]
y = bugs_df['Severity'].apply(lambda x: 1 if x == 'Severe' else 0)


In [54]:
X

Unnamed: 0,sentiment_score
1,-1
3,-1
4,-1
5,0
6,0
...,...
958,0
959,-1
960,0
961,0


In [53]:
y

1      1
3      0
4      0
5      1
6      1
      ..
958    0
959    0
960    0
961    0
962    0
Name: Severity, Length: 839, dtype: int64

In [48]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [49]:
# Step 7: Train a Classification Model
model = LogisticRegression()
model.fit(X_train, y_train)


LogisticRegression()

In [50]:
# Step 8: Evaluate the Model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print('Accuracy:', accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      1.00      0.89       135
           1       0.00      0.00      0.00        33

    accuracy                           0.80       168
   macro avg       0.40      0.50      0.45       168
weighted avg       0.65      0.80      0.72       168

Accuracy: 0.8035714285714286


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
