# Software requirement binary classification

Software requirements are classified as functional or non-functional using TF-IDF text features and a feed-forward neural network built with TensorFlow (Keras).

#### Importing libraries

In [1]:
import csv
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

In [2]:
# Load the requirements dataset and, in the same expression, discard every column
# that contains at least one missing value (rows are never dropped here)
df = pd.read_csv("nfr.csv").dropna(axis="columns", how="any")

In [3]:
# Add the binary target column "Tag": 1 when the requirement's Type is "F" (functional),
# 0 for every other type code (non-functional).
# The comparison is evaluated on the whole column at once instead of invoking a Python
# lambda once per row; the explicit int64 cast keeps the same dtype the row-wise version produced.
df["Tag"] = (df["Type"] == "F").astype("int64")
df.head()

Unnamed: 0,Type,Requirement,Tag
0,PE,The system shall refresh the display every 60 ...,0
1,LF,The application shall match the color of the s...,0
2,US,If projected the data must be readable. On ...,0
3,A,The product shall be available during normal ...,0
4,US,If projected the data must be understandable...,0


In [4]:
# Features are the raw requirement sentences; the target is the binary Tag column
X, y = df["Requirement"], df["Tag"]

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Vectorize the training and test data using count vectorization
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

In [6]:
# Re-weight the raw token counts with TF-IDF; the IDF statistics come from the training
# counts only and are then applied to both splits
transformer = TfidfTransformer().fit(X_train_counts)
X_train_tfidf = transformer.transform(X_train_counts)
X_test_tfidf = transformer.transform(X_test_counts)

In [7]:
# Densify the TF-IDF matrices and pull the labels out of their pandas Series so that
# everything handed to the model below is a plain NumPy array
X_train_tfidf_array, X_test_tfidf_array = X_train_tfidf.toarray(), X_test_tfidf.toarray()
y_train_array, y_test_array = y_train.to_numpy(), y_test.to_numpy()

In [8]:
# Build the neural network model.
# Seed Python's `random`, NumPy and TensorFlow before any layer is created so that weight
# initialisation, dropout masks and batch shuffling are repeatable when the notebook is
# re-run from a fresh kernel (some GPU kernels may still be nondeterministic).
tf.keras.utils.set_random_seed(42)

# Architecture: two 64-unit ReLU hidden layers, each followed by 50% dropout, feeding a
# single sigmoid unit that outputs the probability of the positive class (Tag == 1).
# The input width equals the number of TF-IDF features, i.e. the fitted vocabulary size.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu', input_shape=(X_train_tfidf_array.shape[1],)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [9]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the single
# sigmoid output), and accuracy reported as the monitoring metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Fit on the training split: 5 passes over the data in mini-batches of 32.
# The trailing semicolon suppresses the cell's return-value output.
model.fit(x=X_train_tfidf_array, y=y_train_array, batch_size=32, epochs=5);

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
# Score the trained model on the held-out split; with the compile settings used in this
# notebook, `scores` holds the loss first and the accuracy second
scores = model.evaluate(x=X_test_tfidf_array, y=y_test_array, verbose=2)
print("Test loss:", scores[0])
print("Test accuracy:", scores[1])

4/4 - 0s - loss: 0.3650 - accuracy: 0.8571 - 158ms/epoch - 40ms/step
Test loss: 0.3649613559246063
Test accuracy: 0.8571428656578064
