## Classification of functional and non-functional software requirements using the following algorithms:
    1. Logistic Regression
    2. Support Vector Machine
    3. Decision Tree
    4. Random Forest

#### Importing modules

In [1]:
import csv
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#### Reading the CSV file and tagging functional requirements with 1 and non-functional requirements with 0

In [10]:
# Load the labelled requirements from the CSV file.
df = pd.read_csv("nfr.csv")
# Drop every column that contains at least one missing value.
# NOTE(review): this removes whole columns, not rows. If "Type" or
# "Requirement" ever holds a missing value, that column disappears and the
# lookups below raise KeyError -- confirm a column-wise drop is intended.
df = df.dropna(axis=1, how='any')
# Binary target: 1 where Type is exactly "F", 0 for every other type.
# A vectorized comparison replaces the row-by-row apply and yields the same values.
df["Tag"] = (df["Type"] == "F").astype(int)
df.tail()

Unnamed: 0,Type,Requirement,Tag
551,PE,The product interface should be fast. The resp...,0
552,F,The system will use the stored e-mail addresse...,1
553,F,The system will notify affected parties for r...,1
554,F,The system will notify affected parties when ...,1
555,F,The system will notify affected parties when ...,1


#### Splitting the data into training and testing sections

In [3]:
# Features are the requirement texts; the target is the binary Tag column.
X, y = df["Requirement"], df["Tag"]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Creating bag-of-words from the training data set (creating a vector form of the data)

In [4]:
# Token-count (bag-of-words) vectorizer with default settings.
vectorizer = CountVectorizer()
# Learn the vocabulary from the training texts only and count their tokens.
X_train_counts = vectorizer.fit_transform(X_train)
# Reuse the fitted vocabulary for the test texts (no refitting on test data).
X_test_counts = vectorizer.transform(X_test)

#### Weighting the bag-of-words counts with TF-IDF (term frequency–inverse document frequency)

In [5]:
# TF-IDF re-weighting of the raw token counts, with default settings.
transformer = TfidfTransformer()
# Fit the weighting on the training counts only, then transform them.
X_train_tfidf = transformer.fit_transform(X_train_counts)
# Apply the already-fitted weighting to the test counts.
X_test_tfidf = transformer.transform(X_test_counts)

#### Grouping all models in a dictionary

In [6]:
# Display name -> unfitted estimator. Insertion order is the order in which
# the models are later fitted and reported.
models = {}
models['Logistic Regression'] = LogisticRegression()
models['Support Vector Machine'] = svm.SVC(kernel="linear")
models['Decision Tree'] = tree.DecisionTreeClassifier()
models['Random Forest'] = RandomForestClassifier()

#### Function that fits and evaluates given machine learning models and returns the scores

In [7]:
def run_algorithms(models, x_train, x_test, y_train, y_test, seed=42):
    """Fit each model on the training data and score it on the test data.

    Parameters
    ----------
    models : dict
        Maps a display name to an estimator exposing ``fit(X, y)`` and
        ``score(X, y)``. The estimators are fitted in place, so the objects
        in ``models`` are trained once this function returns.
    x_train, y_train
        Training features and labels, passed to ``model.fit``.
    x_test, y_test
        Test features and labels, passed to ``model.score``.
    seed : int or None, default 42
        Seed for NumPy's global random number generator, set once before
        any model is fitted. Pass ``None`` to leave the global generator
        untouched.

    Returns
    -------
    dict
        Maps each model name to the value returned by ``model.score``
        (mean accuracy for scikit-learn classifiers), in the iteration
        order of ``models``.
    """
    # scikit-learn estimators created with random_state=None draw from
    # NumPy's global generator, so seeding it here makes results repeatable.
    if seed is not None:
        np.random.seed(seed)

    scores = {}
    for name, model in models.items():
        model.fit(x_train, y_train)
        scores[name] = model.score(x_test, y_test)
    return scores

In [8]:
# Fit and score every model on the TF-IDF features; the returned dict of
# scores is displayed as the cell output.
run_algorithms(
    models=models,
    x_train=X_train_tfidf,
    x_test=X_test_tfidf,
    y_train=y_train,
    y_test=y_test,
)

{'Logistic Regression': 0.8571428571428571,
 'Support Vector Machine': 0.9107142857142857,
 'Decision Tree': 0.7767857142857143,
 'Random Forest': 0.7857142857142857}