In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn import metrics

In [2]:
# Load the labelled examples. The path is relative, so it is resolved against
# the kernel's current working directory. Later cells read the 'Text' and
# 'Action_Key' columns from this frame.
df = pd.read_csv('./training_data.csv')

In [3]:
# (rows, columns) of the loaded frame.
df.shape

(98, 2)

In [4]:
# Preview the first rows to sanity-check the text/label pairs.
df.head()

Unnamed: 0,Text,Action_Key
0,npm list,npm_list
1,list the dependencies,npm_list
2,list packages which are there in app,npm_list
3,what are the dependent packages,npm_list
4,execute list command,npm_list


In [5]:
# Features are the raw command texts; targets are the action labels.
X, y = df['Text'], df['Action_Key']

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [6]:
# Per-class counts in the training split, to see how balanced the labels are.
y_train.value_counts()

npm_list       27
npm_install    23
npm_init       23
Name: Action_Key, dtype: int64

In [7]:
def evaluate_model(clf_model, X_eval=None, y_eval=None):
    """Print evaluation diagnostics for a fitted classifier.

    Prints, in order: the confusion matrix, the classification report, and
    one line per evaluated text showing the predicted label, with
    ``<-- Incorrect!`` appended where the prediction differs from the
    actual label.

    Parameters
    ----------
    clf_model : object
        A fitted estimator (or pipeline) exposing ``predict``.
    X_eval : iterable of str, optional
        Texts to evaluate on. When omitted, the notebook-level ``X_test``
        is used, which is the behaviour existing callers rely on.
    y_eval : iterable, optional
        Actual labels aligned with ``X_eval``. When omitted, the
        notebook-level ``y_test`` is used.

    Returns
    -------
    None
        All results are written to standard output.
    """
    # Fall back to the hold-out split created earlier in the notebook so that
    # calls of the form `evaluate_model(model)` keep working unchanged.
    if X_eval is None:
        X_eval = X_test
    if y_eval is None:
        y_eval = y_test

    preds = clf_model.predict(X_eval)

    print("Confusion Matrix:")
    print(metrics.confusion_matrix(y_eval, preds))

    print("\nClassification Report:")
    print(metrics.classification_report(y_eval, preds))

    print("\nPrediction on texts in X_test:\n")
    for cmd, actual, pred in zip(X_eval, y_eval, preds):
        # Flag only the rows where the model disagrees with the label.
        error = '<-- Incorrect!' if actual != pred else ''
        # The text is padded to 50 characters so the predictions line up.
        print(f"{cmd:{50}} - {pred} {error}")

# KNN Model

In [8]:
from sklearn.neighbors import KNeighborsClassifier

In [19]:
# TF-IDF features feeding a 3-nearest-neighbour classifier. `fit` returns the
# pipeline itself, so the fitted object can be bound in a single statement.
knn_clf = Pipeline(steps=[
    ('tfidf', TfidfVectorizer()),
    ('clf', KNeighborsClassifier(n_neighbors=3)),
]).fit(X_train, y_train)

# Display the fitted pipeline.
knn_clf

Pipeline(memory=None,
     steps=[('tfidf', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.float64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,...ki',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform'))])

In [20]:
# Report the KNN pipeline's performance on the held-out split.
evaluate_model(knn_clf)

Confusion Matrix:
[[8 0 0]
 [0 8 0]
 [0 0 9]]

Classification Report:
              precision    recall  f1-score   support

    npm_init       1.00      1.00      1.00         8
 npm_install       1.00      1.00      1.00         8
    npm_list       1.00      1.00      1.00         9

   micro avg       1.00      1.00      1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25


Prediction on texts in X_test:

treat this folder as npm project                   - npm_init 
help me setup the package.json                     - npm_init 
resolve all dependencies for this project          - npm_install 
show installed packages                            - npm_list 
add a package                                      - npm_install 
download all packages                              - npm_install 
configure npm in this directory                    - npm_init 
initialise this with npm                           - npm_init 
I w

# Naive Bayes Model

In [21]:
from sklearn.naive_bayes import MultinomialNB

# Same TF-IDF front end as the other models, with a multinomial Naive Bayes
# classifier (default settings) on top.
nb_clf = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer()),
        ('clf', MultinomialNB()),
    ]
)

# Train on the training split; the fitted pipeline is the cell's output.
nb_clf.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('tfidf', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.float64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,...rue,
        vocabulary=None)), ('clf', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])

In [22]:
# Report the Naive Bayes pipeline's performance on the held-out split.
evaluate_model(nb_clf)

Confusion Matrix:
[[8 0 0]
 [1 7 0]
 [0 1 8]]

Classification Report:
              precision    recall  f1-score   support

    npm_init       0.89      1.00      0.94         8
 npm_install       0.88      0.88      0.88         8
    npm_list       1.00      0.89      0.94         9

   micro avg       0.92      0.92      0.92        25
   macro avg       0.92      0.92      0.92        25
weighted avg       0.92      0.92      0.92        25


Prediction on texts in X_test:

treat this folder as npm project                   - npm_init 
help me setup the package.json                     - npm_init 
resolve all dependencies for this project          - npm_install 
show installed packages                            - npm_list 
add a package                                      - npm_init <-- Incorrect!
download all packages                              - npm_install 
configure npm in this directory                    - npm_init 
initialise this with npm                           - np

# Linear SVC Model

In [13]:
from sklearn.svm import LinearSVC

# TF-IDF features feeding a linear support-vector classifier.
# LinearSVC's solver uses a random number generator, so the seed is pinned
# (to the same value used for the train/test split) to make the fitted model
# reproducible across kernel restarts.
lsvc_clf = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', LinearSVC(random_state=42))
])

# Train on the training split; the fitted pipeline is the cell's output.
lsvc_clf.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('tfidf', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.float64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,...ax_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0))])

In [14]:
# Report the Linear SVC pipeline's performance on the held-out split.
evaluate_model(lsvc_clf)

Confusion Matrix:
[[8 0 0]
 [0 8 0]
 [0 0 9]]

Classification Report:
              precision    recall  f1-score   support

    npm_init       1.00      1.00      1.00         8
 npm_install       1.00      1.00      1.00         8
    npm_list       1.00      1.00      1.00         9

   micro avg       1.00      1.00      1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25


Prediction on texts in X_test:

treat this folder as npm project                   - npm_init 
help me setup the package.json                     - npm_init 
resolve all dependencies for this project          - npm_install 
show installed packages                            - npm_list 
add a package                                      - npm_install 
download all packages                              - npm_install 
configure npm in this directory                    - npm_init 
initialise this with npm                           - npm_init 
I w

# Playground for commands

In [18]:
# Run this cell and enter command text to predict.
# `input()` blocks until a line is typed; that line is wrapped in a
# one-element list because `predict` expects a collection of texts, and the
# result is a one-element array holding the predicted action key.
lsvc_clf.predict([input()])

provide info on dependent packages


array(['npm_list'], dtype=object)