In [None]:
import pandas as pd

# Location of the labelled command CSV (Colab-style absolute path).
DATA_PATH = "/content/output.csv"

# Load the whole file into a DataFrame; later cells read its columns by name.
data = pd.read_csv(DATA_PATH)

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.classify import ClassifierI
from statistics import mode
import pickle

class VoteClassifier(ClassifierI):
  """Ensemble that labels a feature set by majority vote of its members.

  Every classifier passed to the constructor is asked to classify the same
  feature set; the winning label is `statistics.mode` of those answers.
  """

  def __init__(self, *classifiers):
    # Stored as the tuple of positional arguments, in the order given.
    self.classifiers = classifiers

  def _collect_votes(self, features):
    """Return the labels predicted by each member, in member order."""
    return [member.classify(features) for member in self.classifiers]

  def classify(self, features):
    """Return the most common label among the members' predictions."""
    return mode(self._collect_votes(features))

  def confidence(self, features):
    """Return the fraction of members that voted for the winning label."""
    ballots = self._collect_votes(features)
    winner = mode(ballots)
    return ballots.count(winner) / len(ballots)

In [None]:
import nltk
import os
# Download the tokenizer resource requested by name before calling word_tokenize.
nltk.download('punkt_tab')

# One (tokens, label) pair per CSV row. Note that the label column header is
# ' Label' with a leading space, exactly as it appears in the DataFrame.
documents = [
    (word_tokenize(command), label)
    for command, label in zip(data['Command'], data[' Label'])
]
print(documents)

os.makedirs("pickled_algos", exist_ok=True)

# The context manager closes the file even if pickling raises.
with open("pickled_algos/documents.pickle", "wb") as save_documents:
    pickle.dump(documents, save_documents)

[(['Çizgi', 'formasyonunu', 'yap'], 'Cizgi'), (['V', 'çiz'], 'V'), (['Ok', 'başı', 'yapıver'], 'Ok başı'), (['Üçgen', 'olun'], 'Üçgen'), (['Çizgi', 'çiz'], 'Cizgi'), (['V', 'formasyonu', 'oluştur'], 'V'), (['Ok', 'başı', 'şeklini', 'çiziver'], 'Ok başı'), (['Üçgen', 'haline', 'gelin'], 'Üçgen'), (['Çizgi', 'oluştur'], 'Cizgi'), (['V', 'şekli', 'yap'], 'V'), (['Ok', 'başı', 'şekline', 'bürün'], 'Ok başı'), (['Üç', 'köşeli', 'şekil', 'oluşturun'], 'Üçgen'), (['Çizgi', 'yapıver'], 'Cizgi'), (['V', 'şeklini', 'çiziver'], 'V'), (['Ok', 'başı', 'ol', 'acele'], 'Ok başı'), (['Üçgen', 'oluşturcak', 'hizaya', 'gelin'], 'Üçgen'), (['Oynayın', 'voleybol'], 'voleybol'), (['Top', 'oyna'], 'voleybol'), (['Voleybolcu', 'olun'], 'voleybol'), (['Voleybol', 'formasyonunu', 'gerçekleştirmeniz', 'gerekiyor'], 'voleybol'), (['Voleybol', 'oynamanızı', 'istirham', 'ediyorum'], 'voleybol'), (['çizgi', 'formasyonu', 'oluştur'], 'Cizgi'), (['V', 'çiz'], 'V'), (['ters', 'v', 'formasyonu', 'yap'], 'Ok başı'), (['

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [None]:
from nltk.probability import FreqDist

# Frequency table of every token across all commands, lower-cased with
# str.lower() so that differently capitalised spellings share one count.
all_words = nltk.FreqDist(
    token.lower()
    for tokens, _label in documents
    for token in tokens
)

# Quick sanity check: the most frequent tokens and one known keyword.
print(all_words.most_common(15))
print(all_words["üçgen"])

[('v', 131), ('voleybol', 97), ('üçgen', 83), ('ok', 75), ('çizgi', 64), ('oluştur', 62), ('.', 50), ('başı', 49), ('formasyonunu', 48), ('formasyonu', 47), ('dronelar', 46), ('bir', 43), ('yap', 39), ('çiz', 30), ('ters', 28)]
83


In [None]:
# Vocabulary used as the feature space: up to 1000 distinct tokens.
# NOTE(review): keys() follows first-seen order, so this keeps the first 1000
# distinct tokens encountered, not the 1000 most frequent — confirm intent.
word_features = list(all_words.keys())[:1000]

# The context manager closes the file even if pickling raises.
with open("pickled_algos/word_features5k.pickle", "wb") as save_word_features:
    pickle.dump(word_features, save_word_features)
print(word_features)

['çizgi', 'formasyonunu', 'yap', 'v', 'çiz', 'ok', 'başı', 'yapıver', 'üçgen', 'olun', 'formasyonu', 'oluştur', 'şeklini', 'çiziver', 'haline', 'gelin', 'şekli', 'şekline', 'bürün', 'üç', 'köşeli', 'şekil', 'oluşturun', 'ol', 'acele', 'oluşturcak', 'hizaya', 'oynayın', 'voleybol', 'top', 'oyna', 'voleybolcu', 'gerçekleştirmeniz', 'gerekiyor', 'oynamanızı', 'istirham', 'ediyorum', 'ters', 'bir', 'sürü', 'oluştursun', 'yapılsın', 'oluşturulsun', 'çizsin', 'dronelar', 'yaparsa', 'oldukça', 'müteşekkir', 'olurum', 'droneların', 'oluşturmasını', 'emrediyorum', 'yapmaya', 'ne', 'dersin', 'olsun', 'oynasın', 'sürüye', 'oynat', 'hadi', 'oynayalım', "i̇ha'ları", 'kullanarak', '.', 'kenarlı', 'geometrik', 'çizdirmesini', 'sağla', 'sürüyü', 'güzel', 'tüm', 'dronları', 'düz', 'hat', 'elindeki', 'ihalar', 'ile', "i̇ha'larla", 'şey', 'aga', 'dümdüz', 'bi', 'bana', 'gücünü', 've', 'zekanı', 'harfi', 'attığını', 'düşün', 'okun', 'başındaki', 'dronelara', 'üçlü', 'çektirmek', 'maksadıyla', 'şekillendir

In [None]:
def find_features(document):
  """Build the word-presence feature dict for one command.

  Args:
    document: a raw command string, or a list of tokens (joined with spaces
      and re-tokenized so both input forms go through the same tokenizer).

  Returns:
    dict with one key per entry of the global `word_features`; the value is
    True when that word occurs in the document, False otherwise.
  """
  text = " ".join(document) if isinstance(document, list) else document

  # The vocabulary was built from lower-cased tokens, so the document tokens
  # must be lower-cased the same way; otherwise capitalised words never match.
  # A set makes each membership test constant-time.
  present = {token.lower() for token in word_tokenize(text)}

  return {w: (w in present) for w in word_features}

In [None]:
# Pair each command's feature dict with its label, preserving document order.
featuresets = [
    (find_features(tokens), label)
    for tokens, label in documents
]

In [None]:
# First 75% of the feature sets (in their existing order) are used for
# training; the remainder is held out for evaluation.
train_size = int(0.75 * len(featuresets))
training_test, testing_set = featuresets[:train_size], featuresets[train_size:]

# A free-form command used as a smoke test for each trained model below.
new_data_ = "iç açıları toplamı 180 derece ,3 kenar 3 köşe şekline gel"
new_data = word_tokenize(new_data_)

In [None]:
# Train NLTK's Naive Bayes model. `training_test` holds the training split
# despite its name.
classifier = nltk.NaiveBayesClassifier.train(training_test)
prediction = classifier.classify(find_features(new_data))
print("Tahmin Sonucu:", prediction)
print("Naive Bayes Algo Accuracy percent: ", (nltk.classify.accuracy(classifier, testing_set))*100)
classifier.show_most_informative_features(15)


# Persist the trained model; the context manager closes the file even if
# pickling raises.
with open("pickled_algos/originalnaivebayes5k.pickle", "wb") as save_classifier:
    pickle.dump(classifier, save_classifier)

Tahmin Sonucu: Üçgen
Naive Bayes Algo Accuracy percent:  78.57142857142857
Most Informative Features
                   şekil = True            Üçgen : Cizgi  =      7.2 : 1.0
                 oluştur = True            Üçgen : voleyb =      6.5 : 1.0
                şeklinde = True                V : voleyb =      6.1 : 1.0
                     bir = True            Cizgi : V      =      5.2 : 1.0
                 şeklini = True                V : Cizgi  =      4.5 : 1.0
                    olan = True            Üçgen : voleyb =      4.4 : 1.0
                    gibi = True           voleyb : V      =      4.1 : 1.0
                     hat = True            Cizgi : V      =      3.7 : 1.0
                     geç = True            Cizgi : Üçgen  =      3.6 : 1.0
                     ciz = True            Cizgi : voleyb =      3.5 : 1.0
            formasyonunu = True            Üçgen : Cizgi  =      3.5 : 1.0
                     ile = True                V : voleyb =      3.2 : 1.0

In [None]:
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

# Multinomial Naive Bayes from scikit-learn, wrapped so it exposes the NLTK
# classifier interface (train/classify on feature dicts).
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_test)
prediction = MNB_classifier.classify(find_features(new_data))
print("Tahmin Sonucu:", prediction)
print("MultinomialNB Algo Accuracy percent: ", (nltk.classify.accuracy(MNB_classifier, testing_set))*100)


# Persist the trained model; the context manager closes the file even if
# pickling raises.
with open("pickled_algos/MNB_classifier5k.pickle", "wb") as save_classifier:
    pickle.dump(MNB_classifier, save_classifier)

Tahmin Sonucu: Üçgen
MultinomialNB Algo Accuracy percent:  79.28571428571428


In [None]:
# Bernoulli Naive Bayes wrapped in the NLTK classifier interface.
# The variable name keeps its original spelling because a later cell passes
# `BernaulliNB_classifier` to the voting ensemble.
BernaulliNB_classifier = SklearnClassifier(BernoulliNB())
BernaulliNB_classifier.train(training_test)
prediction = BernaulliNB_classifier.classify(find_features(new_data))
print("Tahmin Sonucu:", prediction)
print("BernoulliNB Algo Accuracy percent: ", (nltk.classify.accuracy(BernaulliNB_classifier, testing_set))*100)


# Persist the trained model; the context manager closes the file even if
# pickling raises.
with open("pickled_algos/BernoulliNB_classifier5k.pickle", "wb") as save_classifier:
    pickle.dump(BernaulliNB_classifier, save_classifier)

Tahmin Sonucu: Üçgen
BernoulliNB Algo Accuracy percent:  75.71428571428571


In [None]:
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC


# Linear support-vector classifier wrapped in the NLTK classifier interface.
LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_test)
prediction = LinearSVC_classifier.classify(find_features(new_data))
print("Tahmin Sonucu:", prediction)
# The label previously said "BernoulliNB" (copy-paste slip); it now names the
# model that is actually being evaluated.
print("LinearSVC Algo Accuracy percent: ", (nltk.classify.accuracy(LinearSVC_classifier, testing_set))*100)


# Persist the trained model; the context manager closes the file even if
# pickling raises.
with open("pickled_algos/LinearSVC_classifier5k.pickle", "wb") as save_classifier:
    pickle.dump(LinearSVC_classifier, save_classifier)

Tahmin Sonucu: Üçgen
BernoulliNB Algo Accuracy percent:  78.57142857142857


In [None]:
# Logistic regression wrapped in the NLTK classifier interface.
LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(training_test)
prediction = LogisticRegression_classifier.classify(find_features(new_data))
print("Tahmin Sonucu:", prediction)
print("LogisticRegression Algo Accuracy percent: ", (nltk.classify.accuracy(LogisticRegression_classifier, testing_set))*100)


# Persist the trained model; the context manager closes the file even if
# pickling raises.
with open("pickled_algos/LogisticRegression_classifier5k.pickle", "wb") as save_classifier:
    pickle.dump(LogisticRegression_classifier, save_classifier)

Tahmin Sonucu: Üçgen
LogisticRegression Algo Accuracy percent:  78.57142857142857


In [None]:
# Linear model trained with stochastic gradient descent, wrapped in the NLTK
# classifier interface. SGD shuffles the training data, so a fixed
# random_state is supplied to make the reported accuracy reproducible.
SGDClassifier_classifier = SklearnClassifier(SGDClassifier(random_state=42))
SGDClassifier_classifier.train(training_test)
prediction = SGDClassifier_classifier.classify(find_features(new_data))
print("Tahmin Sonucu:", prediction)
print("SGDClassifier Algo Accuracy percent: ", (nltk.classify.accuracy(SGDClassifier_classifier, testing_set))*100)


# Persist the trained model; the context manager closes the file even if
# pickling raises.
with open("pickled_algos/SGDC_classifier5k.pickle", "wb") as save_classifier:
    pickle.dump(SGDClassifier_classifier, save_classifier)

Tahmin Sonucu: Üçgen
SGDClassifier Algo Accuracy percent:  70.0


In [None]:
# All six trained models take part in this ensemble; each gets one vote.
ensemble_members = (
    classifier,
    MNB_classifier,
    BernaulliNB_classifier,
    SGDClassifier_classifier,
    LinearSVC_classifier,
    LogisticRegression_classifier,
)
vote_classifier = VoteClassifier(*ensemble_members)

# Evaluate the ensemble on the held-out split.
vote_accuracy = nltk.classify.accuracy(vote_classifier, testing_set)
print("vote_classifier accuracy percent:", vote_accuracy * 100)

vote_classifier accuracy percent: 80.0


In [None]:
# Reload the artefacts written earlier in this notebook.
# pickle.load can execute arbitrary code, so only use it on files you trust
# (here: files this notebook produced itself).
with open("pickled_algos/documents.pickle", "rb") as documents_f:
    documents = pickle.load(documents_f)

with open("pickled_algos/word_features5k.pickle", "rb") as word_features5k_f:
    word_features = pickle.load(word_features5k_f)

def find_features(document):
    """Build the word-presence feature dict for one command.

    Args:
        document: a raw command string, or a list of tokens (joined with
            spaces and re-tokenized so both input forms are handled alike).

    Returns:
        dict with one key per entry of the global `word_features`; the value
        is True when that word occurs in the document, False otherwise.
    """
    text = " ".join(document) if isinstance(document, list) else document

    # The stored vocabulary is lower-cased, so lower-case the document tokens
    # as well; otherwise capitalised words (e.g. at sentence start) never match.
    # A set makes each membership test constant-time.
    present = {token.lower() for token in word_tokenize(text)}

    return {w: (w in present) for w in word_features}

print(len(featuresets))

# A fixed cut-off of 10000 is larger than this dataset, which left
# testing_set empty and put every example in training_set. Split
# proportionally instead: first 75% for training, the rest for testing.
split_index = int(len(featuresets) * 0.75)
training_set = featuresets[:split_index]
testing_set = featuresets[split_index:]

def _load_pickle(path):
    """Unpickle and return the object stored at `path`.

    The file is closed even if unpickling raises. pickle.load can execute
    arbitrary code, so only call this on files you trust.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Restore each trained model from the file it was saved to.
classifier = _load_pickle("pickled_algos/originalnaivebayes5k.pickle")
MNB_classifier = _load_pickle("pickled_algos/MNB_classifier5k.pickle")
BernoulliNB_classifier = _load_pickle("pickled_algos/BernoulliNB_classifier5k.pickle")
LogisticRegression_classifier = _load_pickle("pickled_algos/LogisticRegression_classifier5k.pickle")
LinearSVC_classifier = _load_pickle("pickled_algos/LinearSVC_classifier5k.pickle")
SGDC_classifier = _load_pickle("pickled_algos/SGDC_classifier5k.pickle")

# Five reloaded models vote here; SGDC_classifier is not included.
voting_members = (
    classifier,
    LinearSVC_classifier,
    MNB_classifier,
    BernoulliNB_classifier,
    LogisticRegression_classifier,
)
voted_classifier = VoteClassifier(*voting_members)

def sentiment(text):
    """Classify one command string with the voting ensemble.

    Returns a (label, confidence) tuple, where confidence is the value
    reported by `voted_classifier.confidence` for the same features.
    """
    feature_vector = find_features(text)
    label = voted_classifier.classify(feature_vector)
    certainty = voted_classifier.confidence(feature_vector)
    return label, certainty


559


In [None]:
# Try the ensemble on two free-form commands and show (label, confidence).
sample_commands = (
    "iç açıları toplamı 180 derece ,3 kenar 3 köşe şekline gel",
    "Rica etsem ok başı şeklinde bir duruş sergiler misin?",
)
for sample_command in sample_commands:
    print(sentiment(sample_command))


('Üçgen', 1.0)
('Ok başı', 1.0)
