In [97]:
import pandas as pd
import numpy as np
import re
import os
from nltk.corpus import stopwords
import pickle
from nltk.tokenize import word_tokenize
import nltk
import random
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from nltk.classify import ClassifierI
from statistics import mode

In [10]:
train_file = open("./train.ft.txt", "r", encoding="utf-8")
test_file = open("./test.ft.txt", "r", encoding="utf-8")

In [89]:
training_set = [train_file.readline() for _ in range(8000)]
stop_words = stopwords.words("english")

#  j is adjective, r is adverb, and v is verb
#allowed_word_types = ["J","R","V"]
allowed_word_types = ["J"]
documents = []
all_words = []

for i,line in enumerate(training_set):
    doc = re.split(r"__label__(\d+) ", str(line))[1:]
    sentiment = "nag" if doc[0] == "1" else "pos"
    review = doc[1]
    documents.append([review, sentiment])
    words = [w for w in word_tokenize(review) if w not in stop_words]
    pos = nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())


In [None]:
all_words = nltk.FreqDist(all_words)

word_features = list(all_words.keys())[:5000] # top occuring words

def find_features(document):
    words = word_tokenize(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)

    return features

# creates a list of dictionary of all_words with keys and value as 
# (if particular adjective found in that particluar document)
featuresets = [(find_features(rev), category) for (rev, category) in documents]

random.shuffle(featuresets)
testing_set = featuresets[1600:]
training_set = featuresets[:400]

In [102]:
classifier = nltk.NaiveBayesClassifier.train(training_set)
print("Original Naive Bayes Algo accuracy percent:", (nltk.classify.accuracy(classifier, testing_set))*100)
classifier.show_most_informative_features(15)

save_classifier = open("pickled_algos/originalnaivebayes5k.pickle","wb")
pickle.dump(classifier, save_classifier)
save_classifier.close()

MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)
print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testing_set))*100)

save_classifier = open("pickled_algos/MNB_classifier5k.pickle","wb")
pickle.dump(MNB_classifier, save_classifier)
save_classifier.close()

BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(training_set)
print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testing_set))*100)

save_classifier = open("pickled_algos/BernoulliNB_classifier5k.pickle","wb")
pickle.dump(BernoulliNB_classifier, save_classifier)
save_classifier.close()

Original Naive Bayes Algo accuracy percent: 70.75
Most Informative Features
               wonderful = True              pos : nag    =      9.6 : 1.0
                    easy = True              pos : nag    =      8.8 : 1.0
                    poor = True              nag : pos    =      7.9 : 1.0
                   video = True              pos : nag    =      7.4 : 1.0
                   worst = True              nag : pos    =      6.6 : 1.0
            disappointed = True              nag : pos    =      5.8 : 1.0
                   wrong = True              nag : pos    =      5.3 : 1.0
                 plastic = True              nag : pos    =      5.3 : 1.0
                      us = True              pos : nag    =      5.3 : 1.0
               excellent = True              pos : nag    =      5.3 : 1.0
                  listen = True              pos : nag    =      5.3 : 1.0
                  return = True              nag : pos    =      4.7 : 1.0
                    best

In [118]:
class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers = classifiers

    def classify(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)

    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)

        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf
    
def find_features(document):
    words = word_tokenize(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)
    return features


def find_sentiment(document):
    voted_classifier = VoteClassifier(classifier,
                                      MNB_classifier,
                                      BernoulliNB_classifier)
    features = find_features(document)
    return (
        voted_classifier.classify(features),
        voted_classifier.confidence(features)
    )


failed = 0
totalReviews = 1000

test_set = [test_file.readline() for _ in range(totalReviews)]
for i,line in enumerate(test_set):
    doc = re.split(r"__label__(\d+) ", str(line))[1:]
    review_sent_actual = "nag" if doc[0] == "1" else "pos"
    review = doc[1]
    if not review_sent_actual == find_sentiment(review)[0]:
        failed += 1
print("Total failed: ", failed)
print("Total passed: ", totalReviews-failed)
print("accuracy = ", (1-failed/totalReviews)*100, "%")

Total failed:  290
Total passed:  710
accuracy =  71.0


In [1]:
t = int(input())
while(t):
    t = t-1
    n = int(input())
    if(n == 1):
        s = input()
        print("0")
        continue

    vector<int> arr(n, 1);
    mp_list = [1, 2, 3, 5, 6, 7, 10, 11, 14
         15, 21, 22, 30, 33, 35, 42, 55, 66, 70,
         77, 105, 110, 154, 165, 210, 231, 330, 
         385, 462, 770, 1155, 2310]
    mp = {}
    for i in mp_list:
        mp[i] = 0

    for(int k = 0; k < n; k++){
        string str;
        cin >> str;

        bool a,e,i,o,u;
        a = e = i = o = u = false;

        for(int j = 0; j < str.length(); j++){
            if(a and e and i and o and u)
                break;

            if(str[j] == 'a')       a = true;
            else if(str[j] == 'e')  e = true;
            else if(str[j] == 'i')  i = true;
            else if(str[j] == 'o')  o = true;
            else if(str[j] == 'u')  u = true;
        }

        if(a)   arr[k] *= 2;
        if(e)   arr[k] *= 3;
        if(i)   arr[k] *= 5;
        if(o)   arr[k] *= 7;
        if(u)   arr[k] *= 11;

        for(auto const& x: mp)
            if(arr[k] % x.first == 0)
                mp[x.first]+=1;

        // for(auto const& x: mp)
        //     cout << x.first << " " << x.second << endl;
        // cout << endl;
    }

    long long count = 0;
    for(int k = 0; k < n; k++){
        if( mp[ 2310/arr[k] ] > n-1)
            count += n-1;
        else
            count += mp[ 2310/arr[k] ];
    }

    cout << count/2 << endl;
}

return 0;
}

1
35 4
34
