In [4]:
import json
import random
import inflect
import pandas as pd
from gensim.summarization.textcleaner import split_sentences
from gensim.summarization.summarizer import summarize

### Template generation using top-3 aspects, rated by Multipartite ranking 

In [29]:
def generate_template_top_3(comparing_pair, mode='default'):
    """
    Generates template using top-3 aspects for each object
    ---
    
    Parameters
    ---
    
    comparing_pair - dictionary with 'winner', 'loser', 'winner_aspects', "loser_aspects" entities.
    
    comparing_pair['winner'] and comparing_pair['loser'] - are names of comparing objects in string format.
    
    comparing_pair['winner_aspects'] and comparing_pair['loser_aspects'] - lists with names of aspects.
    
    mode - mode of template_generation with 'default' and 'extended' values.
    
    
     
    
    Returns
    ---
    
    response - generated template for comparing pair in string format.
    """
    if mode == "extended" and len(comparing_pair['winner_aspects']) > 0 and len(comparing_pair['loser_aspects']) > 0:
        extended_templates = []
        first_comparing_sentence_parts = [
            "I would prefer to use {} because it is: {}",
            "Looks like {} is better, because: {}",
            "It's simple! {} is better, because: {}",
            "After much thought, I realized that  {} is better, because: {}",
            "I came to the conclusion that {} is better, because: {}",
            "Taking into account the benefits of {} ({}), it is better",
            "{} seems to prevail due to its pros: {}",
            "My favourite is {}. Its advantages ({}) speak for itself",
            "{} is superior as it is: {}",
            "I am prone to choose {}, because: {}",
            "If I were you, I'd choose {} for it is: {}",
            "{} tends to be better as it has such pluses as: {}",
            "I advise choosing {}, for it is superior in the following aspects: {}",
            "It's a hard decision but I choose {}. Its benefits seem to be more significant. It is: {}"
        ]
        second_comparing_sentence_parts = [
            ", but {} is: {}",
            ". But you should know that {} is: {}.",
            ". But it will be useful for you to know that {} is: {}.",
            ". But i should tell you that {} is: {}.",
            ". It is also important to notice that {} has such benefits as {}.",
            ". However, {} has pluses too: {}.",
            ", but you should also take into account the advantages of {}: {}.",
            ". {} has strong sides too: {}.",
            ", though the arguments for {} ({}) are strong too.",
            ". On the other side, {} is {}.",
            ". Yet, {} should also be noticed for it is: {}.",
            ". However, {} is not be ignored, because it is: {}.",
            ". I would like to pay attention to the fact that {} is : {}.",
        ]
        for i in range(len(first_comparing_sentence_parts)):
            for j in range(len(second_comparing_sentence_parts)):
                extended_templates.append(first_comparing_sentence_parts[i] + second_comparing_sentence_parts[j])

        template_index = random.randint(0, len(extended_templates) - 1)
        ordinal = bool(random.getrandbits(1))
#         if ordinal and (len(comparing_pair['winner_aspects']) < 4 or len(comparing_pair['loser_aspects']) < 4):
#             winner_aspects_string = ""
#             loser_aspects_string = ""
#             p = inflect.engine()
#             if len(comparing_pair['winner_aspects']) > 1:
#                 for i in range(len(comparing_pair['winner_aspects']) - 1):
#                     winner_aspects_string += str(p.number_to_words(p.ordinal(i + 1))) + ", " + \
#                                              comparing_pair['winner_aspects'][i] + ", "
#                 winner_aspects_string += str(p.number_to_words(p.ordinal(len(comparing_pair['winner_aspects'])))) + ", " + \
#                                          comparing_pair['winner_aspects'][-1]
#             else:
#                 winner_aspects_string = comparing_pair['winner_aspects'][0]
#             if len(comparing_pair['loser_aspects']) > 1:
#                 for i in range(len(comparing_pair['loser_aspects']) - 1):
#                     loser_aspects_string += str(p.number_to_words(p.ordinal(i + 1))) + ", " + \
#                                             comparing_pair['loser_aspects'][i] + ", "
#                 loser_aspects_string += str(p.number_to_words(p.ordinal(len(comparing_pair['loser_aspects'])))) + ", " + \
#                                         comparing_pair['loser_aspects'][-1]
#             else:
#                 loser_aspects_string = comparing_pair['loser_aspects'][0]
#             response = extended_templates[template_index].format(comparing_pair['winner'], winner_aspects_string,
#                                                                  comparing_pair['loser'], loser_aspects_string)
#         else:
        response = extended_templates[template_index].format(comparing_pair['winner'],
                                                                 ", ".join(comparing_pair['winner_aspects']),
                                                                 comparing_pair['loser'],
                                                                 ", ".join(comparing_pair['loser_aspects']))
    else:
        mode = "default"
    if mode == 'default':
        if len(comparing_pair['winner_aspects']) > 0:
            response = "It seems like {} is better than {} because it is: {}.".format(comparing_pair['winner'],
                                                                                      comparing_pair['loser'],
                                                                                      ", ".join(comparing_pair[
                                                                                                    'winner_aspects']))
        elif len(comparing_pair['loser_aspects']) > 0:
            response = "Looks like {} is better than {}, but {} is: {}.".format(comparing_pair['winner'],
                                                                                comparing_pair['loser'],
                                                                                comparing_pair['loser'], ", ".join(
                    comparing_pair['loser_aspects']))
        else:
            response = "I would prefer {} than {}.".format(comparing_pair['winner'], comparing_pair['loser'])
    return response

In [27]:
data = []
answer_list = []
obj_a_list = []
obj_b_list = []
with open('mined_bow_str.json') as f:
    for line in f:
        data.append(json.loads(line))
answer_list = []
for line in data:
    loser_sentences = pd.DataFrame(columns=['conf', 'sentence'])
    winner_sentences = pd.DataFrame(columns=['conf', 'sentence'])
    comparing_pair = {}
    comparing_pair['winner'] = line['winner']
    if line['object1']['name'] == line['winner']:
        list_of_winner_sentences = line['object1']['sentences']
        list_of_loser_sentences = line['object2']['sentences']
        winner_tag = "Object1"
        loser_tag = 'Object2'
        comparing_pair['loser'] = line['object2']['name']
    else:
        list_of_winner_sentences = line['object2']['sentences']
        list_of_loser_sentences = line['object1']['sentences']
        winner_tag = "Object2"
        loser_tag = "Object1"
        comparing_pair['loser'] = line['object1']['name']

    comparing_pair['winner_aspects'] = line['extractedAspects' + winner_tag]
    comparing_pair['loser_aspects'] = line['extractedAspects' + loser_tag]
    l = len(list_of_loser_sentences)
    for i in range(len(list_of_loser_sentences)-1):
        if list_of_loser_sentences[i]['confidence']>0.5:
            loser_sentences = loser_sentences.append({'conf' : list_of_loser_sentences[i]['confidence'], 'sentence' : list_of_loser_sentences[i]['text']}, ignore_index=True)
        else:
            winner_sentences = winner_sentences.append({'conf' : 1 - list_of_loser_sentences[i]['confidence'], 'sentence' : list_of_loser_sentences[i]['text']}, ignore_index=True)
    for i in range(len(list_of_winner_sentences)-1):
        if list_of_winner_sentences[i]['confidence']>0.5:
            winner_sentences = winner_sentences.append({'conf' : list_of_winner_sentences[i]['confidence'], 'sentence' : list_of_winner_sentences[i]['text']}, ignore_index=True)
        else:
            loser_sentences = loser_sentences.append({'conf' : 1 - list_of_winner_sentences[i]['confidence'], 'sentence' : list_of_winner_sentences[i]['text']}, ignore_index=True)
    loser_sentences = loser_sentences.sort_values(by=['conf'], ascending=False)
    winner_sentences = winner_sentences.sort_values(by=['conf'], ascending=False)
    comparing_pair['winner_aspects'] = []
    comparing_pair['loser_aspects'] = []
    comparing_pair['aspects'] = line['extractedAspects' + winner_tag]
    comparing_pair['aspects'].extend(line['extractedAspects' + loser_tag])
    used_aspects = []
    for i in range(0,winner_sentences.shape[0]):
        for aspect in comparing_pair['aspects']:
            if aspect in winner_sentences.iloc[i]['sentence'] and aspect not in used_aspects:
                if winner_sentences.iloc[i]['sentence'] not in comparing_pair['winner_aspects']:
                    used_aspects.append(aspect)
                    comparing_pair['winner_aspects'].append(aspect)
            if len(comparing_pair['winner_aspects'])==3:
                break
        if len(comparing_pair['winner_aspects'])>=3:
            break
    for i in range(0,loser_sentences.shape[0]):
        for aspect in comparing_pair['aspects']:
            if aspect in loser_sentences.iloc[i]['sentence'] and aspect not in used_aspects:
                used_aspects.append(aspect)
                comparing_pair['loser_aspects'].append(aspect)
            if len(comparing_pair['loser_aspects'])==3:
                break
        if len(comparing_pair['loser_aspects'])>=3:
            break
        # comparing_pair['winner_aspects'].append(winner_sentences.iloc[i]['sentence'])
        # comparing_pair['loser_aspects'].append(loser_sentences.iloc[i]['sentence'])
#     comparing_pair['winner_aspects'] = comparing_pair['winner_aspects'][0:3]
#     comparing_pair['loser_aspects'] = comparing_pair['loser_aspects'][0:3]
    answer = generate_template_top_3(comparing_pair, mode='extended')
    answer_list.append(answer)
    obj_a_list.append(line['object1']['name'])
    obj_b_list.append(line['object2']['name'])
#     print(answer)
#     print()

In [28]:
answer_list

["It's simple! windows 8 is better, because: faster, windows, easier, though the arguments for windows 7 (easier to use, better for me, lighter) are strong too.",
 'After much thought, I realized that  perl is better, because: systems, unix, cleaner. But you should know that awk is: alternatives, time, easier to grok.',
 'It seems like javascript is better than php because it is: timer, function, point.',
 'I advise choosing perl, for it is superior in the following aspects: wiser, older, easier. But it will be useful for you to know that php is: faster, easier to develop in, libraries.',
 'I would prefer to use python because it is: easier, easier to read, simpler. Yet, java should also be noticed for it is: faster, longer, higher.',
 'ruby tends to be better as it has such pluses as: easier, faster, mor, but you should also take into account the advantages of php: memory, consuming, etc.',
 'postgresql is superior as it is: faster, freer, touchier, though the arguments for mysql (lar

### Template generation using comparative sentences

In [47]:
def generate_template_ucs(comparing_pair, mode='default'):
    """
    Generates template using comparative sentences which contains top aspects.
    ---
    
    Parameters
    ---
    
    comparing_pair - dictionary with 'winner', 'loser', 'winner_aspects', "loser_aspects" entities.
    
    comparing_pair['winner'] and comparing_pair['loser'] - are names of comparing objects in string format.
    
    comparing_pair['winner_aspects'] and comparing_pair['loser_aspects'] - lists with sentences which contain top aspects.
    
    mode - mode of template_generation with 'default' and 'extended' values.
    
    
     
    
    Returns
    ---
    
    response - generated template for comparing pair in string format.
    """
    if mode == "extended" and len(comparing_pair['winner_aspects']) > 0 and len(comparing_pair['loser_aspects']) > 0:
        extended_templates = []
        first_comparing_sentence_parts = [
            "I would prefer to use {}. {}",
            "Looks like {} is better. {}",
            "It's simple! {} is better. {}",
            "After much thought, I realized that  {} is better. {}",
            "I came to the conclusion that {} is better. {}",
            "My favourite is {}. {} Its advantages speak for itself",
            "I am prone to choose {}. {}",
            "If I were you, I'd choose {}. {}",
            "{} tends to be better as it has such pluses. {}",
            "I advise choosing {}. {}",
            "It's a hard decision but I choose {}. Its benefits seem to be more significant. {}"
        ]
        second_comparing_sentence_parts = [
            " But {} is {}",
            " But you should know that {} is {}",
            " But it will be useful for you to know that {} is {}",
            " But i should tell you that {} is {}",
            " It is also important to notice that {} has such benefits as {}",
            " However, {} has pluses too. {}",
            " But you should also take into account the advantages of {}. {}",
            " {} has strong sides too. {}",
            " On the other side, {} is {}",
            " However, {} is not be ignored, because it is {}",
            " I would like to pay attention to the fact that {} is {}",
        ]
        for i in range(len(first_comparing_sentence_parts)):
            for j in range(len(second_comparing_sentence_parts)):
                extended_templates.append(first_comparing_sentence_parts[i] + second_comparing_sentence_parts[j])

        template_index = random.randint(0, len(extended_templates) - 1)
        ordinal = bool(random.getrandbits(1))
        response = extended_templates[template_index].format(comparing_pair['winner'],
                                                                 "".join(comparing_pair['winner_aspects']),
                                                                 comparing_pair['loser'],
                                                                 "".join(comparing_pair['loser_aspects']))
    else:
        mode = "default"
    if mode == 'default':
        if len(comparing_pair['winner_aspects']) > 0:
            response = "It seems like {} is better than {} because {}.".format(comparing_pair['winner'],
                                                                                      comparing_pair['loser'],
                                                                                      "".join(comparing_pair[
                                                                                                    'winner_aspects']))
        elif len(comparing_pair['loser_aspects']) > 0:
            response = "Looks like {} is better than {}, but {} {}.".format(comparing_pair['winner'],
                                                                                comparing_pair['loser'],
                                                                                comparing_pair['loser'], "".join(
                    comparing_pair['loser_aspects']))
        else:
            response = "I would prefer {} than {}".format(comparing_pair['winner'], comparing_pair['loser'])
    return response

In [48]:
data = []
answer_list = []
obj_a_list = []
obj_b_list = []
winners_list = []
losers_list = []
all_list = []
with open('mined_bow_str.json') as f:
    for line in f:
        data.append(json.loads(line))
answer_list = []
for line in data:
    loser_sentences = pd.DataFrame(columns=['conf', 'sentence'])
    winner_sentences = pd.DataFrame(columns=['conf', 'sentence'])
    comparing_pair = {}
    comparing_pair['winner'] = line['winner']
    if line['object1']['name'] == line['winner']:
        list_of_winner_sentences = line['object1']['sentences']
        list_of_loser_sentences = line['object2']['sentences']
        winner_tag = "Object1"
        loser_tag = 'Object2'
        comparing_pair['loser'] = line['object2']['name']
    else:
        list_of_winner_sentences = line['object2']['sentences']
        list_of_loser_sentences = line['object1']['sentences']
        winner_tag = "Object2"
        loser_tag = "Object1"
        comparing_pair['loser'] = line['object1']['name']

    comparing_pair['winner_aspects'] = line['extractedAspects' + winner_tag]
    comparing_pair['loser_aspects'] = line['extractedAspects' + loser_tag]
    l = len(list_of_loser_sentences)
    for i in range(len(list_of_loser_sentences)-1):
        if list_of_loser_sentences[i]['confidence']>0.5:
            loser_sentences = loser_sentences.append({'conf' : list_of_loser_sentences[i]['confidence'], 'sentence' : list_of_loser_sentences[i]['text']}, ignore_index=True)
        else:
            winner_sentences = winner_sentences.append({'conf' : 1 - list_of_loser_sentences[i]['confidence'], 'sentence' : list_of_loser_sentences[i]['text']}, ignore_index=True)
    for i in range(len(list_of_winner_sentences)-1):
        if list_of_winner_sentences[i]['confidence']>0.5:
            winner_sentences = winner_sentences.append({'conf' : list_of_winner_sentences[i]['confidence'], 'sentence' : list_of_winner_sentences[i]['text']}, ignore_index=True)
        else:
            loser_sentences = loser_sentences.append({'conf' : 1 - list_of_winner_sentences[i]['confidence'], 'sentence' : list_of_winner_sentences[i]['text']}, ignore_index=True)
    loser_sentences = loser_sentences.sort_values(by=['conf'], ascending=False)
    winner_sentences = winner_sentences.sort_values(by=['conf'], ascending=False)
    comparing_pair['winner_aspects'] = []
    comparing_pair['loser_aspects'] = []
    comparing_pair['aspects'] = line['extractedAspects' + winner_tag]
    comparing_pair['aspects'].extend(line['extractedAspects' + loser_tag])
    used_aspects = []
    for i in range(0,winner_sentences.shape[0]):
        for aspect in comparing_pair['aspects']:
            if aspect in winner_sentences.iloc[i]['sentence'] and aspect not in used_aspects:
                if winner_sentences.iloc[i]['sentence'] not in comparing_pair['winner_aspects']:
                    used_aspects.append(aspect)
                    comparing_pair['winner_aspects'].append(winner_sentences.iloc[i]['sentence'])
            if len(comparing_pair['winner_aspects'])>=3:
                break
    for i in range(0,loser_sentences.shape[0]):
        for aspect in comparing_pair['aspects']:
            if aspect in loser_sentences.iloc[i]['sentence'] and aspect not in used_aspects:
                used_aspects.append(aspect)
                comparing_pair['loser_aspects'].append(loser_sentences.iloc[i]['sentence'])
            if len(comparing_pair['loser_aspects'])>=3:
                break
        # comparing_pair['winner_aspects'].append(winner_sentences.iloc[i]['sentence'])
        # comparing_pair['loser_aspects'].append(loser_sentences.iloc[i]['sentence'])
#     comparing_pair['winner_aspects'] = comparing_pair['winner_aspects'][0:3]
#     comparing_pair['loser_aspects'] = comparing_pair['loser_aspects'][0:3]
    answer = generate_template(comparing_pair, mode='extended')
    answer_list.append(answer)
    obj_a_list.append(line['object1']['name'])
    obj_b_list.append(line['object2']['name'])
#     print(answer)
#     print()

In [49]:
answer_list

['My favourite is windows 8. Reports indicate that Windows 8 is considerably faster than Windows 7.Windows 8 pro is way better than windows 7.Windows 8 DA is much easier to install than Windows 7.Originally Posted by GoodBytes Windows 8 is cheaper than Windows 7. Its advantages speak for itself windows 7 has strong sides too. I suspect Windows 7 is easier to use than Windows 8.Windows 7 works better for me than Windows 8.Windows 7 was already lighter than Vista, and now Windows 8 will be lighter than Windows 7.',
 'It\'s a hard decision but I choose perl. Its benefits seem to be more significant. Personally I use awk for a lot of my scripting, simply because when I first started using Unix systems, awk was always installed, but Perl was not guaranteed.awk would probably be a fine choice but I don\'t know it very well at all (I started using perl on windows about a year ago but am just now learning "unix").To put it straight: Using sed, awk and friends is fine, but you will find you can

In [21]:
# df = pd.DataFrame({'answers' : answer_list, 'object_a' : obj_a_list, 'object_b' : obj_b_list})
# df.to_csv("tmplAnswersFull.csv", index=False)

In [26]:
# answer_ser = pd.Series(answer_list)
# answer_ser.to_csv("Answers.txt", index=False)

  


### Text-rank

In [None]:
data = []
answer_list = []
winners_list = []
losers_list = []
all_list = []

with open('mined_bow_str.json') as f:
    for line in f:
        data.append(json.loads(line))
answer_list = []
for line in data:
    loser_sentences = pd.DataFrame(columns=['conf', 'sentence'])
    winner_sentences = pd.DataFrame(columns=['conf', 'sentence'])
    comparing_pair = {}
    comparing_pair['winner'] = line['winner']
    if line['object1']['name'] == line['winner']:
        list_of_winner_sentences = line['object1']['sentences']
        list_of_loser_sentences = line['object2']['sentences']
    else:
        list_of_winner_sentences = line['object2']['sentences']
        list_of_loser_sentences = line['object1']['sentences']
    l = len(list_of_loser_sentences)
    for i in range(len(list_of_loser_sentences)-1):
        if list_of_loser_sentences[i]['confidence']>0.5:
            loser_sentences = loser_sentences.append({'conf' : list_of_loser_sentences[i]['confidence'], 'sentence' : list_of_loser_sentences[i]['text']}, ignore_index=True)
        else:
            winner_sentences = winner_sentences.append({'conf' : 1 - list_of_loser_sentences[i]['confidence'], 'sentence' : list_of_loser_sentences[i]['text']}, ignore_index=True)
    for i in range(len(list_of_winner_sentences)-1):
        if list_of_winner_sentences[i]['confidence']>0.5:
            winner_sentences = winner_sentences.append({'conf' : list_of_winner_sentences[i]['confidence'], 'sentence' : list_of_winner_sentences[i]['text']}, ignore_index=True)
        else:
            loser_sentences = loser_sentences.append({'conf' : 1 - list_of_winner_sentences[i]['confidence'], 'sentence' : list_of_winner_sentences[i]['text']}, ignore_index=True)
    loser_sentences = loser_sentences.sort_values(by=['conf'], ascending=False)
    winner_sentences = winner_sentences.sort_values(by=['conf'], ascending=False)
    used_aspects = []
        # comparing_pair['winner_aspects'].append(winner_sentences.iloc[i]['sentence'])
        # comparing_pair['loser_aspects'].append(loser_sentences.iloc[i]['sentence'])
#     comparing_pair['winner_aspects'] = comparing_pair['winner_aspects'][0:3]
#     comparing_pair['loser_aspects'] = comparing_pair['loser_aspects'][0:3]
    loser_text = ""
    winner_text = ""
    all_text = ""
    for i in range (len(list_of_loser_sentences)):
        if list_of_loser_sentences[i]['text'] not in loser_text:
            loser_text += list_of_loser_sentences[i]['text'] + " "
            all_text += list_of_loser_sentences[i]['text'] + " "
    for i in range (len(list_of_winner_sentences)):
        if list_of_winner_sentences[i]['text'] not in winner_text:
            winner_text += list_of_winner_sentences[i]['text'] + " "
            all_text += list_of_winner_sentences[i]['text'] + " "
#         loser_text = split_sentences(loser_text)
#         winner_text = split_sentences(winner_text)
    if len(split_sentences(winner_text))>10:
        win = summarize(winner_text, split=False, word_count=60)
        winners_list.append(win)
    if len(split_sentences(loser_text))>10:
        lose = summarize(loser_text, split=False, word_count=60)
        losers_list.append(lose)
    if len(split_sentences(all_text))>10:
        all_t = summarize(all_text, split=False, word_count=60)
        all_list.append(all_t)

In [None]:
print(winners_list[0])
print()
print(losers_list[0])
print()
print(all_list[0])