In [18]:
import tweepy
import csv
import pandas as pd
import numpy as np
import networkx as nx
import os
from Events_NER.TweetSegmenter import SEDTWikSegmenter
import pickle
from rouge import Rouge 
import re
import json

In [3]:
wiki_titles_file = "Events_NER/data/final.txt"
segmenter = SEDTWikSegmenter(wiki_titles_file, 4, 3, False)

Initializing SEDTWik Segmenter
SEDTWik Segmenter Ready



In [4]:
class Tweet():
    
    def __init__(self, status_response):
        self.id = status_response.id
        self._get_text(status_response)
        self.user_info = status_response.user
        self.entitites = status_response.entities
        self.json = status_response._json
        self.json['text'] = self.text
        arr = []
        for users in self.json['entities']['user_mentions']: 
            arr += [users['name']]
        self.json['entities']['user_mentions'] = arr
        arr = []
        for users in self.json['entities']['hashtags']: 
            arr += [users['text']]
        self.json['entities']['hashtags'] = arr
        
    def _get_text(self, status):
        if hasattr(status, "retweeted_status"):  # Check if Retweet
            try:
                self.text = status.retweeted_status.extended_tweet["full_text"]
            except AttributeError:
                self.text = status.retweeted_status.full_text
        else:
            try:
                self.text = status.extended_tweet["full_text"]
            except AttributeError:
                self.text = status.full_text
    
    #code for generating named entities of each tweet
    def _get_named_entities(self):
        return segmenter.tweet_segmentation(self.json)
        
    #code for generating event phrases
    def _get_event_phrases(self):
        return []
    
    def get_graph_entities(self):
        return self._get_named_entities() + self._get_event_phrases()
    
    def __hash__(self):
        return self.id
    def __eq__(self, other):
        return self.id == other.id


class TweetRetriever():

    def __init__(self):
        consumer_key = '95cMtk1vJvEEW2rlMR0kIU9lE'
        consumer_secret = 'pMQFi7LBdcudKDNZOokUJGS8mDxQanUv8spxBDdTLiwSZBuUOM'
        access_token = '1036313393114767360-BZ8Qpi02ghRvehhcITEIyl7SmGWmU6'
        access_token_secret = 'C7VAqGDhTdB424iBtEwF1CJI9YPTcvNvLjFmaCXENNv3G'
        auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
        #auth.set_access_token(access_token, access_token_secret)
        self.api = tweepy.API(auth,wait_on_rate_limit=True, wait_on_rate_limit_notify = True)
    
    def _filterDuplicates(self, tweets):
        tweet_text = set()
        filtered_tweets = []
        for tweet in tweets:
            if tweet.text not in tweet_text:
                filtered_tweets += [tweet]
                tweet_text.add(tweet.text)
        return filtered_tweets

    def getTweets(self, hashtag, count = 10):
        tweets = []
        for status in tweepy.Cursor(self.api.search, q = hashtag + " -filter:retweets", count = count, tweet_mode = 'extended',
                                    lang = 'en',).items():
            tweets.append(Tweet(status))
        return self._filterDuplicates(tweets)

    

In [5]:
class GraphNode():
    
    def __init__(self, name):
        self.name = name
        self.tweets = set()
        self.value = 0
        
    def add_tweet(self, tweet):
        self.tweets.add(tweet)
    
    def common_tweets(self, other):
        return len(self.tweets.intersection(other.tweets))
    
    def __hash__(self):
        return hash(self.name)
    def __eq__(self, other):
        return self.name == other.name
#     def __print__(self):
#         print(self.name)
    
class TweetGraph():
    
    def __init__(self, topic):
        self.topic = topic
        self.nodes = {}
        self.edge_map = {}
        
    def add_entity(self, name, tweet_ref):
        if name not in self.nodes:
            self.nodes[name] = GraphNode(name)
        self.nodes[name].add_tweet(tweet_ref)
    
    def add_edge(self, node1, node2):
        assert node1.name in self.nodes
        assert node2.name in self.nodes
        weight = node1.common_tweets(node2)
        self.edge_map.setdefault(node1.name, {}).setdefault(node2.name, weight)
        self.edge_map.setdefault(node2.name, {}).setdefault(node1.name, weight)
    
    def compute_all_edges(self):
        for node1 in self.nodes.values():
            for node2 in self.nodes.values():
                self.add_edge(node1, node2)
    
    def _get_pagerank_matrix(self):
        x = [[0 for _ in range(len(self.nodes))] for _ in range(len(self.nodes))]
        for i, node1 in enumerate(self.nodes.values()):
            wsum = 0
            for node2 in self.nodes.values():
                wsum += self.edge_map.get(node1.name, {}).get(node2.name, 0)
            for j, node2 in enumerate(self.nodes.values()):
                x[i][j] = self.edge_map.get(node1.name, {}).get(node2.name, 0)/wsum
        return np.array(x)
    
    def set_textrank_values(self, d = 0.85):
        rank_graph = nx.from_numpy_array(self._get_pagerank_matrix())
        node_scores = nx.pagerank(rank_graph, alpha = d)
        for i, node in enumerate(self.nodes.values()):
            node.value = node_scores[i]
    
    def get_weight(self, node1, node2):
        return self.edge_map.get(node1.name, {}).get(node2.name, 0)
    
    def get_topic_similarity(self, node):
        if node.name in self.topic:
            return len(node.tweets)
        return 1
    
    def get_all_node_values(self):
        arr = []
        for node in self.nodes.values():
            arr.append((node.name, node.value))
        return sorted(arr, key = lambda x: x[1])
    
    def get_avg_thres(self):
        values = self.get_all_node_values()
        return sum([i[1] for i in values])/len(values)
        
    def get_nodes_above_thres(self, thres = 1):
        nodes = []
        value_sum = 0
        for node in self.nodes.values():
            if node.value > thres:
                nodes.append(node)
                value_sum += node.value
        return nodes, value_sum
        

In [6]:
def createGraph(topic, tweets):
    tweetGraph = TweetGraph(topic)
    for tweet in tweets:
        graph_entities = tweet.get_graph_entities()
        for name in graph_entities:
            tweetGraph.add_entity(name, tweet)
    tweetGraph.compute_all_edges()
    tweetGraph.set_textrank_values()
    return tweetGraph

def partitionGraph(tweetGraph, alpha, beta, high_rank_thres = 1):
    
    #initialize highly ranked nodes and their total values sum
    high_ranked_nodes, total_value_sum = tweetGraph.get_nodes_above_thres(high_rank_thres)
    high_ranked_nodes = sorted(high_ranked_nodes, key = lambda x: x.value)
    partitions = []
    
    #partitioning loop
    while len(high_ranked_nodes):
        #entity set is the nodes in the partition
        entity_set = set()
        repr_node = high_ranked_nodes.pop()
        entity_set.add(repr_node)
        repr_node_topic_similarity = tweetGraph.get_topic_similarity(repr_node)
        value_sum = repr_node.value
        
        for node in high_ranked_nodes:
            node_edge_weight = tweetGraph.get_weight(repr_node, node)
            node_topic_similarity = tweetGraph.get_topic_similarity(node)
            
            if node_edge_weight/repr_node_topic_similarity > alpha and \
                node_topic_similarity/repr_node_topic_similarity > alpha:
                entity_set.add(node)
                value_sum += node.value
        
        if value_sum/total_value_sum > beta:
            temp = []
            for node in high_ranked_nodes:
                if node not in entity_set:
                    temp.append(node)
            high_ranked_nodes = temp
            
            partitions.append([])
            for node in list(entity_set):
                partitions[-1].append(node)
            
    return partitions
    

In [7]:
ret = TweetRetriever()

In [8]:
tweets = ret.getTweets('#xmen', 1)

In [9]:
graph = createGraph(['xmen'], tweets)

In [10]:
avg = graph.get_avg_thres()

In [12]:
partitions = partitionGraph(graph, 0.05, 0.01, high_rank_thres=avg)

In [13]:
def summarization(partitions, tweet_cutoff = 1):
    summary = []
    #print(partitions)
    for part in partitions:
        tweet_set = set()
        for node in part:
            tweet_set = tweet_set.union(node.tweets)
        node_entity_count = []
        tweet_set = list(tweet_set)
        #print(tweet_set)
        for i, tweet in enumerate(tweet_set):
            count = 0
            for node in part:
                if node.name in tweet.text:
                    count += 1
            node_entity_count += [(i, count)]
        node_entity_count = sorted(node_entity_count, key = lambda x: x[1], reverse = True)
        #print(node_entity_count)
        for i in range(min(tweet_cutoff, len(tweet_set))):
            summary += [tweet_set[node_entity_count[i][0]].text]
    return summary

In [21]:
hashtags = "#x1, #mamavote, #welcomex1tothailand, #got7, #just_dohyon_day, #springday1000days, #exo, #darkbluekissep5, #bts, #peing, #nct127, #igot7, #nct, #ab6ix, #uwmaep1, #happy_dohyon_day, #straykids, #bigil, #monsta_x, #choseungyoun, #got7_callmyname, #kampsingapore2019, #fgo, #rt, #bolivia, #ps4share, #ssmbreignbeginssoon, #ateez, #thefacementhailand3, #got7now, #doyoung, #ayodhyaverdict, #sowetoderby, #sarileruneekevvaru, #astronaut, #johnny, #case1485, #sex, #yuta, #obsession, #kimwooseok, #kpopfestainbkk, #txt, #newprofilepic, #ffpl, #darkbluekiss, #tomorrow_x_together, #monstax, #kartarpurcorridor, #nctzenselcaday, #lulalivre, #x1flyhightothailand, #goloud, #exodeux, #ongseongwu, #jungkook, #magachallenge, #sidharthshukla, #thankyougamefreak, #jimin, #superm, #biggboss13, #fcbbvb, #peckpalitchoke, #happyleoday, #sozlesmelisehadet, #nowplaying, #1millonsmdqmq, #bambam, #jaehyun, #leeeunsang, #kimyohan, #bb13, #follow, #seventeen, #mark, #baekhyun, #sehun, #nintendoswitch, #amakhosi4life, #leiars, #punbnk48, #deathstranding, #chile, #singapore, #fgo_ep7, #kristperawat, #dutchieyoghurt, #porn, #namdohyon, #gmmtv, #sao_anime, #10kasim, #rmliga, #dearpresidenttrump, #salmankhan, #sb29psg, #bnk48, #soobin, #hanseungwoo"
hashtags = [tags.strip() for tags in hashtags.split(',')]

In [22]:
for hasht in hashtags:
    tweets = ret.getTweets(hasht, 1)
    print("***********************",hasht,"*******************************")
    for tweet in tweets:
        print(tweet.text)
        print("=====================")
    print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

*********************** #x1 *******************************
Little king of fanservice performing his signature song 🧡

#MAMAVOTE #x1 #엑스원

@x1official101 / @x1members

#SONDONGPYO #dongpyo
#손동표 #동표 #ㅅㄷㅍ

https://t.co/E7yMaJxVrt
Like always 1/2
#X1 #KPOPFESTAinBKK https://t.co/6UQUfu8LVK
One-it here for X1 😊♥️

@x1members 
@x1official101 
#X1 #Oneit https://t.co/3bWyRoHbNt
That's enough Seungwoo, you're killing us. 🙈

#KPOPFESTAinBKK #x1 #HanSeungWoo https://t.co/qVI92HsZ4R
minhee need to choose one 😤 it's confusing now 😤 #MINHEE #X1
TODAY IS YOUR BIRTHDAY DO!! I WANT YOU TO BE HAPPY ALWAYS AND ONLY TEARS OF JOY!! LETS WALK FLOWER PATH TOGETHER❤️
@x1official101 @x1members #MAMAVOTE #x1 #엑스원 #찬란한열여섯남도현_생일축하해 #HAPPY_DOHYON_DAY  https://t.co/1SMixFp3xC
@seungwoolicious @PHENOMENAL_94 Thank you, Han Seungwoo. You have been an inspiration, not only for me but for a lot of One Its and Alices worldwide. I know you are strong and may be burdened sometimes but cry if you must and lean to other p

*********************** #springday1000days *******************************
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*********************** #exo *******************************
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*********************** #darkbluekissep5 *******************************
@Tawan_V @new_thitipoom My favourite scene from the whole #Kiss series. The dialogue, the acting . Perfect. #DarkBlueKissEP5 https://t.co/BEajeswPer
While watching #DarkBlueKissEP5 also listening to #ไม่มีนิยาม​ separate left ear, and right ear
Love this EP again... Actually love every EP of them 💙💙💙#DarkBlueKissEP5
My Ship 😍 #SunMork #DarkBlueKiss #DarkBlueKissEP5 https://t.co/gUUcLDx8s5
Full of Smile then suddenly someone appear 😂 he see a ghost 😳 #PeteKao #DarkBlueKiss #DarkBlueKissEP5 https://t.co/v3DnMJcm6U
Finally here guys!! can't wait for the Eng Sub \(^^)/ So happy to see t

*********************** #nct *******************************
191109 KAMP Red Carpet 

#KAMPSingapore2019 #KAMPGlobal #KAMPSG #NCT #NCT127 #DOYOUNG #도영 https://t.co/n79VwwuOcD
NCT VINES https://t.co/BbfSH2Ubzt
you may watch 😅🙂
#NCT #NCTDREAM #NCT127 #NCTU #WayV https://t.co/JAtRukKPgk
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*********************** #ab6ix *******************************
From boy to man ..woojin has grown up a lot in these 3 years

#woojin #AB6IX @AB6IX_MEMBERS @AB6IX https://t.co/oe5lj1UzRS
1st day concert

🐿: woongie hyung making a heart again
🐻: youngmin hyung did it first! Why? You jealous?
🐿: ah what~ 
#AB6IX https://t.co/wyC7rFxVKr
2019.11.09 

Cr. Way to you 
#AB6IX #JEONWOONG #WOONG #에이비식스 #전웅 #웅 https://t.co/MTKeUhmLR7
(cont.)

AB6IX will always be for ABNEW. Thank you for granting all our firsts, I love you❤"

trans by JeonWoongPH
#전웅 #웅 #JEONWOONG #에이비식스 #AB6IX https://t.co/Q9yViNqTFS
2019.11.09

Cr. 

*********************** #monsta_x *******************************
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
*********************** #choseungyoun *******************************
191109 😭
#X1 #엑스원 #조승연 #CHOSEUNGYOUN

cr. Lion_csy
 https://t.co/B4x6nHtamG
191109 🖤💛

#조승연 #CHOSEUNGYOUN

cr. Fairypitta_csy
 https://t.co/YGtRdCWp5m
191110 Kpop Festa
U GOT IT

 (maybe this video has better quality😂and less blurry) 
#조승연 #승연 #CHOSEUNGYOUN #SEUNGYOUN #スンヨン #曹承衍
#엑스원 #X1
@x1members https://t.co/MoIkpat5sc https://t.co/kVQFIC4bGc
Do you always drink something like that, Seungyoun ah? 🤧. How can you being that cute and hot at the same time?! Now, I love you even more, Oh my god! What should I do~ 😖💕

#KPOPFESTAinBKK #X1 #조승연 🙆 
#CHOSEUNGYOUN #BigLoveForX1 https://t.co/22u2TZFMAW
he looks happier, waving n smiling at one it. i hope Cho Seungyoun always in happiness😁
#X1 #CHOSEUNGYOUN #KPOPFESTAinBKK #X1FlyHighToThailand #WelcomeX1toThailan

*********************** #kampsingapore2019 *******************************
This is the guy i saw at #KAMPSingapore2019_Day2 and he is a #wjsn #우주소녀 fan and i didnt wanna talk to him because im scared lolol anyways can kpop stan twt do their magic and find his @ because sis no one i know stans wjsn so hard like this guy lmao #KAMPSingapore2019 https://t.co/avua1w3K95
How minho looks in this outfit : 💣💀☠🔪💣💀☠🔪🗡💀💣💦🔥

How minho acts : 😊🌸💕🌈🌺💫🦋💞🍨🍭🌼🌈💫🦋🌹

#StrayKids @Stray_Kids
#KAMPSingapore2019
https://t.co/zD7eEhcb7z
oh my god skz also look at felix at the end 🥺😰
#KAMPSingapore2019 #StrayKids #스트레이키즈 #straykidsinsg https://t.co/HsB1Uz5mJ1
Stray Kids - Double Knot performance at Kamp Singapore 2019
 https://t.co/ekxQCwLRvl via @YouTube
#KampSingapore2019 @Stray_Kids
My boys😭😍🥰❤️💖 @Stray_Kids #KAMPSingapore2019 #스트레이키즈 #StrayKids https://t.co/dSDjcNGRbY
PEOPLE AT THE FRONT U GUYS ARE STILL OKAY? OMG WHAT DID U DO MR. HWANG #KAMPSingapore2019
#StrayKids https://t.co/qCXArddoPd
straykids kamp he

*********************** #thefacementhailand3 *******************************
#TheFaceMenThailand3 is just a TV show! 📺
ME SAVE ซบน.
#TheFaceMenThailand3
Campaign : Stay cool Timmy The Face Men Thailand 3 #TheFaceMenThailand3 #timmysanner #timmythefacemen @timmysanner #TeamARTBINA https://t.co/Sk0iLMdzxr
Lipe is very good.🤟 There are white sand beaches and clear blue water.🌊 Beautiful view, I love Lipe💕
#ReviewThailand #Thailand #kohlipe #ThailandNeedsLoverTour #TheFaceMenThailand3 #satun #AirAsia0บาทตัวจริง  #รีวิวสตูล #เกาะหลีเป๊ะ #ร้อยเดียวเที่ยวทั่วไทย #klook https://t.co/Y1WE9VzHZw
Graduate ' Knack N°13 ' Presentation 
Faculty of Fine and Applied Arts,
Suan Suanandha Rajabhat University #TheFaceMenThailand3 #อี้จ้าน #찬란한열여섯남도현_생일축하해 https://t.co/XiFMHorsxS
Timmy The Face Men Thailand 3
#TheFaceMenThailand3 #timmysanner #timmychitpolsanner https://t.co/2LNQPSFX8V
2019-11-10 00:03:00
6. WELOSW
7. THE DREAM SHOW
8. #SpringDay1000Days
9. #sincitynct
10. #TheFaceMenThailand3
2019-11-09 

*********************** #sowetoderby *******************************
Thanks to Kaizer Chiefs and Orlando Pirates especially their fans for the greatest spectacle and good advertisement of African Football. Your footprint is there for all of us to follow.👊

#SowetoDerby 🔥

#OrlandoPirates☠️#KaizerChiefs✌️ https://t.co/tyru8VhdMQ
Hate crime....5 black boys assault elderly white lady.
#SowetoDerby #ThankYouGameFreak #PopeyesChickenSandwich https://t.co/zinZPvKBF7
...the sooner the better people accepts that hitting the post means you are off target, that penalties are an integral part of the game...and that what counts are all on target goals ...#SowetoDerby
Thank you @KaizerChiefs 3:2 @orlandopirates #SowetoDerby #KaizerChiefs #SpringboksTour https://t.co/Pga7PMuuHd
@KaizerChiefs Rugby team is ready to face the @Springboks . Done playing cows like @orlandopirates and @Masandawana #SowetoDerby https://t.co/mfPegMrhAR
Did you ever win a #SowetoDerby ? https://t.co/Gc6zQLYsRz
Even at games 

Rate limit reached. Sleeping for: 610


KeyboardInterrupt: 

In [21]:
#Rouge one scores

In [4]:
from rouge import Rouge 
import re

hypothesis = "Dr K Sivan and the entire @isro team both in the present &amp; past have strived for this incredible effort. Bit of slip from cup to lip but I'm sure these brilliant scientists with relentless dedication will persevere &amp; get us to the moon. Jai Hind. -Sg #Chandrayaan2 #ISRO", "Jst Finished Watching #MissionMangal on @StarGoldIndia and No words to Describe this BLOCKBUSTER movie..Becoz of some Reason I couldn't watched it in theatres feeling sad about it..@akshaykumar sir U r G.O.A.T in Acting❤ #MissionMangalOnTV #JaiHind 🇮🇳 #ISRO 🙏."

reference = "Chandrayaan-2 is an Indian lunar mission to explore the unchartered south pole of the celestial body by landing a rover. On September 7, India attempted to make a soft landing on to the lunar surface. However, lander Vikram missed the primary landing site and went for the second. The visuals went missing henceforth."

# print(hypothesis)
hyp = re.sub(r'\"','',hypothesis[0])

rouge = Rouge()
scores = rouge.get_scores(hyp, reference)
print(scores)

[{'rouge-1': {'f': 0.1149425237415777, 'p': 0.1111111111111111, 'r': 0.11904761904761904}, 'rouge-2': {'f': 0.019999995008001245, 'p': 0.020833333333333332, 'r': 0.019230769230769232}, 'rouge-l': {'f': 0.09173612245953273, 'p': 0.08888888888888889, 'r': 0.09523809523809523}}]


In [14]:
import json
import os
def createDataset(hashtags, count, json_save_file = None, pickle_loc = None):
    
    summary_dataset = {}
    if json_save_file is not None and os.path.exists(json_save_file):
        with open(json_save_file, "r") as fp:
            summary_dataset = json.load(fp)
            
    for hasht in hashtags:
        
        if not os.path.exists(pickle_loc):
            os.mkdir(pickle_loc)
            
        tweets = ret.getTweets(hasht, count)
        summary_dataset[hasht] = {"tweets": []}
        it = 1
        for tweet in tweets:
            pickle_file_name = hasht + "_" + str(it)
            print(tweet.text)
            summary_dataset[hasht]["tweets"].append(tweet.text)
            with open(pickle_loc + "/" + pickle_file_name, "wb") as fp:
                pickle.dump(tweet, fp)
            it += 1
            print("=================")
        
        print("Enter the topic")
        topic = str(input())
        print("Please Enter User summary")
        user_summary = str(input())
        summary_dataset[hasht]["topic"] = topic
        summary_dataset[hasht]["user_summary"] = user_summary
        with open(json_save_file, "w") as fp:
            json.dump(summary_dataset, fp, indent = 4)

In [None]:
hashtags = ["#article370"]
createDataset(hashtags, 1, "dataset2.json", "Datasets")

All done. When @INCIndia mocked #ABV for not mentioning #RamMandir #Article370 #UCC He said those are not part of the CMP as @BJP4India didn't have majority. Look what happened now. Thank you @narendramodi ji for making these words true
Video via WA https://t.co/GDSnsvalUz
After seeing this two photos and their end results, liberals will be busy searching the old photos of @narendramodi ji 😂
#AYODHYAVERDICT #Article370 https://t.co/b2e2K9B9eZ
@RanaAyyub Much Like #kashmir After #Article370 Gone !!!!
.
.
.
#BabriMasjidverdict
Challenges in the Valley
#JammuAndKashmir #JammuKashmir #Jammu #KashmirStillCrying #Kashmir #KashmirStillUnderCurfew #KashmirWantsFreedom #Article370 #Ladakh #terrorism #Ambedkar #Nehru #GeneralThimmayya #AmitShah #valley #Kashmirvalley
https://t.co/XtH5ksBlSQ
#TopStory | #UniformCivilCode demand gains momentum

@sumita11 #Article370 @Madhavbhandari_ #TripleTalaq @atullondhe #AyodhyaHearing #UCC #AyodhyaVerdict #AyodhyaJudgment

https://t.co/Knx5DP7kBa
#AVEdit | La

kashmir srinagar article 370 1947
Please Enter User summary


In [21]:
def getpickeledTweets(pickle_dir, hashtag, count):
    it = 1
    tweets = []
    while it < count:
        try:
            file = pickle_dir + "/" + hashtag + "_" + str(it)
            with open(file, 'rb') as fp:
                tweets.append(pickle.load(fp))
        except:
            raise
        it += 1
    return tweets
                
def getRouge1Score(dataset, pickle_dir, alpha, beta, tweets_summary_count):
    
    with open(dataset, 'r') as fp:
        dataset = json.load(fp)
    
    rouge_data = []
    for hashtag, data in dataset.items():
        tweets = getpickeledTweets(pickle_dir, hashtag, len(data["tweets"]))
        topic = data['topic'].split(' ')
        graph = createGraph(topic, tweets)
        avg = graph.get_avg_thres()
        partitions = partitionGraph(graph, alpha, beta, high_rank_thres=avg)
        
        summary = summarization(partitions, tweet_cutoff = tweets_summary_count)
        user_summary = data['user_summary']
        
        summary = ' '.join(summary)
        #print("========")
        #print(summary)
        summary = re.sub(r'http\S+', '', summary)
        summary = re.sub('[^A-Za-z0-9]+', ' ', summary)
        user_summary = re.sub('[^A-Za-z0-9]+', ' ', user_summary)
        #print("=========")
        #print(summary)
        #print("=========")
        #print(user_summary)
        
        rouge = Rouge()
        scores = rouge.get_scores(summary, user_summary)
        print(scores)
        rouge_data.append((hashtag,scores))
        

In [22]:
getRouge1Score("dataset.json", "Datasets", 0.1, 0.5, 2)

[{'rouge-1': {'f': 0.22222221752098775, 'p': 0.17857142857142858, 'r': 0.29411764705882354}, 'rouge-2': {'f': 0.07547169329120715, 'p': 0.06349206349206349, 'r': 0.09302325581395349}, 'rouge-l': {'f': 0.13979155034364193, 'p': 0.125, 'r': 0.20588235294117646}}]
[{'rouge-1': {'f': 0.12121211667125821, 'p': 0.09302325581395349, 'r': 0.17391304347826086}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.10375897200888745, 'p': 0.09302325581395349, 'r': 0.17391304347826086}}]
[{'rouge-1': {'f': 0.12499999511250019, 'p': 0.10869565217391304, 'r': 0.14705882352941177}, 'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0}, 'rouge-l': {'f': 0.0718384074934583, 'p': 0.06521739130434782, 'r': 0.08823529411764706}}]
[{'rouge-1': {'f': 0.2696629164903422, 'p': 0.23076923076923078, 'r': 0.32432432432432434}, 'rouge-2': {'f': 0.061855665337443244, 'p': 0.05084745762711865, 'r': 0.07894736842105263}, 'rouge-l': {'f': 0.19165956467794126, 'p': 0.17307692307692307, 'r': 0.24324324324324326}}]
[{'roug