# TextRank
- 그래프 기반의 문서 요약 기법
- PageRank 알고리즘을 기반으로 한다.
- 해당 문장에서 나오는 단어들의 similarity 를 기반으로 가중치를 부여한다.

In [12]:
import networkx as nx
import numpy as np

from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
 
def textrank(document):
    sentences = document.split('\n')
 
    convert_to_binary_ls = CountVectorizer().fit_transform(sentences)
    tfidf_vetorized_ls = TfidfTransformer().fit_transform(convert_to_binary_ls)
 
    similarity_graph = tfidf_vetorized_ls * tfidf_vetorized_ls.T
 
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)

    scores = nx.pagerank(nx_graph)
    return sorted(((scores[i],s) for i,s in enumerate(sentences)),
                  reverse=True)

In [1]:
title = "American Accused of Spying in Russia Is a Marine Veteran, Family Says"
content = """
LONDON — The American man who was arrested last week in Russia on a spying charge is a Marine Corps veteran who was in Moscow to attend a wedding, his family said on Tuesday.
Russia’s Federal Security Service, known as the F.S.B., said on Monday that the American, Paul N. Whelan, had been detained on Friday “during an act of espionage,” and that a criminal case had been opened against him. 
Conviction on a spying charge in Russia carries a prison sentence of up to 20 years.
“We noticed that he was not in communication,” his parents and siblings said in a statement, “which was very much out of character for him even when he was traveling.”
They said they had not learned of his arrest until it was reported by the news media on Monday. 
Since then, they have contacted an array of United States government offices.
“We are deeply concerned for his safety and well-being,” the family statement said. “His innocence is undoubted and we trust that his rights will be respected.”
Mr. Whelan, 48, worked in corporate security for BorgWarner, an auto parts maker based near Detroit, his twin brother, David, wrote in an email.
“We understand that the U.S. government will see him within a 72-hour window that has already begun,” David Whelan wrote.
“Paul was attending a wedding for a fellow former Marine,” he wrote. 
“The wedding party were staying at the Metropol hotel” in Moscow.
He added that the family did not know of “anyone who has seen or interacted with him since just before 5 p.m. in Moscow on the 28th.”
The arrest of Mr. Whelan came 15 days after a Russian woman, Maria Butina, pleaded guilty in Washington to conspiring to act as a foreign agent, working with Russian officials to influence American political figures. 
The Kremlin has claimed she is innocent, insisting that Ms. Butina never acted as an agent for Moscow.
Tensions between the two capitals have been high over a series of revelations about Russia’s clandestine efforts to influence United States politics and the 2016 election.
Russian officials have denied any involvement in such efforts.
While it is not yet clear what prompted Mr. Whelan’s arrest, Russia has been known to arrest foreigners with an eye toward trading them for Russians held abroad.
"""

In [7]:
ranked = textrank(content)
ranked[:5]

[(0.0720242711848012,
  'LONDON — The American man who was arrested last week in Russia on a spying charge is a Marine Corps veteran who was in Moscow to attend a wedding, his family said on Tuesday.'),
 (0.06846993725514333,
  'Russia’s Federal Security Service, known as the F.S.B., said on Monday that the American, Paul N. Whelan, had been detained on Friday “during an act of espionage,” and that a criminal case had been opened against him. '),
 (0.066459113033833,
  'He added that the family did not know of “anyone who has seen or interacted with him since just before 5 p.m. in Moscow on the 28th.”'),
 (0.06598041794775444,
  '“We noticed that he was not in communication,” his parents and siblings said in a statement, “which was very much out of character for him even when he was traveling.”'),
 (0.06081929506261168,
  'They said they had not learned of his arrest until it was reported by the news media on Monday. ')]

In [None]:
from gensim.summarization import summarize

In [None]:
summarize(content)