## Analyse all house of commons speeches by current MPs

In [1]:
import spacy
import pandas as pd
from bs4 import BeautifulSoup
from collections import Counter
import requests

# Store TheyWorkForYou API key in separate config file
from config import TWFY_API_KEY

In [2]:
# Get MP -> theyworkforyou_id map
# Probably need to modify after general election to point to previous batch of MPs
mps = pd.read_csv("https://www.theyworkforyou.com/mps/?f=csv")

In [3]:
def get_mp_speeches(mp_id):
    """Get speeches of a particular MP based on TheyWorkForYou id and convert data into long format pandas data frame.
    Each row represents one speech at a particular date and time"""
    all_speeches = pd.DataFrame()
    rows = [1]
    page_no=1
    while len(rows) > 0:
        t = requests.get("https://www.theyworkforyou.com/api/getDebates?key={api_key}&\
                     type=commons&person={person}&results_per_page=1000&num={num}&page={page}&output=js".format(api_key=TWFY_API_KEY,
                                                                                                               person=mp_id,
                                                                                                               num=1000,
                                                                                                               page=page_no))
        rows = t.json()["rows"]
        speeches = []
        # Loop over each row
        for row in rows:
            speeches.append({
                    'speech_id':row["gid"],
                    'speech_url':row["listurl"],
                    'mp_name':row["speaker"]["name"],
                    'mp_constituency':row["speaker"]["constituency"],
                    'mp_party':row["speaker"]["party"],
                    'mp_id':row["person_id"],
                    'date':pd.to_datetime(row["hdate"], format="%Y-%m-%d"),
                    'time':row["htime"],
                    'section_id':row["section_id"],
                    'subsection_id':row["subsection_id"],
                    'debate_title':row["parent"]["body"],
                    'body':BeautifulSoup(row["body"], "html5lib").get_text()
                })
        speeches = pd.DataFrame(speeches)

        # Concatenate onto complete speeches dataframe
        all_speeches = pd.concat([all_speeches, speeches], ignore_index=True)
        # Increment page_counter
        page_no += 1
    
    print("Got speeches for MP {0}".format(mp_id))
    return all_speeches

In [186]:
## Download all MP speeches if this is set to True
if False:
    # Parallelise downloading of MP speeches
    from multiprocessing import Pool

    # Number of threads to use to fetch
    NUM_THREADS = 8
    # Make list of mp ids
    list_of_mp_ids = list(mps["Person ID"])

    # Create pool of threads
    pool = Pool(NUM_THREADS)
    # Use pool.map to download speeches mp by mp
    results = pool.map(get_mp_speeches, list_of_mp_ids)
    pool.close()
    pool.join()

    # Concatenate all mps into one dataframe
    all_mp_speeches = pd.concat(results)
    
    # Write data to a file to save it
    all_mp_speeches.to_hdf("mp_speeches.h5", "speeches")
else:
    ## Read in mp speeches that have been previously downloaded
    all_mp_speeches = pd.read_hdf("mp_speeches.h5", "speeches")


Interpreting naive datetime as local 2017-05-04 18:29:16.416683. Please add timezone info to timestamps.



In [187]:
all_mp_speeches["time_"] = pd.to_datetime(all_mp_speeches.time, format="%H:%M:%S", errors="coerce")


Interpreting naive datetime as local 2017-05-04 18:29:18.363361. Please add timezone info to timestamps.



In [188]:
all_mp_speeches["time_hour"] = all_mp_speeches.time.str.split(":", expand=True).get(0)


Interpreting naive datetime as local 2017-05-04 18:29:18.648593. Please add timezone info to timestamps.



In [7]:
import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf

### Run data through NLP

In [8]:
import pyLDAvis
import pyLDAvis.gensim

In [9]:
# Load english language model from spacy
nlp = spacy.load("en")


Interpreting naive datetime as local 2017-05-04 15:28:21.353920. Please add timezone info to timestamps.



In [34]:
from gensim.models import Phrases
from gensim.models.word2vec import LineSentence
import itertools
import codecs
import os


Interpreting naive datetime as local 2017-05-04 15:41:21.487053. Please add timezone info to timestamps.



In [189]:
# Directory to store Phrase models
intermediate_directory = os.path.join('.', 'intermediate')


Interpreting naive datetime as local 2017-05-04 18:29:36.346173. Please add timezone info to timestamps.



In [190]:
all_mp_speeches_sample = all_mp_speeches.sample(frac=0.5)


Interpreting naive datetime as local 2017-05-04 18:29:47.776056. Please add timezone info to timestamps.



In [191]:
del all_mp_speeches


Interpreting naive datetime as local 2017-05-04 18:29:49.429808. Please add timezone info to timestamps.



In [192]:
def punct_space(token):
    """
    helper function to eliminate tokens
    that are pure punctuation or whitespace
    """
    
    return token.is_punct or token.is_space

def line_review(filename):
    """
    generator function to read in speeches from the file
    and un-escape the original line breaks in the text
    """
    
    with codecs.open(filename, encoding='utf_8') as f:
        for review in f:
            yield review.replace('\\n', '\n')

def lemmatized_sentence_corpus(filename):
    """
    generator function to use spaCy to parse speeches,
    lemmatize the text, and yield sentences
    """
    
    for parsed_speech in nlp.pipe(line_speech(filename),
                                  batch_size=10000, n_threads=4):
        
        for sent in parsed_speech.sents:
            yield u' '.join([token.lemma_ for token in sent
                             if not punct_space(token)])


Interpreting naive datetime as local 2017-05-04 18:29:54.637134. Please add timezone info to timestamps.



In [193]:
#small_sample=all_mp_speeches_sample.sample(frac=0.1)


Interpreting naive datetime as local 2017-05-04 18:29:56.891450. Please add timezone info to timestamps.



In [194]:
# Save speeches to txt file first
speeches_filepath = os.path.join(intermediate_directory, "speeches.txt")
if True:
    with codecs.open(speeches_filepath, "w", encoding="utf_8") as f:
        for speech in all_mp_speeches_sample["body"]:
            f.write(speech + "\n")


Interpreting naive datetime as local 2017-05-04 18:29:58.290823. Please add timezone info to timestamps.



In [195]:
unigram_sentences_filepath = os.path.join(intermediate_directory, 'unigram_sentences_all.txt')


Interpreting naive datetime as local 2017-05-04 18:30:07.985484. Please add timezone info to timestamps.



In [196]:
%%time
## Lemmatize all speeches and store them in text file
# this is a bit time consuming - make the if statement True
# if you want to execute data prep yourself.
if True:
    with codecs.open(unigram_sentences_filepath, 'w', encoding='utf_8') as f:
        for sentence in lemmatized_sentence_corpus(speeches_filepath):
            f.write(sentence + '\n')


Interpreting naive datetime as local 2017-05-04 18:30:10.975190. Please add timezone info to timestamps.



CPU times: user 1h 4min 41s, sys: 55 s, total: 1h 5min 36s
Wall time: 31min 56s


In [198]:
unigram_sentences = LineSentence(unigram_sentences_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.214897. Please add timezone info to timestamps.



In [199]:
bigram_model_filepath = os.path.join(intermediate_directory, 'bigram_model_all')


Interpreting naive datetime as local 2017-05-04 18:44:13.218711. Please add timezone info to timestamps.



In [200]:
%%time

## Convert unigrams to bigrams
# this is a bit time consuming - make the if statement True
# if you want to execute modeling yourself.
if True:

    bigram_model = Phrases(unigram_sentences)

    bigram_model.save(bigram_model_filepath)
    
# load the finished model from disk
bigram_model = Phrases.load(bigram_model_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.227362. Please add timezone info to timestamps.



CPU times: user 2min 9s, sys: 1.18 s, total: 2min 10s
Wall time: 2min 9s


In [201]:
bigram_sentences_filepath = os.path.join(intermediate_directory, 'bigram_sentences_all.txt')


Interpreting naive datetime as local 2017-05-04 18:44:13.230248. Please add timezone info to timestamps.



In [202]:
%%time

# this is a bit time consuming - make the if statement True
# if you want to execute data prep yourself.
if True:
    with codecs.open(bigram_sentences_filepath, 'w', encoding='utf_8') as f: 
        for unigram_sentence in unigram_sentences:
            bigram_sentence = u' '.join(bigram_model[unigram_sentence])
            f.write(bigram_sentence + '\n')


Interpreting naive datetime as local 2017-05-04 18:44:13.234049. Please add timezone info to timestamps.


For a faster implementation, use the gensim.models.phrases.Phraser class



CPU times: user 4min 15s, sys: 1.28 s, total: 4min 17s
Wall time: 4min 25s


In [203]:
bigram_sentences = LineSentence(bigram_sentences_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.236954. Please add timezone info to timestamps.



In [204]:
trigram_model_filepath = os.path.join(intermediate_directory, 'trigram_model_all')


Interpreting naive datetime as local 2017-05-04 18:44:13.240640. Please add timezone info to timestamps.



In [205]:
%%time

# this is a bit time consuming - make the if statement True
# if you want to execute modeling yourself.
if True:

    trigram_model = Phrases(bigram_sentences)

    trigram_model.save(trigram_model_filepath)
    
# load the finished model from disk
trigram_model = Phrases.load(trigram_model_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.243775. Please add timezone info to timestamps.



CPU times: user 2min 18s, sys: 1.71 s, total: 2min 19s
Wall time: 2min 24s


In [206]:
trigram_sentences_filepath = os.path.join(intermediate_directory, 'trigram_sentences_all.txt')


Interpreting naive datetime as local 2017-05-04 18:44:13.247229. Please add timezone info to timestamps.



In [207]:
%%time

# this is a bit time consuming - make the if statement True
# if you want to execute data prep yourself.
if True:
    with codecs.open(trigram_sentences_filepath, 'w', encoding='utf_8') as f:
        for bigram_sentence in bigram_sentences:
            trigram_sentence = u' '.join(trigram_model[bigram_sentence])
            f.write(trigram_sentence + '\n')


Interpreting naive datetime as local 2017-05-04 18:44:13.250478. Please add timezone info to timestamps.


For a faster implementation, use the gensim.models.phrases.Phraser class



CPU times: user 4min 59s, sys: 1.74 s, total: 5min 1s
Wall time: 5min 25s


In [208]:
trigram_sentences = LineSentence(trigram_sentences_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.253278. Please add timezone info to timestamps.



In [209]:
trigram_speeches_filepath = os.path.join(intermediate_directory, 'trigram_transformed_speeches_all.txt')


Interpreting naive datetime as local 2017-05-04 18:44:13.255827. Please add timezone info to timestamps.



In [210]:
%%time

# this is a bit time consuming - make the if statement True
# if you want to execute data prep yourself.
if True:
    with codecs.open(trigram_speeches_filepath, 'w', encoding='utf_8') as f:  
        for parsed_speech in nlp.pipe(line_review(speeches_filepath),
                                      batch_size=10000, n_threads=4):
            # lemmatize the text, removing punctuation and whitespace
            unigram_speech = [token.lemma_ for token in parsed_speech
                              if not punct_space(token)]
            
            # apply the first-order and second-order phrase models
            bigram_speech = bigram_model[unigram_speech]
            trigram_speech = trigram_model[bigram_speech]
            
            # remove any remaining stopwords
            trigram_speech = [term for term in trigram_speech
                              if term not in spacy.en.language_data.STOP_WORDS]
            
            # write the transformed review as a line in the new file
            trigram_speech = u' '.join(trigram_speech)
            f.write(trigram_speech + '\n')


Interpreting naive datetime as local 2017-05-04 18:44:13.259139. Please add timezone info to timestamps.


For a faster implementation, use the gensim.models.phrases.Phraser class



CPU times: user 1h 15min 43s, sys: 52.9 s, total: 1h 16min 36s
Wall time: 44min


In [211]:
print(u'Original:')

for review in itertools.islice(line_review(speeches_filepath), 30, 33):
    print(review)

print(u'Transformed:')

with codecs.open(trigram_speeches_filepath, encoding='utf_8') as f:
    for review in itertools.islice(f, 30, 33):
        print(review)

Original:
Will the right hon. Gentleman give way?

I completely agree with the hon. Gentleman: it is absolutely essential that sport is seen to be clean. That is something for which we in this country have, until now, had a very good reputation, and I hope that we will still have a good reputation. We are talking to all the UK sports bodies, and we intend to draw up proposals, which I hope all of them will adopt. Beyond that, we are taking a lead internationally. The Prime Minister is holding an anti-corruption summit next month, and this is one of the issues that will be discussed.

Regeneration was never an issue in the years that I represented Basildon because of the development corporation and the new town commission, which were entirely responsible for the vibrant economy that we all experienced then. What a contrast life is in Southend, where assistance is urgently needed with the regeneration of the wonderful seaside town, part of which I represent, together with my hon. Friend 


Interpreting naive datetime as local 2017-05-04 18:44:13.261872. Please add timezone info to timestamps.



### LDA Topic Modelling

In [212]:
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.ldamulticore import LdaMulticore

import pyLDAvis
import pyLDAvis.gensim
import warnings


Interpreting naive datetime as local 2017-05-04 18:44:13.265665. Please add timezone info to timestamps.



In [213]:
trigram_dictionary_filepath = os.path.join(intermediate_directory, 'trigram_dict_all.dict')


Interpreting naive datetime as local 2017-05-04 18:44:13.268752. Please add timezone info to timestamps.



In [214]:
%%time

# this is a bit time consuming - make the if statement True
# if you want to learn the dictionary yourself.
if True:
    trigram_speeches = LineSentence(trigram_speeches_filepath)

    # learn the dictionary by iterating over all of the speeches
    trigram_dictionary = Dictionary(trigram_speeches)
    
    # filter tokens that are very rare or too common from
    # the dictionary (filter_extremes) and reassign integer ids (compactify)
    trigram_dictionary.filter_extremes(no_below=10, no_above=0.4)
    trigram_dictionary.compactify()

    trigram_dictionary.save(trigram_dictionary_filepath)
    
# load the finished dictionary from disk
trigram_dictionary = Dictionary.load(trigram_dictionary_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.271932. Please add timezone info to timestamps.



CPU times: user 52.5 s, sys: 168 ms, total: 52.7 s
Wall time: 58.3 s


In [215]:
trigram_bow_filepath = os.path.join(intermediate_directory, 'trigram_bow_corpus_all.mm')


Interpreting naive datetime as local 2017-05-04 18:44:13.294442. Please add timezone info to timestamps.



In [216]:
def trigram_bow_generator(filepath):
    """
    generator function to read speeches from a file
    and yield a bag-of-words representation
    """
    
    for speech in LineSentence(filepath):
        yield trigram_dictionary.doc2bow(speech)


Interpreting naive datetime as local 2017-05-04 18:44:13.303736. Please add timezone info to timestamps.



In [217]:
%%time

# this is a bit time consuming - make the if statement True
# if you want to build the bag-of-words corpus yourself.
if True:
    # generate bag-of-words representations for
    # all speches and save them as a matrix
    MmCorpus.serialize(trigram_bow_filepath,
                       trigram_bow_generator(trigram_speeches_filepath))
    
# load the finished bag-of-words corpus from disk
trigram_bow_corpus = MmCorpus(trigram_bow_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.307576. Please add timezone info to timestamps.



CPU times: user 1min 2s, sys: 536 ms, total: 1min 3s
Wall time: 1min 3s


In [218]:
lda_model_filepath = os.path.join(intermediate_directory, 'lda_model_all')


Interpreting naive datetime as local 2017-05-04 18:44:13.311190. Please add timezone info to timestamps.



In [219]:
%%time

# this is a bit time consuming - make the if statement True
# if you want to train the LDA model yourself.
if True:

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        
        # workers => sets the parallelism, and should be
        # set to your number of physical cores minus one
        lda = LdaMulticore(trigram_bow_corpus,
                           num_topics=50,
                           id2word=trigram_dictionary,
                           workers=4)
    
    lda.save(lda_model_filepath)
    
# load the finished LDA model from disk
lda = LdaMulticore.load(lda_model_filepath)


Interpreting naive datetime as local 2017-05-04 18:44:13.315599. Please add timezone info to timestamps.



CPU times: user 10min 20s, sys: 1min 3s, total: 11min 24s
Wall time: 16min 18s


In [221]:
def explore_topic(topic_number, topn=25):
    """
    accept a user-supplied topic number and
    print out a formatted list of the top terms
    """
        
    print('{:20} {}'.format('term', 'frequency'))

    for term, frequency in lda.show_topic(topic_number, topn=25):
        print('{:20} {:.3f}'.format(term, round(frequency, 3)))


Interpreting naive datetime as local 2017-05-04 18:44:13.328071. Please add timezone info to timestamps.



In [222]:
LDAvis_prepared = pyLDAvis.gensim.prepare(lda, trigram_bow_corpus,
                                              trigram_dictionary)


Interpreting naive datetime as local 2017-05-04 18:44:13.332279. Please add timezone info to timestamps.



In [223]:
pyLDAvis.display(LDAvis_prepared)


Interpreting naive datetime as local 2017-05-04 18:44:13.370209. Please add timezone info to timestamps.



In [264]:
def lda_description(speech_text, topic_no=-1, min_topic_freq=0.05):
    """
    accept the original text of a review and (1) parse it with spaCy,
    (2) apply text pre-proccessing steps, (3) create a bag-of-words
    representation, (4) create an LDA representation, and
    (5) print a sorted list of the top topics in the LDA representation
    """
    
    # parse the review text with spaCy
    parsed_speech = nlp(speech_text)
    
    # lemmatize the text and remove punctuation and whitespace
    unigram_speech = [token.lemma_ for token in parsed_speech
                      if not punct_space(token)]
    
    # apply the first-order and secord-order phrase models
    bigram_speech = bigram_model[unigram_speech]
    trigram_speech = trigram_model[bigram_speech]
    
    # remove any remaining stopwords
    trigram_speech = [term for term in trigram_speech
                      if not term in spacy.en.language_data.STOP_WORDS]
    
    # create a bag-of-words representation
    speech_bow = trigram_dictionary.doc2bow(trigram_speech)
    
    # create an LDA representation
    speech_lda = lda[speech_bow]
    
    # sort with the most highly related topics first
    speech_lda = sorted(speech_lda, key=lambda x: -x[1])
    
    if topic_no != -1:
        return speech_lda
    else:
        for topic_number, freq in speech_lda:
            if freq < min_topic_freq:
                break

            # print the most highly related topic names and frequencies
            print('{:25} {}'.format(topic_number,
                                    round(freq, 3)))


Interpreting naive datetime as local 2017-05-05 00:36:37.604253. Please add timezone info to timestamps.



In [265]:
lda_description(all_mp_speeches_sample.iloc[4]["body"], 1)


Interpreting naive datetime as local 2017-05-05 00:36:39.671648. Please add timezone info to timestamps.


For a faster implementation, use the gensim.models.phrases.Phraser class



[(13, 0.28859557203382086),
 (31, 0.25768020934678981),
 (30, 0.20350623017383998),
 (41, 0.18888465511221689)]

In [245]:
all_mp_speeches_sample.groupby(["section_id", "mp_name"])


Interpreting naive datetime as local 2017-05-04 22:35:26.719278. Please add timezone info to timestamps.



<pandas.core.groupby.DataFrameGroupBy object at 0x7f67075c0128>

In [256]:
all_mp_speeches_sample[all_mp_speeches_sample.debate_title.str.contains("Article 50")]


Interpreting naive datetime as local 2017-05-05 00:09:15.546564. Please add timezone info to timestamps.



Unnamed: 0,body,date,debate_title,mp_constituency,mp_id,mp_name,mp_party,section_id,speech_id,speech_url,subsection_id,time,time_,time_hour
257,What it requires is leaving the European Union...,2017-01-24,Article 50,Haltemprice and Howden,10162,David Davis,Conservative,24068085,2017-01-24b.168.3,/debates/?id=2017-01-24b.161.0&amp;s=speaker%3...,24068085,12:39:00,1900-01-01 12:39:00,12
240,I am quite happy with the Government consultin...,2016-07-21,Attorney General: Article 50 of the Treaty on ...,Bury North,24877,David Nuttall,Conservative,24144136,2016-07-21c.949.1,/debates/?id=2016-07-21c.947.10&amp;s=speaker%...,24144138,00:00:00,1900-01-01 00:00:00,00
9,As one who campaigned to remain in the Europea...,2017-01-24,Article 50,North East Bedfordshire,10770,Alistair Burt,Conservative,24068085,2017-01-24b.172.0,/debates/?id=2017-01-24b.161.0&amp;s=speaker%3...,24068085,12:39:00,1900-01-01 12:39:00,12
145,I do not wish to be unkind to the right hon. G...,2017-03-29,Article 50: Points of Order,Buckingham,10040,John Bercow,Speaker,24169788,2017-03-29c.300.2,/debates/?id=2017-03-29c.300.0&amp;s=speaker%3...,24170054,15:21:00,1900-01-01 15:21:00,15
30,"I am tempted to point out that, as I said earl...",2017-02-02,Attorney General: Legal Costs: Article 50,Kenilworth and Southam,11791,Jeremy Wright,Conservative,24084053,2017-02-02b.1161.0,/debates/?id=2017-02-02b.1158.7&amp;s=speaker%...,24084085,00:00:00,1900-01-01 00:00:00,00
311,"The Chairman of the Select Committee, who is n...",2017-01-24,Article 50,Haltemprice and Howden,10162,David Davis,Conservative,24068085,2017-01-24b.185.2,/debates/?id=2017-01-24b.161.0&amp;s=speaker%3...,24068085,12:39:00,1900-01-01 12:39:00,12
258,"The hon. Lady, as ever, goes right to the hear...",2017-01-24,Article 50,Haltemprice and Howden,10162,David Davis,Conservative,24068085,2017-01-24b.168.5,/debates/?id=2017-01-24b.161.0&amp;s=speaker%3...,24068085,12:39:00,1900-01-01 12:39:00,12
3,I warmly welcome the tone of the Prime Ministe...,2017-03-29,Article 50,Eddisbury,25363,Antoinette Sandbach,Conservative,24169788,2017-03-29c.279.0,/debates/?id=2017-03-29c.250.5&amp;s=speaker%3...,24169788,12:50:00,1900-01-01 12:50:00,12
586,One—but not the only—reason why we are taking ...,2016-11-07,Article 50,Haltemprice and Howden,10162,David Davis,Conservative,23954680,2016-11-07a.1281.6,/debates/?id=2016-11-07a.1254.6&amp;s=speaker%...,23954680,15:44:00,1900-01-01 15:44:00,15
317,I have been here for 30 years. If the hon. Gen...,2017-01-24,Article 50,Haltemprice and Howden,10162,David Davis,Conservative,24068085,2017-01-24b.187.8,/debates/?id=2017-01-24b.161.0&amp;s=speaker%3...,24068085,12:39:00,1900-01-01 12:39:00,12


In [269]:
all_mp_speeches_sample[all_mp_speeches_sample.debate_title.str.contains("Article 50")].apply(lambda x: print(lda_description(x.body, 1)), 1)


Interpreting naive datetime as local 2017-05-05 00:39:24.051374. Please add timezone info to timestamps.


For a faster implementation, use the gensim.models.phrases.Phraser class



[(13, 0.755)]
[(10, 0.60006690516346117), (40, 0.19023074929970185), (12, 0.0981708304055635), (13, 0.076146899746658572)]
[(28, 0.29639312898537623), (49, 0.17177989792100437), (23, 0.15897490660287361), (13, 0.14246895004834731), (48, 0.11408631623546206), (39, 0.041740357959914549), (35, 0.03248564557617882), (0, 0.026798069398114757)]
[(40, 0.48370276549072383), (12, 0.31883536558152625), (6, 0.15566521988611198), (36, 0.024759612004600702)]
[(46, 0.43449697571851759), (40, 0.20121543844507056), (6, 0.18839961937606817), (35, 0.090136958050528113), (42, 0.064322436981244444)]
[(13, 0.28889757791340842), (40, 0.28267906641191709), (3, 0.23929407227958974), (28, 0.088357839569015897), (6, 0.067897861591553199), (5, 0.022271172595961057)]
[(34, 0.55364450240798424), (13, 0.18111926285831054), (40, 0.16588971528210253), (18, 0.05934651945160322)]
[(23, 0.28074746569843217), (15, 0.25114094494463801), (32, 0.22554814244386245), (46, 0.20848937283899369)]
[(40, 0.61472776172636245), (46,

257     None
240     None
9       None
145     None
30      None
311     None
258     None
3       None
586     None
317     None
185     None
281     None
19      None
158     None
15      None
374     None
587     None
125     None
10      None
229     None
528     None
60      None
58      None
573     None
63      None
1227    None
191     None
11      None
118     None
164     None
        ... 
113     None
57      None
12      None
323     None
45      None
322     None
12      None
334     None
32      None
62      None
55      None
3       None
31      None
69      None
585     None
21      None
32      None
174     None
173     None
29      None
292     None
2       None
43      None
55      None
193     None
157     None
151     None
120     None
273     None
128     None
dtype: object