In [None]:
# default_exp analyzers

In [None]:
#hide
#all_notest
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Analyzers

> Text analyzers to help create text-based covariates, treatments, or outcomes for causal analyses.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export

import math
import warnings
import numpy as np
import pandas as pd

def list2chunks(a, n):
    """
    Split the sequence `a` into `n` contiguous chunks of near-equal size.

    The first `len(a) % n` chunks receive one extra element. The chunks are
    produced lazily (a generator is returned), but the sizes are computed
    immediately, so an invalid `n` (e.g. 0) fails at call time.
    """
    base_size, n_larger = divmod(len(a), n)

    def _offset(chunk_idx):
        # start position of chunk `chunk_idx`; earlier chunks may be one longer
        return chunk_idx * base_size + min(chunk_idx, n_larger)

    return (a[_offset(chunk_idx):_offset(chunk_idx + 1)] for chunk_idx in range(n))

In [None]:
#export

class ZeroShotClassifier():
    """
    Interface to Zero Shot Topic Classifier

    Documents are scored against arbitrary, user-supplied labels by turning each
    (document, label) pair into a premise/hypothesis pair for an NLI model.
    """

    def __init__(self, model_name='facebook/bart-large-mnli', device=None):
        """
        ZeroShotClassifier constructor

        **Args:**
          - model_name(str): name of a BART NLI model (must contain 'mnli' or 'xnli')
          - device(str): device to use (e.g., 'cuda', 'cpu').
                         If None, 'cuda' is used when available, else 'cpu'.

        **Raises:**
          - ValueError: if model_name does not look like an MNLI/XNLI model
          - Exception: if PyTorch is not installed
        """
        if 'mnli' not in model_name and 'xnli' not in model_name:
            raise ValueError('ZeroShotClassifier requires an MNLI or XNLI model')
        try:
            import torch
        except ImportError as err:
            # chain the original error so the real import failure stays visible
            raise Exception('ZeroShotClassifier requires PyTorch to be installed.') from err
        self.torch_device = device
        if self.torch_device is None:
            self.torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
        from transformers import AutoModelForSequenceClassification, AutoTokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name).to(self.torch_device)


    @staticmethod
    def _as_label_list(values):
        """Normalize a labels argument (None, single string or iterable) to a list."""
        if values is None:
            return []
        if isinstance(values, str):
            # a bare string would otherwise be iterated character by character
            return [values]
        return list(values)


    @staticmethod
    def _softmax(logits):
        """Softmax over the last axis, shifted by the max to avoid overflow in exp."""
        shifted = logits - logits.max(axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=-1, keepdims=True)


    def predict(self, docs, labels=[], include_labels=False, multilabel=True,
               max_length=512, batch_size=8, nli_template='This text is about {}.',  topic_strings=[]):
        """
        This method performs zero-shot text classification using Natural Language Inference (NLI).


        **Parameters**:
          - docs(list|str): text of document or list of texts
          - labels(list): a list of strings representing topics of your choice
                          (a single string is treated as one label)
                          Example:
                           labels=['political science', 'sports', 'science']
          - include_labels(bool): If True, will return topic labels along with topic probabilities
          - multilabel(bool): If True, labels are considered independent and multiple labels can be
                            predicted true for a document (each score can be close to 1).
                            If False, scores are normalized such that probabilities sum to 1.
          - max_length(int): truncate long documents to this many tokens
          - batch_size(int): batch_size to use. default:8
                           Increase this value to speed up predictions - especially
                           if len(labels) is large.
          - nli_template(str): labels are inserted into this template for use as hypotheses in natural language inference
          - topic_strings(list): alias for labels parameter for backwards compatibility

        **Returns:**


          inferred probabilities or list of inferred probabilities if docs is a list
          (an empty list if docs is an empty list)

        **Raises:**
          - ValueError: if docs are not strings, if no labels are given, if both labels
                        and topic_strings are given, or if batch_size is not positive
        """

        # error checks
        is_str_input = False
        if not isinstance(docs, (list, np.ndarray)):
            docs = [docs]
            is_str_input = True
        if len(docs) > 0 and not isinstance(docs[0], str):
            raise ValueError('docs must be string or a list of strings representing document(s)')
        labels = self._as_label_list(labels)
        topic_strings = self._as_label_list(topic_strings)
        if len(labels) > 0 and len(topic_strings) > 0:
            raise ValueError('labels and topic_strings are mutually exclusive')
        if len(labels) == 0 and len(topic_strings) == 0:
            raise ValueError('labels must be a list of strings')
        if len(topic_strings) > 0:
            labels = topic_strings
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        if len(docs) == 0:
            # nothing to classify; avoids indexing into an empty list below
            return []


        # convert to sequences: one (premise, hypothesis) pair per document/label combination
        sequence_pairs = []
        for premise in docs:
            sequence_pairs.extend([[premise, nli_template.format(label)] for label in labels])
        if batch_size  > len(sequence_pairs): batch_size = len(sequence_pairs)
        if len(sequence_pairs) >= 100 and batch_size==8:
            warnings.warn('TIP: Try increasing batch_size to speedup ZeroShotClassifier predictions')
        num_chunks = math.ceil(len(sequence_pairs)/batch_size)
        sequence_chunks = list2chunks(sequence_pairs, n=num_chunks)

        # inference
        import torch
        with torch.no_grad():
            outputs = []
            for sequences in sequence_chunks:
                batch = self.tokenizer.batch_encode_plus(sequences, return_tensors='pt', max_length=max_length, truncation='only_first', padding=True).to(self.torch_device)
                logits = self.model(batch['input_ids'], attention_mask=batch['attention_mask'], return_dict=False)[0]
                outputs.extend(logits.cpu().detach().numpy())
        outputs = np.array(outputs)
        # pairs were generated document-major, so rows regroup as (doc, label, nli_class)
        outputs = outputs.reshape((len(docs), len(labels), -1))

        # process outputs
        # NOTE(review): assumes the model's first logit is "contradiction" and the last is
        # "entailment" (true for facebook/bart-large-mnli) - confirm for other checkpoints.
        if multilabel:
            # softmax over the entailment vs. contradiction dim for each label independently
            entail_contr_logits = outputs[..., [0, -1]]
            scores = self._softmax(entail_contr_logits)[..., 1]
        else:
            # softmax the "entailment" logits over all candidate labels
            entail_logits = outputs[..., -1]
            scores = self._softmax(entail_logits)
        scores = scores.tolist()
        if include_labels:
            scores = [list(zip(labels, s)) for s in scores]
        if is_str_input: scores = scores[0]
        return scores

In [None]:
show_doc(ZeroShotClassifier.predict)

<h4 id="ZeroShotClassifier.predict" class="doc_header"><code>ZeroShotClassifier.predict</code><a href="__main__.py#L29" class="source_link" style="float:right">[source]</a></h4>

> <code>ZeroShotClassifier.predict</code>(**`docs`**, **`labels`**=*`[]`*, **`include_labels`**=*`False`*, **`multilabel`**=*`True`*, **`max_length`**=*`512`*, **`batch_size`**=*`8`*, **`nli_template`**=*`'This text is about {}.'`*, **`topic_strings`**=*`[]`*)

This method performs zero-shot text classification using Natural Language Inference (NLI).


**Parameters**:
  - docs(list|str): text of document or list of texts
  - labels(list): a list of strings representing topics of your choice
                  Example:
                   labels=['political science', 'sports', 'science']
  - include_labels(bool): If True, will return topic labels along with topic probabilities
  - multilabel(bool): If True, labels are considered independent and multiple labels can predicted true for document and be close to 1.
                    If False, scores are normalized such that probabilities sum to 1.
  - max_length(int): truncate long documents to this many tokens
  - batch_size(int): batch_size to use. default:8
                   Increase this value to speed up predictions - especially
                   if len(topic_strings) is large.
  - nli_template(str): labels are inserted into this template for use as hypotheses in natural language inference
  - topic_strings(list): alias for labels parameter for backwards compatibility
  
**Returns:**


  inferred probabilities or list of inferred probabilities if doc is list

In [None]:
# Build a classifier with the default NLI model and score a single document
# against a handful of candidate topic labels.
zsl = ZeroShotClassifier()
labels=['politics', 'elections', 'sports', 'films', 'television']
doc = 'I am extremely dissatisfied with the President and will definitely vote in 2020.'
# include_labels=True returns (label, score) pairs instead of bare scores
preds = zsl.predict(doc, labels=labels, include_labels=True)

In [None]:
preds

[('politics', 0.979189932346344),
 ('elections', 0.9874580502510071),
 ('sports', 0.0005765454261563718),
 ('films', 0.002292441902682185),
 ('television', 0.001054605352692306)]

In [None]:
# Map each label to its score, then check on-topic vs. off-topic labels as groups.
d = dict(preds)
assert all(d[name] > 0.9 for name in ('politics', 'elections'))
assert all(d[name] < 0.1 for name in ('sports', 'films', 'television'))

In [None]:
#export

#from sentence_transformers import SentenceTransformer, util

class TextEncoder():
    """
    Tiny wrapper to sentence-transformers
    """

    def __init__(self, model_name='stsb-roberta-large', device=None):
        """
        TextEncoder constructor.

        **Args:**
          - model_name(str): name of fine-tuned model for embeddings
          - device(str): device to use (e.g., 'cuda', 'cpu').
                         If None, 'cuda' is used when available, else 'cpu'.

        **Raises:**
          - Exception: if sentence-transformers or PyTorch is not installed
        """
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError as err:
            # chain the original error so the real import failure stays visible
            raise Exception('TextEncoder requires: pip install sentence-transformers') from err
        try:
            import torch
        except ImportError as err:
            raise Exception('PyTorch must be installed.') from err

        self.torch_device = device
        if self.torch_device is None:
            self.torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # the chosen device is handed to the model on every encode() call
        self.model = SentenceTransformer(model_name)

    def encode(self, texts, batch_size=32, normalize=False, show_progress_bar=False):
        """
        Generate embeddings for the supplied text(s).

        **Args:**
          - texts(str|list): a single text or a list of texts; a single string is
                             wrapped in a list, so the model always receives a list
          - batch_size(int): batch size forwarded to the underlying model
          - normalize(bool): forwarded as `normalize_embeddings`
          - show_progress_bar(bool): forwarded to the underlying model

        **Returns:**
          whatever the underlying model's `encode` returns (called with convert_to_tensor=False)
        """
        if isinstance(texts, str): texts = [texts]
        return self.model.encode(texts, batch_size=batch_size,
                                 show_progress_bar=show_progress_bar, normalize_embeddings=normalize,
                                 convert_to_tensor=False, device=self.torch_device )

In [None]:
te = TextEncoder()

Downloading:   0%|          | 0.00/748 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.90k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/674 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [None]:
e = te.encode('The moon is bright.')

In [None]:
assert e.shape[0] == 1
assert e.shape[1] == 1024

In [None]:
#export

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
import math
import numpy as np
# Tokenizer regex that TopicModel.train falls back to when no token_pattern is given.
# First alternative: a letter-initial alphanumeric run followed by one or more groups
#   of a single separator (_, /, & or -) plus alphanumerics (e.g. "w-s").
# Second alternative: optional leading digits, then a letter followed by at least two
#   more alphanumeric characters (so such tokens are at least three characters long).
DEFAULT_TOKEN_PATTERN = (r"\b[a-zA-Z][a-zA-Z0-9]*(?:[_/&-][a-zA-Z0-9]+)+\b|"
                         r"\b\d*[a-zA-Z][a-zA-Z0-9][a-zA-Z0-9]+\b")

class TopicModel():
    """
    Topic model (LDA or NMF) fitted on bag-of-words counts of a document collection.

    Typical usage: construct with `texts` (which trains the model), call `build(texts)`
    to compute per-document topic scores, then query with `get_docs`, `get_topics`, etc.
    """


    def __init__(self,texts=None, n_topics=None, n_features=10000, 
                 min_df=5, max_df=0.5,  stop_words='english',
                 model_type='lda',
                 lda_max_iter=5, lda_mode='online',
                 token_pattern=None, verbose=1,
                 hyperparam_kwargs=None
    ):
        """
        Fits a topic model to documents in <texts>.
        Example:
            tm = TopicModel(docs, n_topics=20,
                            n_features=1000, min_df=2, max_df=0.95)
        Args:
            texts (list of str): list of texts. If None, no model is trained.
            n_topics (int): number of topics.
                            If None, n_topics = min{400, sqrt[# documents/2]})
            n_features (int):  maximum words to consider
            min_df (int or float): passed to CountVectorizer as min_df
            max_df (float): words in more than max_df proportion of docs discarded
            stop_words (str or list): either 'english' for built-in stop words or
                                      a list of stop words to ignore
            model_type(str): type of topic model to fit. One of {'lda', 'nmf'}.  Default:'lda'
            lda_max_iter (int): maximum iterations.  5 is default if using lda_mode='online'.
                                If lda_mode='batch', this should be increased (e.g., 1500).
                                Also passed as max_iter when model_type='nmf'.
            lda_mode (str):  one of {'online', 'batch'}. Ignored if model_type !='lda'
            token_pattern(str): regex pattern to use to tokenize documents. 
                                If None, DEFAULT_TOKEN_PATTERN is used.
            verbose(bool): verbosity
            hyperparam_kwargs(dict): optional overrides read by train(); recognised keys are
                                     'alpha', 'beta', 'nmf_alpha', 'l1_ratio' and 'ngram_range'
        Raises:
            ValueError: if both n_topics and texts are None
        """
        self.verbose=verbose

        # estimate n_topics
        if n_topics is None:
            if texts is None:
                raise ValueError('If n_topics is None, texts must be supplied')
            n_topics = self._estimate_n_topics(texts)
            print('n_topics automatically set to %s' % (n_topics))

        # train model
        if texts is not None:
            (model, vectorizer) = self.train(texts, model_type=model_type,
                                             n_topics=n_topics, n_features=n_features,
                                             min_df = min_df, max_df = max_df, 
                                             stop_words=stop_words,
                                             lda_max_iter=lda_max_iter, lda_mode=lda_mode,
                                             token_pattern=token_pattern,
                                             hyperparam_kwargs=hyperparam_kwargs)
        else:
            vectorizer = None
            model = None

        # save model and vectorizer and hyperparameter settings
        self.vectorizer = vectorizer
        self.model = model
        self.n_topics = n_topics
        self.n_features = n_features
        if verbose: print('done.')

        # these variables are set by self.build():
        self.topic_dict = None
        self.doc_topics = None
        self.bool_array = None

        self.scorer = None       # set by self.train_scorer()
        self.recommender = None  # set by self.train_recommender()
        return


    @staticmethod
    def _estimate_n_topics(texts):
        """Heuristic number of topics: min(400, floor(sqrt(#documents / 2))), at least 1."""
        estimated = max(1, int(math.floor(math.sqrt(len(texts) / 2))))
        return min(400, estimated)


    def _get_feature_names(self):
        """Return the vectorizer vocabulary, supporting both old and new scikit-learn APIs."""
        vectorizer = self.vectorizer
        if hasattr(vectorizer, 'get_feature_names_out'):
            return vectorizer.get_feature_names_out()
        # older scikit-learn releases only provide get_feature_names()
        return vectorizer.get_feature_names()


    def train(self,texts, model_type='lda', n_topics=None, n_features=10000,
              min_df=5, max_df=0.5,  stop_words='english',
              lda_max_iter=5, lda_mode='online',
              token_pattern=None, hyperparam_kwargs=None):
        """
        Fits a topic model to documents in <texts>.

        The fitted model and vectorizer are stored on the instance (any previous
        build() results are cleared) and are also returned.

        Args:
            texts (list of str): documents to fit on
            model_type (str): one of {'lda', 'nmf'}
            n_topics (int): number of topics. If None, the value stored on the instance
                            is used, otherwise it is estimated from len(texts).
            (remaining arguments: see the constructor)
        Returns:
            tuple: (model, vectorizer)
        Raises:
            ValueError: if model_type is not 'lda' or 'nmf'
        """
        # validate before doing any expensive work
        if model_type not in ('lda', 'nmf'):
            raise ValueError('unknown model type: %s' % (model_type,))
        if n_topics is None:
            n_topics = getattr(self, 'n_topics', None)
        if n_topics is None:
            n_topics = self._estimate_n_topics(texts)

        if hyperparam_kwargs is None:
            hyperparam_kwargs = {}
        alpha = hyperparam_kwargs.get('alpha', 5.0 / n_topics)
        beta = hyperparam_kwargs.get('beta', 0.01)
        nmf_alpha = hyperparam_kwargs.get('nmf_alpha', 0)
        l1_ratio = hyperparam_kwargs.get('l1_ratio', 0)
        ngram_range = hyperparam_kwargs.get('ngram_range', (1,1))

        # preprocess texts
        if self.verbose: print('preprocessing texts...')
        if token_pattern is None: token_pattern = DEFAULT_TOKEN_PATTERN
        vectorizer = CountVectorizer(max_df=max_df, min_df=min_df,
                                 max_features=n_features, stop_words=stop_words,
                                 token_pattern=token_pattern, ngram_range=ngram_range)

        x_train = vectorizer.fit_transform(texts)

        # fit model
        if self.verbose: print('fitting model...')
        if model_type == 'lda':
            model = LatentDirichletAllocation(n_components=n_topics, max_iter=lda_max_iter,
                                              learning_method=lda_mode, learning_offset=50.,
                                              doc_topic_prior=alpha,
                                              topic_word_prior=beta,
                                              verbose=self.verbose, random_state=0)
        else:
            nmf_kwargs = dict(n_components=n_topics,
                              max_iter=lda_max_iter,
                              verbose=self.verbose,
                              l1_ratio=l1_ratio,
                              random_state=0)
            try:
                model = NMF(alpha=nmf_alpha, **nmf_kwargs)
            except TypeError:
                # newer scikit-learn releases replaced `alpha` with alpha_W / alpha_H
                # NOTE(review): the scaling of the regularisation may differ between the
                # two APIs for non-zero nmf_alpha - confirm against the installed version.
                model = NMF(alpha_W=nmf_alpha, **nmf_kwargs)
        model.fit(x_train)

        # save model and vectorizer and hyperparameter settings
        self.model = model
        self.vectorizer = vectorizer
        self.n_topics = n_topics
        self.n_features = n_features
        # results of a previous build() no longer match the new model
        self.topic_dict = None
        self.doc_topics = None
        return (model, vectorizer)


    @property
    def topics(self):
        """
        convenience method/property: same as get_topics() with default arguments
        """
        return self.get_topics()


    def get_document_topic_distribution(self):
        """
        Gets the document-topic distribution computed by build().
        Raises an Exception if build() has not been called.
        """
        self._check_build()
        return self.doc_topics


    def get_sorted_docs(self, topic_id):
        """
        Returns all docs sorted by relevance to <topic_id>.
        Each entry is a dict as returned by get_docs(); note that the 'topic_id' key
        of an entry is that document's highest-scoring topic, not the <topic_id> argument.
        """
        docs_by_id = {doc['doc_id']: doc for doc in self.get_docs()}
        doc_topic_matrix = self.get_document_topic_distribution()
        # negate the column so that argsort yields descending relevance
        ranked_ids = (-doc_topic_matrix[:,topic_id]).argsort()
        return [docs_by_id[doc_id] for doc_id in ranked_ids]



    def get_word_weights(self, topic_id, n_words=100):
        """
        Returns a list tuples of the form: (word, weight) for given topic_id.
        The list holds the n_words highest-weighted words in descending order.
        Raises ValueError if topic_id is not smaller than the number of topics.
        """
        self._check_model()
        if topic_id+1 > len(self.model.components_): 
            raise ValueError('topic_id must be less than %s' % (len(self.model.components_)))
        feature_names = self._get_feature_names()
        word_probs = self.model.components_[topic_id]
        # last n_words entries of the ascending argsort, reversed -> descending weights
        word_ids = word_probs.argsort()[:-n_words - 1:-1]
        return [(feature_names[i], word_probs[i]) for i in word_ids]


    def get_topics(self, n_words=10, as_string=True):
        """
        Returns a list of discovered topics.
        Each topic is summarised by its n_words top words, either joined into one
        space-separated string (as_string=True) or as a list of words.
        """
        self._check_model()
        feature_names = self._get_feature_names()
        topic_summaries = []
        for topic in self.model.components_:
            summary = [feature_names[i] for i in topic.argsort()[:-n_words - 1:-1]]
            if as_string: summary = " ".join(summary)
            topic_summaries.append(summary)
        return topic_summaries


    def print_topics(self, n_words=10, show_counts=False):
        """
        print topics
        If show_counts is True (requires build()), topics are printed with the number of
        documents assigned to them, most populated first.
        """
        topics = self.get_topics(n_words=n_words, as_string=True)
        if show_counts:
            self._check_build()
            topic_counts = sorted([ (k, topics[k], len(v)) for k,v in self.topic_dict.items()], 
                                    key=lambda kv:kv[-1], reverse=True)
            for (idx, topic, count) in topic_counts:
                print("topic:%s | count:%s | %s" %(idx, count, topic))
        else:
            for i, t in enumerate(topics):
                print('topic %s | %s' % (i, t))
        return


    def build(self, texts):
        """
        Builds the document-topic distribution showing the topic probability distribution
        for each document in <texts>, and groups documents by their highest-scoring topic.
        """
        doc_topics = self.predict(texts)
        self.doc_topics = doc_topics

        self.topic_dict = self._rank_documents(texts, doc_topics=doc_topics)
        return



    def get_docs(self, topic_ids=[], doc_ids=[], rank=False):
        """
        Returns document entries for supplied topic_ids.

        Args:
            topic_ids (list of int): topics to include; empty means all topics
            doc_ids (list of int): if non-empty, only these documents are returned
            rank (bool): if False, results are sorted by doc_id; if True, they are kept
                         grouped by topic in descending order of topic score
        Returns:
            list of dict with keys 'text', 'doc_id', 'topic_proba' and 'topic_id'
        """
        self._check_build()
        # the default lists are only read, never mutated
        if topic_ids is None or len(topic_ids) == 0:
            topic_ids = list(range(self.n_topics))
        # a set makes the per-document membership test O(1)
        wanted_docs = set(doc_ids) if doc_ids is not None and len(doc_ids) > 0 else None
        result_texts = []
        for topic_id in topic_ids:
            if topic_id not in self.topic_dict: continue
            result_texts.extend(
                {'text':tup[0], 'doc_id':tup[1], 'topic_proba':tup[2], 'topic_id':topic_id}
                for tup in self.topic_dict[topic_id]
                if wanted_docs is None or tup[1] in wanted_docs)
        if not rank:
            result_texts = sorted(result_texts, key=lambda x:x['doc_id'])
        return result_texts


    def get_doctopics(self,  topic_ids=[], doc_ids=[]):
        """
        Returns a topic probability distribution for documents
        (one row per document selected by get_docs(topic_ids, doc_ids), ordered by doc_id)
        """
        docs = self.get_docs(topic_ids=topic_ids, doc_ids=doc_ids)
        return np.array([self.doc_topics[doc['doc_id']] for doc in docs])




    def predict(self, texts, threshold=None, harden=False):
        """
        predict topics for documents

        Args:
            texts (list of str): documents to transform
            threshold: accepted but currently ignored
            harden: accepted but currently ignored
        Returns:
            the fitted model's transform() output for the vectorized texts
            (one row of topic scores per document)
        """
        self._check_model()
        transformed_texts = self.vectorizer.transform(texts)
        X_topics = self.model.transform(transformed_texts)
        return X_topics

    def _rank_documents(self,
                       texts,
                       doc_topics=None):
        """
        Rank documents by topic score.

        Each document is assigned to its highest-scoring topic. Returns a dict mapping
        topic id -> list of (text, doc_id, score) in descending score order.
        """
        if doc_topics is not None:
            X_topics = doc_topics
        else:
            if self.verbose: print('transforming texts to topic space...')
            X_topics = self.predict(texts)
        topics = np.argmax(X_topics, axis=1)
        scores = np.amax(X_topics, axis=1)
        doc_ids = np.arange(len(texts))
        result = list(zip(texts, doc_ids, topics, scores))
        if self.verbose: print('done.')
        # stable sort: documents with equal scores keep their original order
        result = sorted(result, key=lambda x: x[-1], reverse=True)
        result_dict = {}
        for (text, doc_id, topic_id, score) in result:
            result_dict.setdefault(topic_id, []).append((text, doc_id, score))
        return result_dict


    def _check_build(self):
        """Raise unless a model is available and build() has been called."""
        self._check_model()
        if self.topic_dict is None: 
            raise Exception('Must call build() method.')


    def _check_model(self):
        """Raise unless both a fitted model and a vectorizer are available."""
        if self.model is None or self.vectorizer is None:
            raise Exception('Must call train()')


In [None]:
from sklearn.datasets import fetch_20newsgroups

# Strip message metadata so that only the document bodies remain.
remove = ('headers', 'footers', 'quotes')

# Download (or load from cache) both splits of the corpus.
newsgroups_train = fetch_20newsgroups(subset='train', remove=remove)
newsgroups_test = fetch_20newsgroups(subset='test', remove=remove)

# One combined corpus: training documents first, then test documents.
texts = newsgroups_train.data + newsgroups_test.data

# Newsgroup category of each document, in the same order as `texts`
# (handy for labelling visualizations).
targets = list(newsgroups_train.target) + list(newsgroups_test.target)
categories = [newsgroups_train.target_names[target] for target in targets]

In [None]:
tm = TopicModel(texts, n_features=10000)

n_topics automatically set to 97
preprocessing texts...
fitting model...
iteration: 1 of max_iter: 5
iteration: 2 of max_iter: 5
iteration: 3 of max_iter: 5
iteration: 4 of max_iter: 5
iteration: 5 of max_iter: 5
done.


In [None]:
tm.print_topics()

topic 0 | tape adam tim case moved bag quote mass marked zionism
topic 1 | image jpeg images format programs tiff files jfif save lossless
topic 2 | alternative movie film static cycles films philips dynamic hou phi
topic 3 | hell humans poster frank reality kent gerard gant eternal bell
topic 4 | air phd chz kit cbc ups w-s rus w47 mot
topic 5 | dog math great figure poster couldn don trying rushdie fatwa
topic 6 | collaboration nazi fact end expression germany philly world certified moore
topic 7 | gif points scale postscript mirror plane rendering algorithm polygon rayshade
topic 8 | fonts font shell converted iii characters slight composite breaks compress
topic 9 | power station supply options option led light tank plastic wall
topic 10 | transmission rider bmw driver automatic shift gear japanese stick highway
topic 11 | tyre ezekiel ruler hernia appeared appointed supreme man land power
topic 12 | space nasa earth data launch surface solar moon mission planet
topic 13 | israel j

In [None]:
tm.build(texts)

done.


In [None]:
texts[1]

"A fair number of brave souls who upgraded their SI clock oscillator have\nshared their experiences for this poll. Please send a brief message detailing\nyour experiences with the procedure. Top speed attained, CPU rated speed,\nadd on cards and adapters, heat sinks, hour of usage per day, floppy disk\nfunctionality with 800 and 1.4 m floppies are especially requested.\n\nI will be summarizing in the next two days, so please add to the network\nknowledge base if you have done the clock upgrade and haven't answered this\npoll. Thanks."

In [None]:
tm.doc_topics[1]

array([0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.05935853, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.04939132, 0.00105197,
       0.00105197, 0.00105197, 0.04181867, 0.00105197, 0.00105197,
       0.00105197, 0.21681858, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.02146013, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105197,
       0.00105197, 0.00105197, 0.00105197, 0.00105197, 0.00105

In [None]:
tm.topics[ np.argmax(tm.doc_topics[1])]

'card memory windows board ram bus drivers driver cpu problem'

In [None]:
# Score a new, unseen document against the fitted topics
# (predict takes a list of texts and returns one row of topic scores per text).
tm.predict(['Elon Musk leads Space Exploration Technologies (SpaceX), where he oversees '  +
            'the development and manufacturing of advanced rockets and spacecraft for missions ' +
            'to and beyond Earth orbit.'])

array([[0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.65009096, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.06185567, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00303214, 0.00303214,
        0.00303214, 0.00303214, 0.00303214, 0.00

In [None]:
# Show the top words of the highest-scoring topic for the same document.
# np.argmax flattens its input; with a single document the flat index is the topic index.
tm.topics[ np.argmax(tm.predict(['Elon Musk leads Space Exploration Technologies (SpaceX), where he oversees '  +
            'the development and manufacturing of advanced rockets and spacecraft for missions ' +
            'to and beyond Earth orbit.']))]

'space nasa earth data launch surface solar moon mission planet'

In [None]:
#hide
from nbdev.export import notebook2script; notebook2script()

Converted 00_causalinference.ipynb.
Converted 01_autocoder.ipynb.
Converted 02_analyzers.ipynb.
Converted 03_key_driver_analysis.ipynb.
Converted 04_preprocessing.ipynb.
Converted 05a_meta.base.ipynb.
Converted 05b_meta.tleaerner.ipynb.
Converted 05c_meta.slearner.ipynb.
Converted 05d_meta.xlearner.ipynb.
Converted 05e_meta.rlearner.ipynb.
Converted 05f_meta.utils.ipynb.
Converted 05g_meta.explainer.ipynb.
Converted 05h_meta.propensity.ipynb.
Converted 05i_meta.sensitivity.ipynb.
Converted 99_examples.ipynb.
Converted index.ipynb.
