In [1]:
from transformers import AutoTokenizer, pipeline

# Text generator: a causal-LM checkpoint driven through the distilgpt2 tokenizer.
tokenizer_gen = AutoTokenizer.from_pretrained("distilgpt2", padding_side="left")
generator = pipeline(
    "text-generation",
    "./results_causal/checkpoint-39000",
    tokenizer=tokenizer_gen,
    # GPT-2 tokenizers ship without a pad token, so EOS (id 50256) doubles as
    # padding; read it from the tokenizer instead of hard-coding the number.
    pad_token_id=tokenizer_gen.eos_token_id,
)

# Encoder ("reflector"): a RoBERTa checkpoint used only to embed generated text.
# Left padding keeps the final position on a real token, which matters because
# the downstream cell reads the last position of each encoded sequence.
tokenizer_ref = AutoTokenizer.from_pretrained("distilroberta-base", padding_side="left")
reflector = pipeline(
    "feature-extraction",
    "./results_masked/checkpoint-39000",
    tokenizer=tokenizer_ref,
    # NOTE(review): the former `pad_token_id=50256` was GPT-2's EOS id, not a
    # RoBERTa pad id, and appears to be ignored by the feature-extraction
    # pipeline; removed to avoid implying the two models share padding.
)

Some weights of RobertaModel were not initialized from the model checkpoint at ./results_masked/checkpoint-39000 and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Seed every RNG used by transformers so the sampled batch -- and therefore the
# novelty/feasibility counts computed from it -- is the same on each
# Restart & Run All.
from transformers import set_seed

set_seed(42)

# Draw 1000 continuations of the prompt "Bayesian", each capped at 35 tokens.
samples = generator(
    text_inputs="Bayesian", num_return_sequences=1000, max_length=35, truncation=True
)

In [3]:
import numpy as np
# Collect the raw generated strings, then embed them with the reflector.
generated_texts = [sample["generated_text"] for sample in samples]
# Keep one vector per text: the last entry along each of the two outer axes of
# the pipeline result (presumably batch item, then final token position --
# TODO confirm against the pipeline's output layout).
outputs = np.array([encoded[-1][-1] for encoded in reflector(generated_texts)])

In [4]:
from sklearn.metrics.pairwise import cosine_distances
# Reference embeddings stored on disk
# (presumably produced by the same encoder -- TODO confirm).
V = np.load(file="vectors_samples.npy")
# D[i, j] is the cosine distance between generated sample i and reference row j.
D = cosine_distances(X=outputs, Y=V)

In [64]:
# Novelty: even the closest reference vector must be at least 0.001 away
# (cosine distance) from the generated sample.
nearest_reference_distance = D.min(axis=1)
idx_novelty = nearest_reference_distance >= 0.001
idx_novelty.sum()

988

In [67]:
# Feasibility: at least 50 reference vectors must lie within a cosine distance
# of 0.003 from the generated sample.
close_reference_count = (D <= 0.003).sum(axis=1)
idx_feasibility = close_reference_count >= 50
idx_feasibility.sum()

881

In [68]:
# A sample meets the objective only when it is both novel and feasible.
idx_objective = np.logical_and(idx_novelty, idx_feasibility)
idx_objective.sum()

869

### Generated samples that passed both the novelty and feasibility filters

In [69]:
import pandas as pd
# Use the fully qualified option name: abbreviated names such as "max_colwidth"
# only work through pandas' partial matching, which can break if a similarly
# named option is added later.
pd.set_option("display.max_colwidth", 1000)

# First 10 generated samples that satisfy both the novelty and feasibility masks.
pd.DataFrame(samples)[idx_objective].head(10)

Unnamed: 0,generated_text
0,"Bayesian Neural Networks for Model-based Nonparametric Estimation and Reconstruction in the Real World Using a Single Tree Search Methodology, a Benchmark, and the Knowledge Graph"
2,Bayesian Contrastive Learning For Deep Learning Evaluation of Deep Reinforcement Learning System with Limited Data Bases Using Differentiable Models with Applications to Heterogeneous Hardware Systems Evaluation Workflow
3,Bayesian Optimization of Multi-Graph Networks with Bayesian Inference with Continuous and Continuous Data Mining: A Comparative Study (Exam) and Applications in Bayesian Rein
4,Bayesian Contrastive Learning with Multiple Convex Optimization for Efficient Neural Networks Classification Problem: An Overview and Analysis of the Case of Dense Computing in Real-World
5,Bayesian Neural Networks for Learning to Generate Data for Multi-Sensor Data and Dataset Knowledge Networks for Smartphones via Deep Neural Networks for Micro-Sensor Imaging and Diagn
6,Bayesian deep learning and its evaluation as an adversarial attack on Riemannian random forest theory and its applications to online image classification and machine learning applications in clinical image classification
7,Bayesian Inference with a Differential Privacy for Learning Structured Relationships of Class Tasks using Probabilistic Bayes' Modeling System in the Cloud: a Data
9,Bayesian Optimization and the Effect of Bayes Approxima in Dynamic Data Augmentation and Data Estimation via Randomized Weight-Bias Analysis and a Bias-
10,Bayesian Algebraically-Aware Learning with Discriminative Convexity and Its Utility Function for Nonintrusive Features of Gaussian Processes: A
12,Bayesian Inference for Cross-Modal Meta-Learning of Multi-task Learning Sets with Limited Information Bottleneck Methods with Limited Information Bottleneck Costs: A Data Mining Approach


### Generated samples that failed the novelty or feasibility filter

In [70]:
# Random peek at samples that failed the novelty or feasibility mask.
# The draw is capped at the number of rejected rows (sample() raises when asked
# for more rows than exist) and seeded so the displayed table is reproducible.
rejected = pd.DataFrame(samples)[~idx_objective]
rejected.sample(n=min(10, len(rejected)), random_state=0)

Unnamed: 0,generated_text
589,"Bayesian Inference in Kernel Reinforcement Learning Networks with Applications to Autonomous Robot Driving in the Edge-Network Environment? The Case of Software-Diversity, Application to Intelligent"
63,Bayesian Linear Decomposition and its application to machine learning applications in medicine discovery and development of new products and services (LSTF) and medicine discovery in the US.
964,Bayesian Graph-based Generative Models for Multi-view Object Classification in the Wild: An Overview from the Field Survey and Review at BERT 2018 Competition (SXAS
582,Bayesian Optimization with Applications to Reinforcement Learning in Bayesian Inference (SINF) Games with Application to a Systematic Literature Review (BIRS) Dat
104,Bayesian Inference Attacks: A Solution to a Critical Error in Learning Multi-View Autonomous Vehicles in Urban Traffic Conditions via Bayesian Inference Models and Online Learning Techniques.
807,Bayesian Optimization of Stochastic Neural Networks Using Local Variance-Aware Policy Gradient Estimation and Adaptive Control Under Deep Reinforcement Learning Mechanism of Linear
912,Bayesian Optimization for Bayesian Optimization with Application to Deep Quantum Networks on Graphs' Power Flow: A Review of the Applications to the Physics of Computational Architecture and
559,Bayesian Inference with Multiple Instance Updates for Improved Online Learning in the COVID-19 Pandemic Using Deep Neural Networks (DNNs) and Machine Learning approaches (
16,Bayesian Multi-agent Deep Learning for Drug Discovery: A Survey on Drug Discovery for Drug Discovery and Clinical Study on Drug Application in Drug Discovery and Clinical Trial Results With Alzheimer's
129,Bayesian Reinforcement Learning with Sparsity-Aware Feedback Mechanism (AVER): the Perspective of Reinforcement Learning and Control over the Internet of Things and Machine Learning Appro
