In [1]:
import faiss
import pyterrier as pt
import ujson
import numpy as np

import itertools
import threading
import queue

from colbert.modeling.inference import ModelInference
from colbert.evaluation.loaders import load_colbert
from pyterrier_colbert import load_checkpoint
# monkeypatch to use our downloading version
import colbert.evaluation.loaders

colbert.evaluation.loaders.load_checkpoint = load_checkpoint
colbert.evaluation.loaders.load_model.__globals__['load_checkpoint'] = load_checkpoint
from colbert.utils.utils import print_message
import pickle
from colbert.indexing.index_manager import IndexManager
from warnings import warn
from transformers import AutoTokenizer, AutoModelForMaskedLM


In [2]:
pt.init()

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [3]:
from pyterrier_colbert.preprocessing import DatasetPreprocessor, TokenRemover, HFTokenizer, NLTKTokenizer

In [4]:
class Object():
    pass

In [5]:
checkpoint="http://www.dcs.gla.ac.uk/~craigm/colbert.dnn.zip"

In [6]:
args = Object()
args.similarity = 'cosine'
args.dim = 128
args.query_maxlen = 32
args.doc_maxlen = 180
args.checkpoint = checkpoint
args.mask_punctuation = False

In [7]:
dataset = pt.get_dataset('vaswani')

In [8]:
wordpiece = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
hf_tokenizer = HFTokenizer(tokenizer=wordpiece)
nltk_tokenizer = NLTKTokenizer(tokenizer_type='treebank')

In [9]:
en_remover = TokenRemover(tokenizer=nltk_tokenizer, stopwords_files=['stopwords/stopwords-en.txt'])
en2_remover = TokenRemover(tokenizer=nltk_tokenizer, stopwords_files=['stopwords/stopwords-en.txt'], stopword_max_length=2)
en4_remover = TokenRemover(tokenizer=nltk_tokenizer, stopwords_files=['stopwords/stopwords-en.txt'], stopword_max_length=4)
en_punc_remover = TokenRemover(tokenizer=nltk_tokenizer, stopwords_files=['stopwords/stopwords-en.txt', 'stopwords/stopwords-punctuations.txt'])
lim_remover = TokenRemover(tokenizer=nltk_tokenizer, stopwords_files=['stopwords/stopwords-limited.txt'])
lim_punc_remover = TokenRemover(tokenizer=nltk_tokenizer, stopwords_files=['stopwords/stopwords-limited.txt', 'stopwords/stopwords-punctuations.txt'])

In [10]:
en2_remover.stopwords = en2_remover.stopwords | set(['the'])

In [12]:
cleaners = [('en', en_remover),('en2', en2_remover),('en4', en4_remover) ,('few', lim_remover)]

In [13]:
datasets_cleaned = {name: DatasetPreprocessor(dataset=dataset, tokenizer=wordpiece, preprocessor=cleaner) for name, cleaner in cleaners}

In [14]:
lx = 0
for doc in datasets_cleaned['few']:
    lx = lx + len(doc['text'])
    print(doc)

{'text': 'compact memories have flexible capacities digital data storage system with capacity up bits and random and or sequential access described', 'docno': '1'}
{'text': 'electronic analogue computer for solving systems linear equations mathematical derivation operating principle and stability conditions for computer consisting amplifiers', 'docno': '2'}
{'text': 'electronic coordinate transformer circuit details given for construction electronic calculating unit which enables polar coordinates vector modulus and cosine or sine argument be derived from those rectangular system axes', 'docno': '3'}
{'text': 'british computer society report conference held cambridge june', 'docno': '4'}
{'text': 'millimicrosecond digital computer logic system fast pulse logic described which combines efficiency transformer coupled stages with digit delay tolerances approaching that dc coupled systems logical circuits for or and inverter and reclock shown together with driver which permits fan out fact

{'text': 'approximation errors diode function generators errors resulting from fitting piecewise linear function smooth curve discussed relation analogue computor applications simple integral based best fis criteria will give relation between number segments and error and also breakpoints between segments numerical results given for typical functions', 'docno': '400'}
{'text': 'theoretical study and method operation logarithmic integrator description computer circuit based diode pump action', 'docno': '401'}
{'text': 'construction digital computing system from basic transistor circuit computer uses identical basic circuits convert decimal numbers and from binary scale diagrams basic circuit and system given printed circuits used', 'docno': '402'}
{'text': 'light pen links computer operator using photodiode read dots produced tube enables operator control associated computer pointing light pen certain dots information can be written into computer', 'docno': '403'}
{'text': 'computers ai

{'text': 'stabilization magnetic field electromagnet system described for controlling both rapid and slow variations magnetic field means transistorized control stage and feedback winding slow variations detected magnetic resonance protons ferric nitrate solution using technique similar that described earlier', 'docno': '918'}
{'text': 'precision thermostat temperature sensitive resistance bridge controls crystal oven give short period temperature stability', 'docno': '919'}
{'text': 'stable thermostat for measurement noise and drift semiconductor circuit elements rectified output bridge circuit containing resistance thermometer used control heating stirred oil bath noise voltage transistors power supply circuit and zener reference diode have been measured very low and audio frequencies using apparatus described', 'docno': '920'}
{'text': 'experimental impedance relay using hall effect semiconductor description definite impedance relay for protection power transmission systems hall out

{'text': 'image parameter theory for mechanical quadripoles compressional or torsional oscillation equivalence parameters mechanical system transmitting compressional or torsional oscillations and parameters electrical transmission line established design mechanical filters described and design formulae with response characteristics given for basic filter sections', 'docno': '1474'}
{'text': 'special band pass half sections design crystal filters branch network type described', 'docno': '1475'}
{'text': 'decimal attenuator network combining properties conventional voltage or current divider with those constant resistance attenuator', 'docno': '1476'}
{'text': 'multiple isolated input metoork with common output design equations developed for general case n isolated inputs with common output measurements made two input and three input units given', 'docno': '1477'}
{'text': 'frequency three phase rc coupled oscillator part two inductive anode load resistance', 'docno': '1478'}
{'text': '

{'text': 'helical coordinate system and its applications electromagnetic theory system described enables problems involving helical symmetry be solved exactly', 'docno': '1891'}
{'text': 'edge condition diffraction problems diffraction em wave perfectly conducting solid considered', 'docno': '1892'}
{'text': 'theory wave scattering periodically uneven surfaces description six approximate mathematical methods for calculating scattering sound or em waves over sea or uneven ground references', 'docno': '1893'}
{'text': 'magnetic double refraction microwaves paramagnetics investigation rotation plane polarization wave circular waveguide containing paramagnetic salt and its dependence intensity static magnetic field perpendicular direction wave propagation', 'docno': '1894'}
{'text': 'spin wave analysis ferromagnetic resonance polycrystalline ferrites dipolar interaction taken into account means spin wave formalism crystalline anisotropy and polycrystalline nature material cause homogeneous

{'text': 'reflexion radio waves from stratified ionosphere modified weak irregularities consideration given scattered wave which accompanies reflexion from stratified ionosphere which there weak irregularities considering these irregularities be confined thin layer near given height possibility examined that they might produce considerably enhanced scattering if they were situated near reflexion level calculated basis geometrical optics it found that they would not have very much greater effect this level it also shown that if electron collision frequency order likely be encountered real ionosphere there would be little enhancement resonance effects kind suggested herlofson', 'docno': '2411'}
{'text': 'irregularities refraction radio waves and large inhomogeneities ionosphere description method and results measurements vertical refraction radio waves ionosphere irregularities refraction produced inhomogeneities dimensions about km f region diurnal variation inhomogeneities analysed and

{'text': 'pulse generator uses junction transistors transistor circuit described giving pulses duration repetition rates pulses with internal delays', 'docno': '2962'}
{'text': 'distortion transistor amplifiers analysis distortion low level grounded emitter stages due input circuit nonlinearity and variations collector base current gain', 'docno': '2963'}
{'text': 'low frequency transistor oscillators theoretical treatment junction transistor oscillator for frequencies few frequency and amplitude stability conversion efficiency and conditions governing starting considered practical circuit design derived and experimental results given', 'docno': '2964'}
{'text': 'build up large signals with elimination reflections magneto strictive storage lines greater reliability and increased storage capacity obtained substituting twelve energizing coils for usual one with this arrangement steel wire delay line can be used see also', 'docno': '2965'}
{'text': 'transistor gating matrix for simulated 

{'text': 'theoretical views drift measurements conversion amplitude pattern drift ground sampled three closely spaced receivers true drift ionosphere requires ionosphere requires determination auto and cross correlation functions problems relating ground pattern drift ionospheric drift stated', 'docno': '3568'}
{'text': 'drift ionized layer presence geomagnetic field efficiency air winds causing ionization drift decreases with atmospheric pressure thus f region electric fields may be main cause drift region electric fields may contribute equally with air winds d region air winds would be main cause effect winds and fields causing drifting cylindrical irregularities each region given', 'docno': '3569'}
{'text': 'height variation horizontal drift velocities e region drift was measured two adjacent frequencies corresponding heights reflection differing about km diurnal change ns and ew velocity components differs phase two heights', 'docno': '3570'}
{'text': 'large scale movements layers 

{'text': 'radio studies during international geophysical year studies discussed under five headings vertical soundings ionospheric drift measurements backscatter radio noise and atmospheric studies and rockets and satellites history program and organization igy briefly outlined', 'docno': '4154'}
{'text': 'theoretical analysis doppler radio signals from earth satellites analysis briefly described and its application calculation orbits two satellites sputnik and explorer from isolated observations made one station given', 'docno': '4155'}
{'text': 'observations us satellites explorers and cw reflection passage satellites explorer and may be detected increased signal strength result ionization from satellite paths see also', 'docno': '4156'}
{'text': 'continuous phase difference measurements earth satellites two similar receivers used operated from common local oscillator phase meter compares af tones from two receivers see also', 'docno': '4157'}
{'text': 'interpretation doppler effect 

{'text': 'design construction and applications electronic digital computers survey whole field with comprehensive bibliography wide range actual and projected applications science and industry discussed including process control and data analysis', 'docno': '4710'}
{'text': 'transistor digital fast multiplier with magnetostrictive storage', 'docno': '4711'}
{'text': 'electrostatic pulse generator pulse patterns required electronic computers generated presenting probe feeding tuned amplifier suitably figured track insulated rotating drum carrying voltage western electronic show and convention los angeles california full text or abstract given papers various components equipment and techniques presented convention abstracts all papers published', 'docno': '4712'}
{'text': 'resistance sheets strips wires tubes and coils various materials frequencies between method calculation applicable materials with widely different conductivities ranging from presented values resistance per square foun

{'text': 'novel circuit for stable variable frequency oscillator circuit designed reduce effect resonator frequency drift uses multiloop feedback arrangement with three oscillators maintained frequencies such that one sum other two sum frequency being controlled quartz crystal while other two controlled variable resonators circuit and frequency drift temperature characteristics experimental oscillator shown', 'docno': '5257'}
{'text': 'hard tube pulsers for radar developments design high vacuum valves make possible construction blocking oscillator pulse modulators comparable size and efficiency with line type thyratron modulators with advantages stability resulting from pulse shape', 'docno': '5258'}
{'text': 'current derived resistance capacitance oscillators using junction transistors two circuits using phase shift networks suitable for use very low frequencies and one with phase shift network giving maximum frequency described', 'docno': '5259'}
{'text': 'pulse scaler circuit with h

{'text': 'two electron example ferromagnetism method dealing with problems ferromagnetism basis energy band theory vol applied simple case', 'docno': '5788'}
{'text': 'law induction survey literature indicates lack clarity this subject law induction formulated generally terms geometrical entity called flux line which exists te field solenoidal vector', 'docno': '5789'}
{'text': 'induction phenomena consequent movement material primary magnetic fields and their experimental applications part fundamental theory for very general cases theory applicable any type motion developed', 'docno': '5790'}
{'text': 'motion electron magnetic undulator rigorous solutions obtained for equations obtained for equations motion electron millimetre wave generator type described motz et al', 'docno': '5791'}
{'text': 'frequencies and power waves radiated magnetic undulator continuation investigation noted above dimensions undulator for generating mm waves determined from rigorous calculations electron traje

{'text': 'galactic radio sources large angular diameter preliminary results given measurements apparent angular width sources previously observed may', 'docno': '6313'}
{'text': 'wide band radio interferometer use wide band amplifier with frequency characteristic given conjunction with aerial system consisting two or four aerials line or aerials arranged symmetrically square results very high resolution example quoted radio sources separated angle ca be resolved theory interferometer given and general form directional characteristic shown', 'docno': '6314'}
{'text': 'geomagnetic bay disturbances and their nonuniform induced components within earth bay disturbance considered field travelling dipole auroral zone direction travel depending time day induced component within earth exhibits local variations depending conductivity dynamic anomaly observed area central europe vertical component being maximum north and minimum south when ionosphere current directed southward', 'docno': '6315'}


{'text': 'coaxial transmission line filters analysis for coaxial line band pass filters given based tem mode transmission influence nature and spacing obstacles bandwidth discussed measurements filters various constructions support theoretical results te mode high pass filter also discussed for narrow band applications this type filter inferior cavity type', 'docno': '6795'}
{'text': 'nonlinear filtering and waveshape multiplexing method providing two channels one carrier consists differentiating combined signal clipping remove one component and re integrating restore other removed component recovered subtracting retained component from original input experimental circuits and waveforms obtained illustrated two components being respectively sine wave and square wave', 'docno': '6796'}
{'text': 'functional circuit diagrams shortened version paper abstracted january', 'docno': '6797'}
{'text': 'transistors airborne equipment discussion advantages using transistors instead valves aircraft

{'text': 'initial rise brightness electroluminescent substances under action alternating field observed phenomena can be explained assumption excitation luminescence centres conduction band electrons that have acquired sufficient energy these electrons reach conduction band from electron donor levels few tenths electron volt below and then accelerated', 'docno': '7306'}
{'text': 'influence electric fields luminescence brief report investigation luminescence zns cu phosphor excited near ultraviolet radiation and subjected alternating electric fields frequencies up procedure used was similar that destriau', 'docno': '7307'}
{'text': 'effect electric field continuously excited phosphor when alternating electric field applied zns cu phosphor excited steady state luminescence momentary increase luminescence observed with subsequent drop and slow change new steady state periodic intensity ripple double field frequency e being superposed switching off field generally results new momentary int

{'text': 'electronic band structure solids brief report conference held radar research establishment malvern september which criticisms energy band theory were advanced', 'docno': '7844'}
{'text': 'isotropic component cosmic radio frequency radiation possible sources isotropic component deduced westerhout and oort february discussed integrated radition from normal extragalactic nebulae shown be inadequate account for observations which may be explained basis collisions between galaxies intensity be expected depends strongly cosmological theory assumed hence accurate measurements extragalactic component may yield information value for distinguishing between different theories', 'docno': '7845'}
{'text': 'prediction present sunspot cycle reliability prediction method presented previously examined with reference observed sunspot data for period', 'docno': '7846'}
{'text': 'observation total eclipse sun june hagaby sweden and partial eclipse using radio telescope meudon observatory curve o

{'text': 'relation between intensity distributions over radiating systems and their directivity characteristics fourier transform analysis used show that contrary fischers result directivity characteristic linear or plane radiator finite dimensions uniquely related amplitude distribution over radiator theory extended consideration propagation media with local inhomogeneities acting secondary radiators and investigation made relation between distribution inhomogeneities and directivity characteristic scattered radiation', 'docno': '8383'}
{'text': 'study electromagnetic field vicinity reflecting surface analysis given for general case plane incident wave with elliptical polarization if reflecting surface infinite plane reflected wave also plane modifications reflected wave other types reflecting surface mentioned', 'docno': '8384'}
{'text': 'simultaneous partial absorption reflection and transmission nonuniform plane wave thin metal film theory developed for uniform waves extended nonun

{'text': 'interaction terrestrial magnetic field with solar corpuscular radiation shape cavity between earth and neutral solar stream differs only from hemisphere radius earth radii incident side but elongated other side distance order earth radii cavity idented polar latitudes where current layer reverses', 'docno': '9024'}
{'text': 'cause magnetic storms and bays circulating currents set up around regions proton and electron precipitation hall effect suggested cause magnetic bays and storms', 'docno': '9025'}
{'text': 'daytime enhancement amplitude geomagnetic sudden impulses equatorial region igy data from equatorial stations provide some evidence that sudden impulses and sudden commencements caused similar mechanisms', 'docno': '9026'}
{'text': 'evidence quasi perpendicular propagation hydromagnetic waves caused nuclear explosions over johnston island', 'docno': '9027'}
{'text': 'verification earths pear shape gravitational harmonic errors predictions orbit transit ib satellite acc

{'text': 'effect series resistance photovoltaic solar energy conversion series resistance photovoltaic cell divided into two components contact and sheet resistance each components examined theoretically and experimentally and qualitative agreement between theory and experiment shown it concluded that contact resistance reduces conversion efficiency more than sheet resistance', 'docno': '9664'}
{'text': 'considerations photoemissive energy converters efficiency o solar energy converter consisting emitter and ag collector calculated taking into account initial velocities photoelectrons but ignoring space charge efficiencies between and obtained for output voltages between and efficiency increases percentage blue and ultraviolet radiation source increased minimize space charge effects emitter collector spacing must be order or less', 'docno': '9665'}
{'text': 'optimum capacitor charging efficiency for space systems several theorems for perfect time shaped source voltages which optimize e

{'text': 'average forces electromagnetic systems for ar linear and loss free system it shown that time average force can be expressed directly measurable circuit theory parameters', 'docno': '10231'}
{'text': 'supplement study electromagnetic point point sources supplement previously published paper establishing that there only two fundamental fields which can effectively be replaced point source corresponding distribution concentric about its origin', 'docno': '10232'}
{'text': 'cherenkov radiation for dipole moment medium with spatial dispersion treatment electric and magnetic dipoles isotropic nongyrotropic medium radiation from ring current such medium also considered', 'docno': '10233'}
{'text': 'radiation charged particles passing through electron plasma external magnetic field coupling between longitudinal plasma wave and transverse em wave modifies nature radiation particle velocities greater than that required for excitation plasma waves non cherenkov type radiation will occur

{'text': 'radiation from modulated electron beam with plasma background expressions derived for electromagnetic radiation current modulated electron beams which emerge from plasma simple expressions derived case where dielectric constant plasma zero or infinity', 'docno': '10924'}
{'text': 'diffraction electromagnetic wates sound waves diffraction amplitude calculated from borns approximation results measurements microwaves affected standing sound frequency and polarization em waves', 'docno': '10925'}
{'text': 'transmission and reflection electromagnetic waves plasma boundary for arbitrary angles incidence plane em wave horizontally or vertically polarized incident arbitrary angle flat plasma boundary investigated numerical results showing variation attenuation and phase power entering plasma and ellipticity reflected wave presented', 'docno': '10926'}
{'text': 'some features transverse propagation high frequency waves magneto active plasma mathematical analysis based kinetic equation

In [15]:
lx

2649415

In [16]:
ly = 0
for doc in datasets_cleaned['en2']:
    ly = ly + len(doc['text'])
    print(doc)

{'text': 'compact memories have flexible capacities digital data storage system with capacity bits and random and sequential access described', 'docno': '1'}
{'text': 'electronic analogue computer for solving systems linear equations mathematical derivation operating principle and stability conditions for computer consisting amplifiers', 'docno': '2'}
{'text': 'electronic coordinate transformer circuit details are given for construction electronic calculating unit which enables polar coordinates vector modulus and cosine sine argument derived from those rectangular system axes', 'docno': '3'}
{'text': 'british computer society report conference held cambridge june', 'docno': '4'}
{'text': 'millimicrosecond digital computer logic system fast pulse logic described which combines efficiency transformer coupled stages with digit delay tolerances approaching that dc coupled systems logical circuits for and inverter and reclock are shown together with driver which permits fan out factor tran

{'text': 'model phosphors basis quantum mechanics transition probabilities with constant defects and discrete spectrum', 'docno': '52'}
{'text': 'quantum mechanical theory dielectric orientation polarization gases orientation dipolar gas consisting symmetric spin molecules attenuating electric field', 'docno': '53'}
{'text': 'influence thomson effect relationship for constrictive resistance thermal equilibrium', 'docno': '54'}
{'text': 'investigation highfrequency resonant discharge experimental study discharge magnetic field frequency range theoretical and experimental analysis made breakdown conditions and characteristics and properties plasma discharge', 'docno': '55'}
{'text': 'application schottkys diffusion theory discharges with several types ions and excited neutral particles', 'docno': '56'}
{'text': 'low pressure plane symmetric discharge', 'docno': '57'}
{'text': 'pulse technique for probe measurements gas discharges', 'docno': '58'}
{'text': 'frequency multiplication second

{'text': 'image parameter theory simple aid realization crystal band pass filters branch network type design formulae and curves are given for ladder type networks with crystal shunt arms', 'docno': '654'}
{'text': 'new coaxial resonator filter band pass filter with derived type characteristic described consisting loop coupled coaxial resonators reentrant arrangement performance may described terms equivalent lattice network good agreement shown between experimental and theoretical insertion loss characteristics', 'docno': '655'}
{'text': 'analysis active networks admittance matrices admittance matrices valves and transistors are derived directly and from their indefinite admittance matrices', 'docno': '656'}
{'text': 'considerations oscillator stability methods reducing microphony effects and temperature dependence oscillators television tuners', 'docno': '657'}
{'text': 'procedure for tuning bridge stabilized crystal oscillator meacham bridge circuit used which lamp replaced thermist

{'text': 'note diffraction infinite slit previous results for plane waves longer wavelengths are extended obtain transmission coefficients oblique angles incidence cylindrical waves are considered briefly', 'docno': '717'}
{'text': 'diffraction spheroid diffracted field determined two methods for wavelengths small compared spheroid results are applied evaluate back scattered fields', 'docno': '718'}
{'text': 'investigations artificial dielectrics microwave frequencies part two expression for phase change transmission wave incident parallel plate type artificial dielectric function plate spacing has been verified experimentally', 'docno': '719'}
{'text': 'reflection electromagnetic waves from medium excited acoustic waves theory developed for variation density and temperature liquid caused acoustic wave generating regions maximum and minimum dielectric constant modulation wave reflected from acoustically disturbed water surface observed', 'docno': '720'}
{'text': 'confinement charged pa

{'text': 'resolving time and flipping time magnetoresistive flip flops time constant exponential approach stable states shown fair approximation for flipping time bridge type magnetoresistive flip flop order for materials present available method calculating flipping time extended more general networks and dependence flipping time incoming pulse considered', 'docno': '1329'}
{'text': 'impulse governed oscillator techniques part one general principles techniques are discussed special circuits and theoretical data will given subsequent issues', 'docno': '1330'}
{'text': 'monochromaticity and noise regenerative electrical oscillator expressions are derived for departure from monochromaticity regenerative oscillator represented equivalent rlc circuit haing negative resistance parallel with intensity and band width thermal noise generated oscillator resistance are also considered', 'docno': '1331'}
{'text': 'frequency stability stable multivibrators using transistors dependence multivibrato

{'text': 'experimental investigation effects confusion survey localized radio sources surveys sources have been made using total power equipment and interferometer results are compared', 'docno': '1386'}
{'text': 'radio emission from gygnus loop good positional agreement has been obtained between several radio and optical features but intense extended source lying south centre loop not correlated with any outstanding optical feature', 'docno': '1387'}
{'text': 'spectral analysis radio sources cygnus results support conclusions other workers showing that emission from optically thin regions except for strongest source region tie source which was found have nonthermal component', 'docno': '1388'}
{'text': 'ionosphere jupiter approximate analysis based chapman theory suggests that jupiter may possess ionosphere critical frequency existing few hundred kilometres above visible clouds', 'docno': '1389'}
{'text': 'radio emission from jupiter report measurements made four element interferomete

{'text': 'determination orbit artificial satellite theory method depending doppler measurements four stations initial assumptions about orbit are required', 'docno': '2022'}
{'text': 'new method tracking artificial earth satellites photoelectric method optical tracking has been developed which appears have precision comparable with astronomical observations record observations rocket sputnik july tatsfield surrey shown', 'docno': '2023'}
{'text': 'tracking orbits man made moons description minitrack interferometer type system designed measure satellite position accuracy within seconds arc with time precision one millisecond', 'docno': '2024'}
{'text': 'doppler satellite measurements note records obtained during successful and unsuccessful launchings', 'docno': '2025'}
{'text': 'further radio observations artificial satellites report data obtained florence italy from radio observations satellites and', 'docno': '2026'}
{'text': 'signal strength recordings satellite sputnik college alask

{'text': 'ferroelectrics tune electronic circuits circuits are described which carying bias used change capacitance ferroelectric capacitors used tuning elements', 'docno': '2060'}
{'text': 'direct voltage instantaneous breakdown oil impregnated paper capacitors function area experimental results given show that variation mean breakdown voltage with increasing area represented adequately milnors equation coefficient variation appears independent area but decreases with increasing thickness dielectric', 'docno': '2061'}
{'text': 'comparison wool wax and petroleum jelly impregnants for paper capacitors results given show that wool wax impregnants give about higher capacitance without any serious loss performance', 'docno': '2062'}
{'text': 'equivalent circuit oscillating piezoelectric crystal rods formulae are derived for calculating circuit parameters for longitudinal torsional and flexural oscillations taking account internal damping and air gap between electrodes and crystal', 'docno'

{'text': 'present applications semiconductors description semiconductor applications with particular reference dc amplification for analogue computers', 'docno': '2700'}
{'text': 'wide band amplification high frequencies transmitter incorporating wide band amplifier covering described distributed amplification used with artificial transmission lines which shunt capacitances are provided interelectrode capacitances values', 'docno': '2701'}
{'text': 'design cathode follower circuits detailed summary design principles and operating characteristics with description several circuit applications', 'docno': '2702'}
{'text': 'fundamentals low noise hf preamplifier for circuit described uses disk seal triode grounded grid connection and intended for radio astronomy applications wavelength and with bandwidth input noise figure', 'docno': '2703'}
{'text': 'contribution study operation magnetic amplifiers operation basic magnetic amplifier circuit analysed assuming square wave input and rectangul

{'text': 'transistor relays have low idling current two transistor circuits for remote control are shown which operate electromechanical relay from output signal receiver this may carrier modulated pulsed microwave signal using transistors idling current few microamps', 'docno': '3361'}
{'text': 'magnetic regulation transistor power supply method uses saturable reactors input and load circuit supply for close regulation silicon diode reference voltage may used', 'docno': '3362'}
{'text': 'compact supplies have wide range regulation use solid state rectifiers and high current regulator tubes eliminates power transformer and reduces size and cost power supplies units have regulation better than ripple and stacking factor about capacity per inch rack space', 'docno': '3363'}
{'text': 'method designing small power transformers for communication equipment', 'docno': '3364'}
{'text': 'effect capacitance power supply filter bounce discussion design power supply filters', 'docno': '3365'}
{'te

{'text': 'note positive ion effects pulsed electron beams experimental results are given for electron beams duration under conditions normally encountered high power microwave valves', 'docno': '3996'}
{'text': 'energy spectrum electron beam after passing through thin metallic film energy loss determined for films and', 'docno': '3997'}
{'text': 'very high voltage electron diffraction description diffractograph operating voltages', 'docno': '3998'}
{'text': 'use electron diffraction for magnetic analysis brief note method evaluating magnetic field strength from eccentricity superposed diffraction rings', 'docno': '3999'}
{'text': 'electron scattering phenomena new methods investigations are reviewed', 'docno': '4000'}
{'text': 'diffusion and elastic collision losses fast electrons plasmas when energy spectrum hg pressure can separated into primary high energy and secondary low energy maxwellian group then mean free path fast electrons can evaluated use diffusion law found independent p

{'text': 'space charge effects electron optical systems discussion spherical aberration beams exhibiting waist conditions for transition from waist cross over are examined experimental results support theory', 'docno': '4646'}
{'text': 'mean free path for discrete electron energy losses metallic foils', 'docno': '4647'}
{'text': 'transmission electrons energies below aluminium oxide films thickness', 'docno': '4648'}
{'text': 'critical examination theory plasmas based mean free path light method based distribution function derived solving boltzmanns equation discussion indicates that distribution function method more suitable than mean free path method for calculating conductivity anisotropic plasmas velocity analysis thermionic emission from single crystal tungsten suggested that departure from maxwellian distribution observed hutson due finite resolution analyser', 'docno': '4649'}
{'text': 'spin deviation theory ferromagnetism', 'docno': '4650'}
{'text': 'origin ferromagnetism trans

{'text': 'concavity resistance functions short proof given theorem presented shannon and hagelbarger', 'docno': '5243'}
{'text': 'ferromagnetic coupling between crossing coils discussion operation devices comprising toroidal and solenoidal coil wound common cylindrical saturated ferromagnetic core gating coincidence and signal storage applications are mentioned', 'docno': '5244'}
{'text': 'self healing metallized paper capacitors physical and manufacturing techniques involved production truly self healing single layer metallized paper capacitors are described', 'docno': '5245'}
{'text': 'decomposition derivative impedance for two terminal network extension theory presented previously', 'docno': '5246'}
{'text': 'design data for ladder networks design data for networks with nine branches and attenuation curves for networks with eleven branches are presented expansion previous work', 'docno': '5247'}
{'text': 'directional channel separation filters networks are discussed which combine pr

{'text': 'self consistent calculation dissociation oxygen upper atmosphere part three body recombinations continuation work noted results based assumption that recombination occurs mainly three body nonradiative process indicate that dissociation occurs level about higher than that previously calculated', 'docno': '5808'}
{'text': 'bifurcation region retardation time relative uhf reference signal radio signal frequency about above critical frequency was measured rocket borne instruments electron density altitude graphs were derived from retardation time time flight records bifurcation layer clearly indicated results are close agreement with those calculated from records obtained with nbs model ionosphere recorder separation electron density maxima was', 'docno': '5809'}
{'text': 'direction finding studies large scale ionospheric irregularities continuation work noted report given measurements pulse modulated transmissions frequency range reflected from ionosphere both vertical incidenc

{'text': 'generalized equations for rc phase shift oscillators general solutions are obtained for three and four section phase shift networks these solutions are reduced design equations for number specific circuit configurations and complete solution given for four section phase shift oscillator', 'docno': '6357'}
{'text': 'oscillator feedback networks minimum attenuation single valve oscillators with feedback networks comprising three resistors and three capacitors are discussed optimum values for components are determined analysis and are plotted functions valve output resistance for given value valve input resistance both shunt and shunt networks are considered former being recommended for very low frequencies and latter for relatively high ones', 'docno': '6358'}
{'text': 'classification quartz crystal oscillator circuits classification based quadripole representation two main classes are distinguished according circuit represented one two quadripoles feedback line first class inc

{'text': 'nevitron mercury arc rectifier description new type rectifier with hg pool cup with external cooling fins provided cup wetted hg give concave meniscus cathode spot runs continuous line round edge hg cooling system ensures excessive emission vapour auxiliary electrode lifted from hg means solenoid serves start arc voltage drop across arc weight nevitron only excluding ignition solenoid types with grid control have also been tested power required grid circuit for full control only one thousandth that for multianode rectifier', 'docno': '6862'}
{'text': 'improved stabilization from voltage regulator tube using suitable barretter place resistor usually connected series with voltage regulator tube definite improvement performance obtained barretter type circuit most useful when load current relatively high', 'docno': '6863'}
{'text': 'stabilizer for control high direct voltages development and performance stabilizer for direct voltages order several thousands volts are described a

{'text': 'more rc oscillator', 'docno': '7383'}
{'text': 'frequency spectrum pulled oscillator conclusions reached buchanan are presented simpler and more precise form', 'docno': '7384'}
{'text': 'blocking oscillators use blocking oscillators generators voltage current pulses repetitive sawtooth voltage current waveforms examined some detail with description thd mode operation vz various circuits considered and discussion factors affecting their practical design', 'docno': '7385'}
{'text': 'impulse governed oscillator system for frequency stabilization', 'docno': '7386'}
{'text': 'improved lock amplifier stable sinusoidal voltage drives square wave generator whose output applied anodes pair diodes phase opposition signal applied anodes pair diodes phase opposition signal applied diode cathodes equal phase dc output voltage developed between two cathodes directly proportional signal and largely independent input noise level value stable voltage and valve parameters', 'docno': '7387'}
{'

{'text': 'study gaseous discharges magnetic resonance measurements are reported which indicate that technique magnetic resonance may prove useful for studying characteristics low pressure gas discharges', 'docno': '7930'}
{'text': 'relaxation oscillations and noise from low current arc discharges noise measurements were made arcs using various electrode materials and gases over frequency range results for electrodes air are plotted comparison with computed spectrum pulses generated circuit formed series limiting resistor and capacitance across discharge are considered responsible for most noise', 'docno': '7931'}
{'text': 'formative time cathodic space charge criterion for evaluating formative time based observations rise time and overshoot intermittent discharges different kinds intermittent discharges are reviewed', 'docno': '7932'}
{'text': 'intermittent discharges air low pressure experimental investigation reported dependence discharge frequency gas pressure and electrode separati

{'text': 'direct measurement ellipticity rf cotton effect quarter wave method measurements were made using circular waveguide diameter with aerial one end connected klystron and rotatable probe other end two copper pins lengths and respectively are fixed apart axial insulator rod making angles and respectively with direction electric field variation with wavelength plane polarization and degree extinction obtainable are shown graphically shown that wave elliptically polarized inserting quarter wave unit produce linear polarization', 'docno': '8387'}
{'text': 'electron spin resonance absorption metals part experimental measurements were made pd and over temperature range using cavity resonator technique frequencies and dimensions specimens were some cases large and others small compared with skin depth results are compared with those predicted dysons theory', 'docno': '8388'}
{'text': 'electron spin resonance absorption metals part theory electron diffusion and skin effect', 'docno': '8

{'text': 'some studies bifurcation ionospheric layer into and ionograms recorded frequent intervals cambridge and show how layers develop sunrise quiet and disturbed days and different seasons', 'docno': '8922'}
{'text': 'pre sunrise phenomenon and presence negative ions region ionosphere increases layer electron density which occur about two hours before ground sunrise might due photodetachment electrons from negative ions', 'docno': '8923'}
{'text': 'test procedure for easy estimation representative monthly electron density profiles for ionosphere profile produced from mean daily virtual height curves well within one standard deviation mean individual profiles', 'docno': '8924'}
{'text': 'study observed spread examination large number ionograms shows effect using different receiver gain settings and reveals night phenomenon degree spreading varies with magnetic dip', 'docno': '8925'}
{'text': 'possible importance nitric oxide formation during polar cap ionospheric absorption events p

{'text': 'penetration microwaves into rarer medium total reflection using right angle prisms separated narrow air gap transmission and reflection coefficients paraffin sulphur and have bee measured function gap width', 'docno': '9466'}
{'text': 'refraction and diffraction pulses solution for reflection and refraction plane pulse plane surface obtained fourier synthesis using method which also valid when total reflection occurs', 'docno': '9467'}
{'text': 'spectrometer for paramagnetic electron resonance with different methods detection equipment described can used for klystron frea spectrometer for paramagnetic electron resonance with different methods detection equipment described can used for klystron frequency modulation and magnetic sweep methods measurement', 'docno': '9468'}
{'text': 'theory ferro and antiferromagnetic resonance absorption', 'docno': '9469'}
{'text': 'longitudinal ferrimagnetic resonance susceptibility components are calculated for triangular ferrimagnetic system

{'text': 'surface distributions fixed charges and poynting vector system permanent volume currents theoretical study conditions hollow cylindrical conductor', 'docno': '9972'}
{'text': 'present state theory electric breakdown solid dielectrics survey theory first stage dielectric breakdown loss electric strength can probably given best terms kinetic equation for electrons solid effective cross sections ionization for low energy electrons probability scattering medium energy electrons vibrating lattice and effective cross section recombination must determined theory second stage actual breakdown has not yet been given suggested that cause breakdown could connected with processes leading production shock wave analogous that occurring breakdown gases high pressures references include over russian literature', 'docno': '9973'}
{'text': 'mechanism discharge between plane plates problem considered using diffusion theory positive column with term added take account character discharge measure

{'text': 'radar boxcar circuits suing nuvistors transistors and zeners two versions waveform sampling device with zener diode biasing are described which give signal noise ratio room temperature', 'docno': '10633'}
{'text': 'amplifiers with band filter coupling design multistage amplifiers with flat tchebycheff type response considered design formulae and response cuf curves are given', 'docno': '10634'}
{'text': 'transistor amplifier stages with prescribed gain and sensitivity design procedure developed for circuits meeting prescribed gain and sensitivity criteria terms anticipated operational variations and available component data', 'docno': '10635'}
{'text': 'engineering approach design transistor feedback amplifiers technique developed for amplifier design which produces very accurate expressions for transmittance without requiring any quantitative information about transistors', 'docno': '10636'}
{'text': 'dielectric harmonic amplifier controllable harmonic generator circuits inc

{'text': 'influence differential transformer attenuation characteristic differential bridge filters', 'docno': '11314'}
{'text': 'duality nonplanar electric circuits without use ideal transformers method constructing dual any linear lumped nonplanar network which does not include magnetic coupling given without using ideal transformers', 'docno': '11315'}
{'text': 'hall effect microwave mixer signal frequency current and magnetic field local oscillator frequency produce hall effect emf across semiconducting plate minimum conversion loss achieved', 'docno': '11316'}
{'text': 'survey very low frequency oscillators methods generating frequencies below for vibration work and for testing servomechanisms are outlined major characteristics instruments are tabulated', 'docno': '11317'}
{'text': 'high stability transistor oscillator for design development construction and testing oscillator are described power output stable within and frequency within ove temperature range', 'docno': '11318'}
{

In [17]:
import pyterrier_colbert.indexing
import torch
import os

In [18]:
ly

2649949

In [19]:
if not os.path.exists('./indexes/index.base.vaswani/'):
    base_indexer = pyterrier_colbert.indexing.ColBERTIndexer(checkpoint, "./indexes", "index.base.vaswani", chunksize=3, num_partitions=512)
    base_indexer.index(dataset.get_corpus_iter(), )

In [20]:
clean_indexers = {}
for name, data_iter in datasets_cleaned.items():
    if not os.path.exists(f'./indexes/index.clean.{name}.vaswani/'):
        cleaned_indexer = pyterrier_colbert.indexing.ColBERTIndexer(checkpoint, f'./indexes', f'index.clean.{name}.vaswani', chunksize=3, num_partitions=256)
        cleaned_indexer.index(data_iter)
        clean_indexers[name] = cleaned_indexer
        clean_indexer = None
        torch.cuda.empty_cache()

[Mar 04, 17:11:31] [0] 		 #> Local args.bsize = 128
[Mar 04, 17:11:31] [0] 		 #> args.index_root = ./indexes
[Mar 04, 17:11:31] [0] 		 #> self.possible_subset_sizes = [69905]


Some weights of ColBERT were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['linear.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[Mar 04, 17:11:34] #> Loading model checkpoint.
[Mar 04, 17:11:34] #> Loading checkpoint http://www.dcs.gla.ac.uk/~craigm/colbert.dnn.zip




[Mar 04, 17:11:42] #> checkpoint['epoch'] = 0
[Mar 04, 17:11:42] #> checkpoint['batch'] = 44500




[Mar 04, 17:11:43] #> Note: Output directory ./indexes already exists




[Mar 04, 17:11:43] #> Creating directory ./indexes/index.clean.en.vaswani 


[Mar 04, 17:13:41] [0] 		 #> Completed batch #0 (starting at passage #0) 		Passages/min: 5.8k (overall),  6.0k (this encoding),  7690.4M (this saving)
[Mar 04, 17:13:41] [0] 		 [NOTE] Done with local share.
[Mar 04, 17:13:41] [0] 		 #> Joining saver thread.
[Mar 04, 17:13:41] [0] 		 #> Saved batch #0 to ./indexes/index.clean.en.vaswani/0.pt 		 Saving Throughput = 4.4M passages per minute.

#> num_embeddings = 341244
[Mar 04, 17:13:41] #> Starting..
[Mar 04, 17:13:41] #> Processing slice #1 of 1 (range 0..1).
[Mar 04, 17:13:41] #> Will write to ./indexes/index.clean.en.vaswani/ivfpq.256.faiss.
[Mar 04, 17:13:41] #> Loading ./indexes/index.clean.en.vaswani/0.sample ...
#> Sample has shape (17062, 128)
[Mar 04, 17:13:41] Preparing resources f

Some weights of ColBERT were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['linear.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[Mar 04, 17:13:56] #> Loading model checkpoint.
[Mar 04, 17:13:56] #> Loading checkpoint http://www.dcs.gla.ac.uk/~craigm/colbert.dnn.zip
[Mar 04, 17:14:03] #> checkpoint['epoch'] = 0
[Mar 04, 17:14:03] #> checkpoint['batch'] = 44500




[Mar 04, 17:14:04] #> Note: Output directory ./indexes already exists




[Mar 04, 17:14:04] #> Creating directory ./indexes/index.clean.en2.vaswani 


[Mar 04, 17:16:07] [0] 		 #> Completed batch #0 (starting at passage #0) 		Passages/min: 5.6k (overall),  5.8k (this encoding),  7629.2M (this saving)
[Mar 04, 17:16:07] [0] 		 [NOTE] Done with local share.
[Mar 04, 17:16:07] [0] 		 #> Joining saver thread.
[Mar 04, 17:16:07] [0] 		 #> Saved batch #0 to ./indexes/index.clean.en2.vaswani/0.pt 		 Saving Throughput = 3.4M passages per minute.

#> num_embeddings = 442442
[Mar 04, 17:16:07] #> Starting..
[Mar 04, 17:16:07] #> Processing slice #1 of 1 (range 0..1).
[Mar 04, 17:16:07] #> Will write to ./indexes/index.clean.en2.vaswani/ivfpq.256.faiss.
[Mar 04,

Some weights of ColBERT were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['linear.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[Mar 04, 17:16:25] #> Loading model checkpoint.
[Mar 04, 17:16:25] #> Loading checkpoint http://www.dcs.gla.ac.uk/~craigm/colbert.dnn.zip
[Mar 04, 17:16:32] #> checkpoint['epoch'] = 0
[Mar 04, 17:16:32] #> checkpoint['batch'] = 44500




[Mar 04, 17:16:33] #> Note: Output directory ./indexes already exists




[Mar 04, 17:16:33] #> Creating directory ./indexes/index.clean.en4.vaswani 


[Mar 04, 17:18:35] [0] 		 #> Completed batch #0 (starting at passage #0) 		Passages/min: 5.6k (overall),  5.8k (this encoding),  8637.2M (this saving)
[Mar 04, 17:18:35] [0] 		 [NOTE] Done with local share.
[Mar 04, 17:18:35] [0] 		 #> Joining saver thread.
[Mar 04, 17:18:35] [0] 		 #> Saved batch #0 to ./indexes/index.clean.en4.vaswani/0.pt 		 Saving Throughput = 4.2M passages per minute.

#> num_embeddings = 378275
[Mar 04, 17:18:35] #> Starting..
[Mar 04, 17:18:35] #> Processing slice #1 of 1 (range 0..1).
[Mar 04, 17:18:35] #> Will write to ./indexes/index.clean.en4.vaswani/ivfpq.256.faiss.
[Mar 04,

In [21]:
print('gg')

gg
