In [2]:
%pip install --upgrade --quiet --proxy=http://127.0.0.1:2080 langchain langchain-community langchainhub langchain-openai langchain-chroma bs4

Note: you may need to restart the kernel to use updated packages.


In [1]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from typing import Any, Dict, Iterator, List, Mapping, Optional
from langchain_core.embeddings import Embeddings
from langchain_core.language_models.llms import LLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms.ollama import Ollama

In [2]:
import transformers
from torch import cuda, bfloat16

from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from sentence_transformers import SentenceTransformer

In [3]:
# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=bfloat16,
# )


# aya_checkpoint = "CohereForAI/aya-101"
e5_checkpoint = "intfloat/multilingual-e5-large-instruct"

# aya_tokenizer = AutoTokenizer.from_pretrained(aya_checkpoint)
# aya_model = AutoModelForSeq2SeqLM.from_pretrained(
#     aya_checkpoint, quantization_config=bnb_config
# )

e5_model = SentenceTransformer(e5_checkpoint)

jina_model = SentenceTransformer("jinaai/jina-embeddings-v2-base-en")
jina_model.max_seq_length = 8096

Some weights of BertModel were not initialized from the model checkpoint at jinaai/jina-embeddings-v2-base-en and are newly initialized: ['embeddings.position_embeddings.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.0.output.LayerNorm.weight', 'encoder.layer.0.output.dense.bias', 'encoder.layer.0.output.dense.weight', 'encoder.layer.1.intermediate.dense.bias', 'encoder.layer.1.intermediate.dense.weight', 'encoder.layer.1.output.LayerNorm.bias', 'encoder.layer.1.output.LayerNorm.weight', 'encoder.layer.1.output.dense.bias', 'encoder.layer.1.output.dense.weight', 'encoder.layer.10.intermediate.dense.bias', 'encoder.layer.10.intermediate.dense.weight', 'encoder.layer.10.output.LayerNorm.bias', 'encoder.layer.10.output.LayerNorm.weight', 'encoder.layer.10.output.dense.bias', 'encoder.layer.10.output.dense.weight', 'encoder.layer.11.intermediate.dense.bias', 'encoder.layer.11.intermedi

In [4]:
class E5Embeddings(Embeddings):
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        global e5_model
        embeddings = e5_model.encode(
            texts, convert_to_tensor=True, normalize_embeddings=True
        )
        return [embedding.tolist() for embedding in embeddings]

    def embed_query(self, text: str) -> List[float]:
        task_description = (
            "Given a web search query, retrieve relevant passages that answer the query"
        )
        query = f"Instruct: {task_description}\nQuery: {text}"
        resp = self.embed_documents([query])[0]
        return resp


class JinaEmbeddings(Embeddings):
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        global jina_model
        embeddings = jina_model.encode(
            texts, convert_to_tensor=True, normalize_embeddings=True
        )
        return [embedding.tolist() for embedding in embeddings]

    def embed_query(self, text: str) -> List[float]:
        resp = self.embed_documents([text])[0]
        return resp


class Aya101LLM(LLM):
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        global aya_model
        global aya_tokenizer

        inputs = aya_tokenizer.encode(prompt, return_tensors="pt").to("cuda")
        outputs = aya_model.generate(inputs, max_new_tokens=1024)
        resp_content = aya_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return resp_content

    @property
    def _llm_type(self) -> str:
        return "aya101"

In [6]:
llm = Aya101LLM()
# embeddings = E5Embeddings()
# llm = Ollama(model="mistral")
# embeddings = OllamaEmbeddings(model="nomic-embed-text")
embeddings = E5Embeddings()

In [14]:
# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
# vectorstore.delete_collection()
# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages(
    (
        "human",
        """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
        Question: {question} 
        Context: {context} 
        Answer:""",
    ),
)


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


def translate_prompt(prompt: str):
    return Aya101LLM()._call(
        f"Translate the following text from Persian to English: {prompt}"
    )

def translate_result(prompt: str):
    return Aya101LLM()._call(
        f"Translate the following text from English to Persian: {prompt}"
    )



In [28]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | translate_prompt
    | llm
    | translate_result
    | StrOutputParser()
)

In [29]:
rag_chain.invoke("tell me about ReAct")

'ReACT (Rapid Action and Calculation Technology) یک پروژه تحقیقاتی شرکت مایکروسافت است که بر توسعه سیستم هایی که قادر به انجام محاسبات و اقدامات در زمان واقعی بر اساس ورودی کاربر هستند تمرکز دارد. این پروژه از الگوریتم های یادگیری ماشین برای درک موقعیت و نیت استفاده می کند و پاسخ های سریع را فراهم می کند. ReACT طراحی شده است تا در زمان واقعی سوالات زبان طبیعی را پردازش کند و با استفاده از پایگاه داده های بزرگ و تکنیک های تحلیلی زبان طبیعی، پاسخ های دقیق و مرتبط را ارائه دهد. این پروژه هدف خود را بهبود کارایی کلی با کاهش نیاز به جستجوی دستی و بررسی داده ها دارد.'

In [39]:
data = ' This blog post provides instructions on how to implement an architecture by writing all the necessary code, following a specific markdown format and file naming convention. It emphasizes the importance of making sure each file is functional, follows best practices for different languages, and contains all imports and dependencies.'

Aya101LLM()._call(f"Translate the following text from english to Persian: {data}")

'این پست وبلاگ دستورالعمل هایی را در مورد چگونگی پیاده سازی یک طراحی با نوشتن تمام کد مورد نیاز، با استفاده از یک فرمت خاص Markdown و استاندارد نام گذاری فایل ارائه می دهد. آن بر اهمیت اطمینان از اینکه هر فایل عملکردی باشد، با بهترین شیوه ها برای زبان های مختلف مطابقت داشته باشد، و تمامی واردات و وابستگی ها را شامل شود، تاکید می کند.'

In [20]:
[x.page_content for x in retriever.invoke("tell me about ReAct")]

['ReAct (Yao et al. 2023) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. use Wikipedia search API), while the latter prompting LLM to generate reasoning traces in natural language.\nThe ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:\nThought: ...\nAction: ...\nObservation: ...\n... (Repeated many times)',
 'ReAct (Yao et al. 2023) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. use Wikipedia search API), while the latter prompting LLM to generate reasoning traces in natural language.\nThe ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as:\nThought: ...\nAction: ...\nObservation: .

In [2]:
large_text = """Chapter Two: RADAR SYSTEMS
Subchapter: Introduction
Note The following overview of radar is intended for the non-specialist reader. It is written in such a way as to be accessible and can in no way be taken as a full description of what is an extremely complex technology. 

Radar (standing for RAdio Direction And Ranging) functions by generating an output of microwave (see following) energy which is focused into a beam and illuminates an object in space. A small proportion of this energy is reflected back towards the radar where it is detected by an integral receiver. Microwave energy falls between infrared radiation and radio waves within the electromagnetic spectrum and has frequency and wavelength values in the range 0.03 to 100 Gigahertz (abbreviated to GHz) and 1 m to 3 mm respectively. It should also be noted that the microwave segment of the electromagnetic spectrum is a subset of its Radio Frequency (abbreviated to RF) segment which has a frequency range of approximately 104 to 1011 Hertz (see following).

Stepping back a stage, frequency may be defined as the number of complete oscillations or cycles of a periodic quantity in unittime and its unit of measurementthe Hertz (abbreviated to Hz) - as the frequency of a periodic phenomenon that has a period of one second. Within the RF spectrum, frequency values are usually expressed in Kilohertz (abbreviated to kHz), Megahertz (abbreviated to MHz) and the already noted GHz. These multiples represent 103 , 106 and 109 Hertz respectively. Wavelength is defined as being the distance between two displacements of the same phase (that is, the stage or state of development of a regularly recurring quantity such as a radar pulse (see following)) along the direction of propagation. The most widely used military radar frequency/wavelength values are given inTable One. 

Because of the low-power level of the reflected or 'echo' signal and its vulnerability to modulation (alteration of its characteristics) and interference, the receiver (see following) used in a radar system must be particularly sensitive.The reduction in signal power between transmission and reception is usually defined in terms of a ratio between the two (in the order of 10 to 17:1) and expressed logarithmically in Decibels (abbreviated to dB). Factors capable of modulating or interfering with the echo signal include: Clutter, Noise, Jamming, Apparent variations in the target's Radar Cross-Section, Echo signal cancellation.

Factors capable of modulating or interfering with the echo signal (Clutter): obscuration ofthe main echo signal by additional echo responses generated by surrounding ground features, the sea and rain. Alongside its obscurant effect, clutter can generate false alarms in a radar, that is, register what seems to be a valid target but is not. A similar effect can be created by atmospheric effects such as a localised air mass which is a different temperature or pressure from that of the surrounding atmosphere. Such a phenomenon can create phantom targets known as angels.
 
Factors capable of modulating or interfering with the echo signal (Noise): Electronic noise inherent within the radar itself. To be detectable, the echo signal must typically be some 10 dB more powerful than the receiver's own noise level. 

Factors capable of modulating or interfering with the echo signal (Jamming): Manmade interference known as jamming (see introduction to electronic warfare section). 

Electronic noise inherent within the radar itself (Apparent variations in the target's Radar Cross-Section): Apparent variations in the target's Radar Cross-Section (abbreviated to RCS) as perceived by the radar due frequency and time modulation effects. Radar cross-section may be defined as a measure of the size of radar response generated by a particular target geometry. This phenomenon is termed scintillation.

Apparent variations in the target's Radar Cross-Section (Echo signal cancellation): Echo signal cancellation caused by reflections produced by flat surface reflectors such as a calm sea. Known as the multipath effect. 

A list of the major military radar applications is given in Table Two. 

Measurements produced by a radar comprise range, bearing and elevation. Target range can be either a slant or ground value. Slant range is the line of sight distance between the radar and the object illuminated while ground range is the horizontal distance between the emitter and its target and its calculation requires knowledge of the target's elevation. Radar range is established from that time delay experienced between the transmission of the emitter's output and the reception of the echo signal.This is achieved by measuring the particular time value against the known velocity of microwave energy (approximately 300,000,000 m/s). As a rough guide, every microsecond of delay between transmission and reception equals 150 m in range. Target bearing is its direction relative to the radar and is traditionally determined by the mechanical position of the antenna at the moment of reception. Target elevation is identified in a similar manner except for the angular measurement being in the vertical rather than the horizontal plane.

A radar's output can take a number of forms, the most usual of which is termed pulsed. In a pulsed radar, the output is transmitted in bursts of energy (individually termed pulses) with the echo signal being received during the interval between sequential transmissions. Another form of output is termed Continuous Wave (abbreviated to CW) which, as its name implies, is continuous. In its basic form, a CW radar is incapable of generating range information and its output must be modulated in some way if such a measurement is to be achieved. A common approach to this problem is frequency modulation with frequency modulated CW radars being used in applications such as altimeters, tracking radars and instrumentation equipments.

Power sources for modern radar systems most frequently take the form of a magnetron or a Travelling Wave Tube (abbreviated to TWT) or tubes. A basic magnetron consists of a central cathode (a negative electrode) surrounded by a cylindrical anode (a positive electrode) which is divided into segments or contains a number of cavity resonators, that is a space within a closed or substantially closed conductor which will maintain an oscillating electromagnetic field when suitably excited by an external force. 

Functionally, a steady electrostatic field is applied between the magnetron's anode and cathode together with a steady magnetic field which is parallel to the device's cylindrical axis and orthogonal to the electrostatic field. Electrons emitted by the cathode are influenced by both fields and their interaction within the gaps or resonating cavities of the device's anode produces microwave frequency oscillations. Currently, magnetron power sources are available in a number of configurations which include fixed frequency, tunable and frequency agile, that is, able to generate an output whose frequency is variable. Within the travelling wave tube, a beam of electrons is generated which continuously interacts with a radio-frequency electromagnetic field to produce amplification or, in some cases, oscillation at microwave frequencies. Like magnetrons, TWTs come in a number of configurations which currently include ring loop, ring bar and coupled cavity types. 

A modern, mechanically scanning (use of a rotating antenna) pulse radar comprises: Transmitter, Modulation Circuitry, Timing Circuitry, Duplexer, Receiver, Antenna, Signal Processor, Display Unit.

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of a Transmitter to provide the radar's output energy.

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of a Modulation Circuitry generating the output's operating frequency and waveform, that is, the configuration of the pulses used in the output.

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of a Timing Circuitry which manages the duty cycle between the radar's transmit and receive functions and triggers (activates) the equipment's transmitter and receiver at the appropriate times.

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of a Duplexer which has the duel function of channelling the transmitter output to the radar's antenna with minimum power loss (see following) and no damage to the receiver and the returning echo signal to the receiver with maximum gain. 

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of a Receiver (usually of superheterodyne type) which is tuned to the frequency of the radar's transmitted signal and which detects the incoming echo response after it has received Intermediate Frequency (abbreviated to IF) amplification.

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of an Antenna comprising, in most cases, a back plane reflector (which can be a solid parabolic dish or a shaped, openwork structure) on which a combined transmit and receive element is mounted. The architecture used is optimised to produce a concentrated beam of outgoing energy. Antenna movement is controlled by a dedicated drive mechanism and energy is fed to the transmit/receive element by either a coaxial cable or a waveguide. A waveguide is a hollow transmission line and can, in some cases, be used as an antenna in its own right (see following). While most modern emitters use dual function transmit/receive elements, the two can be separated with the resultant system being known as a bistatic radar.

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of a Signal Processor which extracts the required data from the received echo signal.

A mechanically scanning (use of a rotating antenna) pulse radar is comprised of a Display Unit which provides the radar operator with a visual presentation of the data generated by the signal processor. 

The concept of the radar beam is something of a misnomer as imperfections in antenna manufacture mean that alongside the desired main beam, a number of unwanted sidelobes are produced at angles offset from the main illumination. For a reflector antenna (of the type described previously), these effects typically start at about -40 dB near the main beam and fall away as the angular separation increases. Radar designers make strenuous attempts to minimise sidelobes as clutter and jamming returns collected by them can mask the wanted main beam signal. Antenna design is also important in minimising the width of the main beam being generated in order to maximise antenna gain. Gain can be defined as a measure of ability to increase the magnitude of a given electrical input parameter against a theoretical perfect system and is measured in dBs. In simpler terms, gain defines an antenna's ability to maximise main beam output in transmit mode and received signal strength in receive mode.

Radar antenna design is a broad church which, alongside the already noted reflector type, includes approaches such as the Vagi, slotted waveguide, inverted cassegrain and phased-array. Taking these in order, a Vagi antenna (named after its Japanese inventor, Professor Vagi) comprises (from the rear forwards) a reflector, a dipole transmit/ receive element and a series of horizontally aligned directors. The distance between the dipole and reflector is equal to approximately half the wavelength of the radar's output and, the greater the number of directors used, the better the antenna's directivity. A current application of the Vagi antenna is in the US Navy's AN/APS-130/-139/-145 series of Airborne EarlyWarning (abbreviated to AEW) radars. A slotted waveguide antenna takes the form of a folded waveguide into which slots are cut and arranged in such a way as to produce a highly directional main beam output. An example of a slotted waveguide antenna application is the array used with the AN/APY-1/2 radar which is installed in the Boeing E-3 Sentry Airbome Waming And Control System (abbreviated to AWACS) aircraft.

In the inverted cassegrain design, the radar's output is transmitted from an element mounted in a parabolic reflector into a second reflector mounted in front of it.This bounces the energy back into the main reflector where it is reflected forwards. At this point, the energy's polarity (linear or circular) is switched through 90° which makes the forward reflector transparent to it and allows it to pass through it into space. Received energy goes through the same process but in reverse. Although complex, the inverted cassegrain antenna has the advantage of good sidelobe performance (and, therefore, high resistance to jamming and clutter) in a relatively small and lightweight package. An example of the use of such an antenna is in the GEC-Marconi Radar and Defence Systems' Foxhunter radar which is fitted to the Tornado F Mk 3 interceptor. 

A phased-array antenna uses a flat plane planar arrangement of rows and columns of equally spaced radiating elements. Each radiating element offers a similar output to its neightbours and is designed in such a way as to avoid two elements coupling their outputs. The width of the main beam produced by such a system depends on both the spacing of the individual elements within the array (typically, half the wavelength value of the radar's output) and an inverse relationship between beamwidth and the number of elements used (that is, the greater the number of elements, the narrower the beam produced). Beam steering is facilitated through the use of what are termed phase shifters. When the phase shifter controlling an individual element or group of elements is set at zero, the output beam is transmitted at right angles to the element or elements. Altering the shifter settings in a particular pattern across the array allows the beam to be 'steered' in the direction required without mechanical movement of the antenna plane. In practice, 'steerability' in phased arrays is restricted to approximately ±60° and requires some mechanical array movement to provide all angle coverage.

One application of this type of technology is used to create what is known as a passive multifunction radar. Here, the equipment has a similar function to a conventional surveillance radar but with the mechanical scanned reflector antenna replaced by a planar phased-array. Equally, the equipment can be made to provide coverage in three dimensions (abbreviated to 3-D and representing the ability to deduce target range, bearing and elevation; a two-dimensional or 2-D radar can only provide range and bearing data) by a judicial mix of electronic beam-steering and mechanical movement. Such a capability does away with the need for a supporting height-finding radar but is generally not accurate enough in elevation to act as a fire-control system. Again, the use of electronic beam-steering allows the radar to perform surveillance and multiple target tracking tasks in time sequence within a single unit.

Moving a stage further, an active array multifunction radar replaces the common transmitter chain which feeds the radiating elements in the described passive equipment with several thousand individual duplexed transmit/receive modules. Each of these independent units takes the form of a solid-state Microwave Monolithic Integrated Circuitry (abbreviated to MMIC) which utilises semiconductor material such as Gallium Arsenide (chemical symbol GaAs) and generates a typical output of around 10 W. Use of such units virtually eliminates loss of microwave energy and further advantage can be gained from applying digital adaptive beam-forming techniques to groups of modules within the array. Using this technique, each sub-array of perhaps 100 transmit/receive units, can be weighted in amplitude (the peak value of an alternating entity such as a radar pulse in both the positive and negative directions) and phase of the described transmit/receive units, the active array and passive multifunction radars function in a similar manner.

Alongside the traditional form of technology already discussed, the reader will encounter a number of other manifestations which require explanation. Among these, four will probably be encountered most frequently, namely Synthetic Aperture, MovingTarget Indicator, Over-TheHorizon Backscatter (abbreviated to SAR, MTI and OTH-B respectively) and Monopulse radars. Taking these in order, an SAR radar is an airborne system which utilises the flight path ofthe aircraft to simulate an extremely large antenna or aperture. Over time, individual transmit/receive cycles are completed with the data from each cycle being stored electronically. After a given number of cycles, the stored data is recombined (taking into account the Doppler effects (see following) inherent in the differing transmitter to target geometry experienced in each succeeding cycle) to create a high-resolution (that is, a measure of the radar's ability to discern objects of a given size) picture of the terrain being overflown. Using such a technique, radar designers are able to achieve resolutions which would require real aperture antennas so large as to be impractical with arrays ranging in size from the 7 m long unit used in the American Joint Surveillance Target Attack Radar System (abbreviated to Joint STARS) to modified fighter radars. SAR radar is partnered by what is termed Inverse SAR (abbreviated to ISAR) technology which in the broadest terms, utilises the movement of the target rather than the emitter to create the synthetic aperture. ISAR radars have a significant role aboard maritime patrol aircraft to provide them with radar imagery of a sufficient quality to allow it to be used for target recognition purposes.

Moving target detection is, as its name suggests, the domain ofthe MTI system which relies for its effect on the Doppler shift in frequency which occurs when transmitted radar energy is reflected by a moving target. The Doppler effect is best illustrated by the analogy of the changing pitch of a railway engine (locomotive) whistle as heard by an observer standing by the track. As the engine approaches, the pitch of the whistle appears to be higher than it actually is, correct when it is alongside the observer and lower when it has passed by. Equally, the greater the speed of the engine, the greater is the shift in the pitch of the whistle. In RF terms, the Doppler shift in frequency is directly proportional to the target velocity component towards the radar and the operating frequency of the emitter.

MTI function is essentially that of a filter which removes unwanted low-velocity components of clutter and passes only those returns coming from a moving target. The sensitivity of the technique is a direct function of the radar's pulse repetition frequency (abbreviated to PRF), that is, the number of pulses occurring in a second. The higher the PRF value, the better is the radar able to reject clutter signals. Within MTI technology, there is a point at which target speed induces a Doppler effect which produces a 360° phase change which prevents the target echo being separated from its clutter background. Known as blind velocity, the problem can be overcome by using staggered PRFs where the blind velocity value is different for each transmitted pulse. While some PRF values will still be 'blinded', others will not, allowing the MTI function to continue. Equally, where the target being tracked has a low RCS and generates a minimal Doppler effect, a range ambiguous system can be employed which uses a high pulse on target rate but where the time interval between the transmission of each pulse (known as the pulse repetition interval and abbreviated to PRi) is less than the radar time delay (the time for a pulse to reach the target and return to the radar receiver) of the particular target. It is also worth noting here that the length of time the transmitter is switched on to produce a single pulse is known as the pulse-width (abbreviated to PW and measured in microseconds). The described MTI function can also be taken as being almost synonymous with what are termed pulse Doppler radars which use the clutter rejection/ target velocity detection capabilities achievable through Doppler filtering to create 'Iook-down/shoot-down' systems for interceptor aircraft. 

OTH-B radars operate in what is known as the 3 to 30 MHz high frequency (abbreviated to HF) portion ofthe RF spectrum and make use of the fact that the ionosphere reflects signals transmitted within this part of the spectrum rather than allowing them to pass through and be dissipated in space. Accordingly, such radars are able to detect objects at much longer ranges (perhaps in the order of 2,000 km) than is possible with a microwave radar.To date, OTH-B radars have been deployed or developed by Australia, the USA and Russia and usually take the form of separate transmit and receive arrays located at different sites. While the ability to detect objects at such distances is obviously of considerable value, OTH-B technology is difficult to manage. A key element here is the frequency with which the ionosphere's reflectivity changes and its vulnerability to interference by naturally occurring auroral activity. A second approach to this type of technology utilises the ground wave effect whereby an HF band RF frequency signal can attach itself to a conductive surface such as the sea and provide an over-water detection capability at ranges of up to 350 to 400 km. Such technology has been developed in Canada and the UK and can be termed HF Ground Wave Radar (abbreviated to HF-GWR) or OTH Surface Wave (abbreviated to OTH-SW) radar. It should also be noted that the described OTH technologies should not be confused with OTH targeting where a conventional airborne radar is used to extend the radar horizon of a surface platform beyond that of its onboard sensors. As microwave RF energy (with the exceptions already noted) travels in straight lines, detection range is restricted by the curvature of the earth's surface. Overcoming this requires the elevation of the emitter above the earth's surface, with the increase in detection range being directly proportional to the radar's altitude.

The remaining emitter type cited - monopulse radar - is widely used in precision tracking systems and is currently held to be a key electronic warfare target. Such a system uses a main beam output which is divided into four segments each of which has a different polarity. Target tracking is achieved by comparing the amplitude or phase of the echo signal as perceived by each of the four segments within the transmission beam. Being a complex technology, radar design requires consideration of a wide range of factors if the end product is to be effective in its role. Considerations (in no particular order of importance) include Operating Frequency, Transmitter Power, Waveform, PRF, Bandwidth, Scan Pattern.
 
Operating Frequency: Factors here include increased susceptibility to clutter interference, limited detection range and a falloff in transmitter efficiency at the high end of the spectrum as against poor angular resolution and large antenna size at the lower end. As a rough guide, current land-based and shipborne air surveillance radars operate across the B- through G-band section of the spectrum (see Table One for band definitions); ground-based battlefield surveillance and fire-control radars in the B- through M-bands; naval fire-control and navigation radars in the G- through J-bands (with an emphasis on I-band); airborne surveillance radars in the B- through F-band and airborne fire-control radars in the I/J-band. It is also worth noting the increasing use of frequency-agile radars which are able to switch frequencies on a pulse-by-pulse or batch-of-pulses by batch-of-pulses basis as a counter jamming measure.

Transmitter Power: Available transmitter power is a major factor in a radar's aperture product value (that is, the amount of power radiated multiplied by the effective aperture ofthe antenna) which is a prime indicator ofthe emitter's potential target detection performance. As a rough rule of thumb, the smaller the target and the smaller the emitter's antenna, the higher the power level needed to maintain performance. By way of example, a mediumrange missile system radar might be able to detect a 1 m2 target with a mean (average) radiated power of 20 W. When target size is reduced to 0.001 m2, this value would have to rise to 20 kW to maintain the same level of detection capability. 

Waveform: Selection of an appropriate waveform is crucial in achieving radar performance. By way of example, a pulse Doppler radar, while excellent as a means of extracting a target from a clutter background, cannot resolve range unambiguously because of the high PRF values used (see previously). One way round this is to code or compress the pulses (by modulating their frequency or phase) so that those pulses forming the echo signal can be readily distinguished from those forming the transmitter output. Equally, multimode radars require different waveforms for different tasks and skilful manipulation of the waveform is a major aid to countering jamming. Aside from pulse compression/ coding and the already noted CW format, other examples of currently used waveforms include jitter (a short duration instability in either the amplitude or phase of the signal), coherent (a waveform in which all the pulses have a stable phase relationship to one another) and non-coherent (the reverse of coherent). 

PRF: PRF is a major factor in determining a radar's detection range and ability to detect targets within clutter. Accordingly, a radar with a low PRF value provides unambiguous range data but is highly susceptible to clutter interference, while an emitter with a high PRF value provides unambiguous velocity information but second order range resolution with inherent inaccuracies. A medium PRF value provides some of the advantages of both the high and low values but requires high-order processing to resolve its inherent ambiguities. 

Bandwidth: A radar signal is made up of what is termed a carrier wave on to which the desired characteristics are imposed. Accordingly, because the signal is made up of a number of components, it actually contains a range of frequencies grouped around that of the baseline carrier wave. The extent of this range is termed the radar's bandwidth. An example of the impact of bandwidth is the spread-spectrum radar which uses an ultra-wide bandwidth (so wide in fact that it frequently appears to be electronic noise rather than a modulated waveform) to minimise detection by a hostile radar warning receiver. Radars which incorporate features such as spreadspectrum, pulse compression or low-output power levels to minimise detectability are termed Low Probability of Intercept (abbreviated to LPI) emitters.

Scan Pattern: In non-electronically scanning radars, a range of patterns of antenna movement are used to maximise the suitability of the particular radar for its purpose. Commonly used scan patterns include: Circular, Conical, Conical Scan On Receive Only, Frescan, Helical, Lobe On Receive Only, Palmer, Raster, Spiral, Track-While-Scan, Track-While-Scan On Receive Only, V-Beam.

Circular Scan Pattern, in which the antenna rotates through 360° in azimuth continuously. 

Conical Scan Pattern, in which the antenna traces a cone pattern around its central axis. Used in tracking radars with target azimuth and elevation being taken from the mechanical position of the antenna. 

Conical Scan On Receive Only Scan Pattern, in which a conical scan pattern is used while the radar is in receive mode only. Abbreviated to COSRO. 

Frescan Scan Pattern, in which the pattern is produced by successive beams (each on a different bearing) which are made to overlap by stepped changes in frequency. 

Helical Scan Pattern, in which the antenna's movement takes the form of a rising and falling helix around the antenna boresight. 

Lobe On Receive Only Scan Pattern, in which the echo signal is sampled at fou r positions around the antenna boresight while the radar is in receive mode only. Abbreviated to LORO. 

Palmer Scan Pattern, in which a circular pattern of movement is imposed on another scan type such as conical or raster. 

Raster Scan Pattern, in which the antenna follows a continuous rectangular pattern of movement which expands and contracts to give area coverage. Used primarily for target acquisition in air-to-air applications. 

Spiral Scan Pattern, in which the antenna follows a spiral pattern in the vertical plane. This approach provides range and relative azimuth/elevation data and is used primarily for target acquisition. 

Track-While-Scan Scan Pattern, in which two unidirectional sector (specified area) scan patterns provide simultaneous coverage in the vertical and horizontal planes. Abbreviated to TWS. 

Track-While-Scan On Receive Only Scan Pattern, in whichTWS is used while the radar is in receive mode only. Abbreviated to TWSRO. 

V-Beam Scan Pattern, in which separate radar transmitters feed back-to-back antennas which produce a vertical fan beam and one which has an inclination of approximately 30°. Target elevation is deduced by comparing the differences in echo response experienced by the two antennas. Scan patterns for the individual beams may be circular or over a defined sector.

As a final point in the introductory survey, readers should be aware of Identification Friend-or-Foe (abbreviated to IFF) or Secondary Surveillance Radars (abbreviated to SSR). While radars can locate targets in space, they are, for the most part (see note on ISAR systems) unable to distinguish between friendly or hostile contacts. This is achieved by using an IFF/SSR subsystem which transmits an interrogator signal which activates a transponder (a combined transmit/receive device which automatically broadcasts a signal when it receives an appropriate trigger signal) aboard the friendly vehicle. The IFF output is coded in some way and shows up as a distinct and recognisable response on either the radar's main display or a co-located dedicated presentation unit. To complete the survey, an explanation of the American 'AN' system of electronic equipment alphanumeric identifiers is given in Table Three."""

In [3]:
import pandas as pd

jane_data = pd.read_excel("jane_radar.xlsx")

jane_data

Unnamed: 0,MetaData,Chunk text
0,Chapter Two: RADAR SYSTEMS; Subchapter: Introd...,Note The following overview of radar is intend...
1,Chapter Two: RADAR SYSTEMS; Subchapter: Introd...,Radar (standing for RAdio Direction And Rangin...
2,Chapter Two: RADAR SYSTEMS; Subchapter: Introd...,A small proportion of this energy is reflecte...
3,Chapter Two: RADAR SYSTEMS; Subchapter: Introd...,It should also be noted that the microwave seg...
4,Chapter Two: RADAR SYSTEMS; Subchapter: Introd...,"Stepping back a stage, frequency may be define..."
...,...,...
138,LAND-BASED AIR DEFENCE RADARS; Czech Republic;...,System and Upgrade:\nSystem: 1S91 Surn fire-co...
139,LAND-BASED AIR DEFENCE RADARS; Czech Republic;...,Upgrade Components:\nDigital signal processing...
140,LAND-BASED AIR DEFENCE RADARS; Czech Republic;...,Digital Signal Processing Capabilities: \nData...
141,LAND-BASED AIR DEFENCE RADARS; Czech Republic;...,Display and Communication:\nReplaced analogue ...


In [13]:
from langchain_core.documents.base import Document

pdf_data = []
for _, chunk in jane_data.iterrows():
    temp_doc = Document(page_content=f"{chunk['MetaData']} {chunk['Chunk text']}")
    temp_doc.metadata = {"metadata":chunk["MetaData"]}
    pdf_data.append(temp_doc)

In [14]:
# vectorstore.delete_collection()
vectorstore = Chroma.from_documents(documents=pdf_data, embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

In [27]:
retriever.invoke("چه الگوهایی برای اسکن کردن وجود دارد؟")

[Document(page_content='Chapter Two: RADAR SYSTEMS; Subchapter: Introduction Raster Scan Pattern, in which the antenna follows a continuous rectangular pattern of movement which expands and contracts to give area coverage. Used primarily for target acquisition in air-to-air applications.', metadata={'metadata': 'Chapter Two: RADAR SYSTEMS; Subchapter: Introduction'}),
 Document(page_content='Chapter Two: RADAR SYSTEMS; Subchapter: Introduction Spiral Scan Pattern, in which the antenna follows a spiral pattern in the vertical plane. This approach provides range and relative azimuth/elevation data and is used primarily for target acquisition.', metadata={'metadata': 'Chapter Two: RADAR SYSTEMS; Subchapter: Introduction'}),
 Document(page_content='Chapter Two: RADAR SYSTEMS; Subchapter: Introduction Scan Pattern: In non-electronically scanning radars, a range of patterns of antenna movement are used to maximise the suitability of the particular radar for its purpose. Commonly used scan pa