# 02 Make Embeddings

## Imports and setup

In [2]:
%matplotlib inline

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import pickle, textwrap

from sentence_transformers import SentenceTransformer, util

In [35]:
pd.options.display.max_colwidth = 400

In [3]:
SEED = 2023
random.seed(SEED)

## Load data

In [38]:
df = (pd
      .read_csv('data/manufacturing_articles.csv')
      .loc[:,['title', 'summary']]
     )

display(df.shape)
display(df.sample(2))

(527, 2)

Unnamed: 0,title,summary
43,Welding power supply,A welding power supply is a device that provides or modulates an electric current to perform arc welding. There are multiple arc welding processes in common use ranging from relatively simple Shielded Metal Arc Welding (SMAW) to more complicated welding processes using inert shielding gas like Gas metal arc welding (GMAW) or Gas tungsten arc welding (GTAW). Welding power supplies primarily ser...
26,Laser-hybrid welding,"Laser-hybrid welding is a type of welding process that combines the principles of laser beam welding and arc welding.The combination of laser light and an electrical arc into an amalgamated welding process has existed since the 1970s, but has only recently been used in industrial applications. There are three main types of hybrid welding process, depending on the arc used: TIG, plasma arc or M..."


## Calculate article embeddings

In [24]:
vectoriser = SentenceTransformer("all-MiniLM-L12-v2")

vectoriser.max_seq_length = 512

print('Start calculating article embeddings...')
embeddings = vectoriser.encode(df['summary'].tolist(), show_progress_bar=True)
print(f'Completed! Embeddings shape:{embeddings.shape}')

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/573 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/134M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/352 [00:00<?, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading: 0.00B [00:00, ?B/s]

Downloading:   0%|          | 0.00/349 [00:00<?, ?B/s]

Start calculating article embeddings...


Batches:   0%|          | 0/17 [00:00<?, ?it/s]

Complated! Embeddings shape:(527, 384)


In [25]:
# Serialize embeddings and save (=pickle)
with open('data/embeddings.pickle', 'wb') as pickle_file:
    pickle.dump(embeddings, pickle_file)

## Semantic seach

In [53]:
def semantic_search(
    unseen_process_description: str,
    n_results: int,
    article_df: pd.DataFrame,
    article_embeddings: np.array,
) -> None:
    """
    Perform semantic search to find the most similar processes to an unseen description.

    Args:
        unseen_process_description (str): The description of the unseen process.
        n_results (int): The number of most similar processes to retrieve.
        article_df (pd.DataFrame): The DataFrame containing the process articles.
        article_embeddings (np.array): The array of embeddings for the process articles.

    Returns:
        None

    Prints the input description of a process and the top `n_results` most similar processes.

    Example:
        semantic_search(
            unseen_process_description="This is a description of a manufacturing process.",
            n_results=5,
            article_df=df,
            article_embeddings=embeddings
        )
    """
    unseen_embedding = vectoriser.encode(unseen_process_description)

    cosine_similarities = util.cos_sim(article_embeddings, unseen_embedding[np.newaxis,:])

    indexes_of_most_simialar_articles = np.argsort(cosine_similarities.flatten())[-3:]

    print('INPUT DESCRIPTION OF A PROCESS\n', unseen_process_description)
    print(f'TOP {n_results} MOST SIMILAR PROCESSES\n')
    display(df.iloc[indexes_of_most_simialar_articles, :])


unseen_process_description = textwrap.dedent("""
    Finishing operation of high-precision holes performed with 
    a multi-edge tool. High surface finish, superb hole quality, 
    and close dimensional tolerance are achieved at high 
    penetration rates and small depths of cut.
""")
semantic_search(description, 3, df, embeddings)

INPUT DESCRIPTION OF A PROCESS
 
Finishing operation of high-precision holes performed with 
a multi-edge tool. High surface finish, superb hole quality, 
and close dimensional tolerance are achieved at high 
penetration rates and small depths of cut.

TOP 3 MOST SIMILAR PROCESSES



Unnamed: 0,title,summary
141,Laser drilling,"Laser drilling is the process of creating thru-holes, referred to as “popped” holes or “percussion drilled” holes, by repeatedly pulsing focused laser energy on a material. The diameter of these holes can be as small as 0.002” (~50 μm). If larger holes are required, the laser is moved around the circumference of the “popped” hole until the desired diameter is created.\n\n\n\nApplications\nLase..."
87,Blanking and piercing,"Blanking and piercing are shearing processes in which a punch and die are used to produce parts from coil or sheet stock. Blanking produces the outside features of the component, while piercing produces internal holes or shapes. The web is created after multiple components have been produced and is considered scrap material. The ""slugs"" produced by piercing internal features are also considere..."
118,Drilling,"Drilling is a cutting process where a drill bit is spun to cut a hole of circular cross-section in solid materials. The drill bit is usually a rotary cutting tool, often multi-point. The bit is pressed against the work-piece and rotated at rates from hundreds to thousands of revolutions per minute. This forces the cutting edge against the work-piece, cutting off chips (swarf) from the hole as ..."


In [54]:
unseen_process_description = textwrap.dedent("""
    Melting two metals together so they can bond. 
    The connection is strong. There are shiny beads of metal on the side.
""")
semantic_search(unseen_process_description, 3, df, embeddings)

INPUT DESCRIPTION OF A PROCESS
 
Melting two metals together so they can bond. 
The connection is strong. There are shiny beads of metal on the side.

TOP 3 MOST SIMILAR PROCESSES



Unnamed: 0,title,summary
31,Cladding (metalworking),Cladding is the bonding together of dissimilar metals. It is different from fusion welding or gluing as a method to fasten the metals together. Cladding is often achieved by extruding two metals through a die as well as pressing or rolling sheets together under high pressure.\nThe United States Mint uses cladding to manufacture coins from different metals. This allows a cheaper metal to be use...
41,Filler metal,"A filler metal is a metal added in the making of a joint through welding, brazing, or soldering.\n\n\n\nSoldering\nSoldering and brazing processes rely on a filler metal added to the joint to form the junction between the base metal parts. Soft soldering uses a filler that melts at a lower temperature than the workpiece, often a lead-tin solder alloy. Brazing and hard soldering use a higher te..."
13,Cold welding,"Cold welding or contact welding is a solid-state welding process in which joining takes place without fusion or heating at the interface of the two parts to be welded. Unlike in fusion welding, no liquid or molten phase is present in the joint.\nCold welding was first recognized as a general materials phenomenon in the 1940s. It was then discovered that two clean, flat surfaces of similar meta..."


In [55]:
unseen_process_description = textwrap.dedent("""
    I want to produce small metal part with complex geometry. 
    I dont like to use cutting processes. 
    Cost needs to be low. The volume is high.
""")
semantic_search(unseen_process_description, 3, df, embeddings)

INPUT DESCRIPTION OF A PROCESS
 
I want to produce small metal part with complex geometry. 
I dont like to use cutting processes. 
Cost needs to be low. The volume is high.

TOP 3 MOST SIMILAR PROCESSES



Unnamed: 0,title,summary
97,Metal spinning,"Metal spinning, also known as spin forming or spinning or metal turning most commonly, is a metalworking process by which a disc or tube of metal is rotated at high speed and formed into an axially symmetric part. Spinning can be performed by hand or by a CNC lathe.\nThe metal spinning trade is one that dates back to antiquity and was a skill used in the Ancient Egyptian era. This is when meta..."
33,Metalworking,"Metalworking is the process of shaping and reshaping metals to create useful objects, parts, assemblies, and large scale structures. As a term it covers a wide and diverse range of processes, skills, and tools for producing objects on every scale: from huge ships, buildings, and bridges down to precise engine parts and delicate jewelry.\nThe historical roots of metalworking predate recorded hi..."
48,Metal fabrication,"Metal fabrication is the creation of metal structures by cutting, bending and assembling processes. It is a value-added process involving the creation of machines, parts, and structures from various raw materials. \nTypically, a fabrication shop bids on a job, usually based on engineering drawings, and if awarded the contract, builds the product. Large fab shops employ a multitude of value-add..."
