In [None]:
!pip install gensim



In [84]:
import gensim
import numpy as np
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import Word2Vec


In [None]:
import gensim.downloader as api

# Load the pretrained 'word2vec-google-news-300' vectors through gensim's
# downloader API and keep them in `wv`, the embedding lookup used by the cells
# below (membership test `word in wv` and indexing `wv[word]`).
# NOTE(review): presumably a large download on first use that gensim caches
# locally afterwards - confirm before running on a metered connection.
wv = api.load('word2vec-google-news-300')



Finding similar paragraphs with averaged Word2Vec vectors

In [71]:
def calculate_average_vector(paragraph, wv):
  """Embeds a paragraph as the mean of its per-sentence mean word vectors.

  The paragraph is split into sentences on '.', each sentence is split on
  whitespace, and every token found in ``wv`` contributes its vector to that
  sentence's mean. Sentences without any known token are skipped; the
  remaining sentence means are then averaged with equal weight.

  Tokens are looked up verbatim, so a token with attached punctuation
  (e.g. ``"gravity,"``) only counts if that exact string is a key of ``wv``.

  Args:
    paragraph: The text to embed.
    wv: An embedding lookup supporting ``word in wv`` and ``wv[word]``
      (for example gensim ``KeyedVectors`` or a plain dict of arrays). The
      vector size is taken from the looked-up vectors, so models of any
      dimensionality are accepted.

  Returns:
    A 1-D float64 NumPy array holding the paragraph embedding, or ``None``
    when no token of the paragraph is found in ``wv``.
  """

  sent_vectors = []
  for sentence in paragraph.split('.'):
    word_vectors = [wv[word] for word in sentence.split() if word in wv]
    if word_vectors:
      # Average in float64 regardless of the model's storage dtype; stacking
      # the vectors also removes the need for a hard-coded dimension.
      sent_vectors.append(
          np.mean(np.asarray(word_vectors, dtype=np.float64), axis=0))

  if not sent_vectors:
    return None  # No word of the paragraph is present in the vocabulary
  return np.mean(sent_vectors, axis=0)

In [85]:
# Demo corpus: six short paragraphs - two on physics, two on philosophy and
# two on sport. The order matters: paragraph vectors are computed from this
# list in order and results are mapped back to these texts by index.

paragraphs = [

"Classical physics is a branch of physics that describes the behavior of matter and energy on a macroscopic scale. It encompasses the laws of motion, gravity, electricity, magnetism, and thermodynamics. These laws, formulated by scientists like Newton, Einstein, and Maxwell, provide a framework for understanding the world around us, from the motion of planets to the flow of electricity. While classical physics is highly successful in explaining many phenomena, it breaks down at the quantum level, where particles exhibit behaviors that cannot be described by classical mechanics.",
"Quantum physics is a branch of physics that deals with the behavior of matter and energy at the atomic and subatomic level. It introduces concepts that defy classical intuition, such as wave-particle duality, superposition, and quantum entanglement. These principles explain phenomena like the photoelectric effect, black-body radiation, and the behavior of electrons in atoms. Quantum physics has led to groundbreaking technologies like lasers, transistors, and quantum computers, revolutionizing our understanding of the universe and its applications.",
"Nietzsche's On the Genealogy of Morals is a philosophical exploration of the origins and development of moral concepts. Nietzsche argues that traditional morality is a product of historical circumstances, power dynamics, and human psychology. He criticizes the prevailing moral systems as oppressive and harmful, rooted in concepts like guilt, resentment, and herd mentality. Instead, Nietzsche proposes a new morality based on self-overcoming, creativity, and the affirmation of life. The book delves into the historical roots of morality, tracing its evolution from the slave morality of the weak to the master morality of the strong. Nietzsche ultimately challenges readers to question the prevailing moral values and forge their own paths to meaning and fulfillment.",
"Albert Camus's The Myth of Sisyphus is a philosophical essay that explores the absurdity of human existence. The central figure, Sisyphus, is condemned to eternally roll a boulder up a hill, only for it to roll back down, forcing him to repeat the task endlessly. Camus argues that this futile labor mirrors the human condition, filled with meaningless tasks and the inevitable absurdity of death. However, he suggests that Sisyphus can find meaning and defiance in his suffering by acknowledging the absurdity of his situation and choosing to rebel against it. Camus proposes that the essence of human existence lies in this constant struggle against the absurd, and it is through this defiance that we can find a sense of purpose and dignity.",
"Football is a team sport played between two teams of eleven players on a rectangular field. The objective of the game is to score more goals than the opposing team by kicking the ball into the net. Players use their feet, head, and chest to control the ball, passing it to teammates and attempting to dribble past opponents. The game is known for its physicality, strategy, and teamwork, making it one of the most popular sports worldwide.",
"Cricket is a bat-and-ball game played between two teams of eleven players on a field with a 22-yard (20-meter) pitch at the center. The objective is to score more runs than the opposing team. One team bats while the other fields. The batting team tries to hit the ball bowled by the fielding team with the bat, scoring runs by running between wickets or hitting the ball to the boundary. The fielding team tries to dismiss the batting team by catching the ball or bowling the batsman out. The game is often played over multiple days, with each team taking turns to bat and field.",
# Add further paragraphs here; rerun the cell that builds paragraph_vectors afterwards.
]


In [86]:
# Embed every paragraph; paragraph_vectors[i] is the embedding of paragraphs[i].

paragraph_vectors = [calculate_average_vector(paragraph, wv) for paragraph in paragraphs]

# calculate_average_vector returns None when a paragraph has no in-vocabulary
# word. Fail here with a clear message instead of later, inside `.shape` or
# cosine_similarity, with an opaque one.
unembedded_indices = [i for i, vector in enumerate(paragraph_vectors) if vector is None]
if unembedded_indices:
    raise ValueError(
        f"No in-vocabulary words found for paragraphs at indices {unembedded_indices}"
    )

print(len(paragraph_vectors))
print(paragraph_vectors[0].shape)
print(paragraph_vectors[0])


6
(300,)
[ 9.61197102e-03  1.09775772e-02  1.18761485e-01  3.30625704e-02
 -1.03038250e-01 -3.17664966e-03  1.01022409e-01 -1.32280054e-01
  4.62385929e-02  5.17516366e-02 -4.31814896e-02 -7.51176048e-02
 -3.75535979e-02  2.41843864e-02 -1.05576640e-01  1.11946612e-01
  2.98005915e-02  1.04677806e-01  1.82065276e-02 -4.58878418e-02
 -3.19171414e-02  6.40440853e-03 -2.72235082e-02  1.39520609e-02
  2.98616525e-02 -5.69975820e-02 -7.35180673e-02  3.85824297e-02
 -3.62074102e-02 -2.87001063e-02 -4.41128512e-02 -9.82893242e-02
 -1.96955681e-02  4.94313733e-02  1.49613634e-02  6.48684129e-03
 -2.49305670e-02  5.36598086e-03  8.37838133e-02  7.65732840e-02
  7.42497816e-02  3.02027114e-02  2.80729048e-02  6.91543014e-02
 -2.04754999e-02 -1.68439310e-02 -3.33419843e-02 -8.32023819e-03
 -2.08962704e-02  1.56743440e-02 -4.21941926e-02 -6.89315002e-03
 -2.68291864e-02 -5.13979238e-02  6.19067614e-02  9.48549360e-02
 -1.93802690e-02 -1.02461578e-01  3.92619233e-03 -1.12163644e-01
 -4.78616435e-02

In [90]:
# Function to find the most similar paragraphs to a query

def find_similar_paragraphs(query, paragraph_vectors, top_n=3):
    """Returns the stored paragraphs most similar to ``query``, best match first.

    The query is embedded with ``calculate_average_vector`` using the
    notebook-level ``wv`` model and compared against ``paragraph_vectors`` by
    cosine similarity. The matching texts are read from the notebook-level
    ``paragraphs`` list, so ``paragraph_vectors[i]`` must be the embedding of
    ``paragraphs[i]``.

    Args:
        query: The query text.
        paragraph_vectors: One embedding per entry of ``paragraphs``, in the
            same order.
        top_n: Maximum number of paragraphs to return.

    Returns:
        A list of at most ``top_n`` paragraph strings ordered by decreasing
        similarity. The list is empty when ``top_n`` is not positive or when
        no word of the query is present in the model's vocabulary.
    """
    # A slice of the form [-0:] would select everything, so handle
    # non-positive requests explicitly.
    if top_n <= 0:
        return []

    query_vector = calculate_average_vector(query, wv)
    if query_vector is None:
        # Nothing to compare against: no query word has an embedding.
        return []

    similarities = cosine_similarity([query_vector], paragraph_vectors)[0]
    # argsort is ascending; reverse first, then truncate to the best top_n.
    top_indices = similarities.argsort()[::-1][:top_n]
    return [paragraphs[i] for i in top_indices]

In [92]:
# Retrieve the four paragraphs closest to the query, best match first
query = "what is quantum physics ?"
similar_paragraphs = find_similar_paragraphs(query, paragraph_vectors, top_n=4)
print(similar_paragraphs)

['Quantum physics is a branch of physics that deals with the behavior of matter and energy at the atomic and subatomic level. It introduces concepts that defy classical intuition, such as wave-particle duality, superposition, and quantum entanglement. These principles explain phenomena like the photoelectric effect, black-body radiation, and the behavior of electrons in atoms. Quantum physics has led to groundbreaking technologies like lasers, transistors, and quantum computers, revolutionizing our understanding of the universe and its applications.', 'Classical physics is a branch of physics that describes the behavior of matter and energy on a macroscopic scale. It encompasses the laws of motion, gravity, electricity, magnetism, and thermodynamics. These laws, formulated by scientists like Newton, Einstein, and Maxwell, provide a framework for understanding the world around us, from the motion of planets to the flow of electricity. While classical physics is highly successful in expl

In [95]:
# Retrieve the two paragraphs closest to a broad physics query, best match first
query = "Explain physics in general ?"
similar_paragraphs = find_similar_paragraphs(query, paragraph_vectors, top_n=2)
print(similar_paragraphs)

['Quantum physics is a branch of physics that deals with the behavior of matter and energy at the atomic and subatomic level. It introduces concepts that defy classical intuition, such as wave-particle duality, superposition, and quantum entanglement. These principles explain phenomena like the photoelectric effect, black-body radiation, and the behavior of electrons in atoms. Quantum physics has led to groundbreaking technologies like lasers, transistors, and quantum computers, revolutionizing our understanding of the universe and its applications.', 'Classical physics is a branch of physics that describes the behavior of matter and energy on a macroscopic scale. It encompasses the laws of motion, gravity, electricity, magnetism, and thermodynamics. These laws, formulated by scientists like Newton, Einstein, and Maxwell, provide a framework for understanding the world around us, from the motion of planets to the flow of electricity. While classical physics is highly successful in expl

In [97]:
# Retrieve the single paragraph closest to a broad sports query
query = "Explain sports in general ?"
similar_paragraphs = find_similar_paragraphs(query, paragraph_vectors, top_n=1)
print(similar_paragraphs)

['Football is a team sport played between two teams of eleven players on a rectangular field. The objective of the game is to score more goals than the opposing team by kicking the ball into the net. Players use their feet, head, and chest to control the ball, passing it to teammates and attempting to dribble past opponents. The game is known for its physicality, strategy, and teamwork, making it one of the most popular sports worldwide.']


In [100]:
# Retrieve the single paragraph closest to a broad philosophy query
query = "Explain philosophy in general ?"
similar_paragraphs = find_similar_paragraphs(query, paragraph_vectors, top_n=1)
print(similar_paragraphs)

["Nietzsche's On the Genealogy of Morals is a philosophical exploration of the origins and development of moral concepts. Nietzsche argues that traditional morality is a product of historical circumstances, power dynamics, and human psychology. He criticizes the prevailing moral systems as oppressive and harmful, rooted in concepts like guilt, resentment, and herd mentality. Instead, Nietzsche proposes a new morality based on self-overcoming, creativity, and the affirmation of life. The book delves into the historical roots of morality, tracing its evolution from the slave morality of the weak to the master morality of the strong. Nietzsche ultimately challenges readers to question the prevailing moral values and forge their own paths to meaning and fulfillment."]


In [103]:
# Retrieve the single paragraph closest to a query about a specific work.
# "Sisyphus" is spelled as in the corpus paragraph so the query token can line
# up with the paragraph's own word instead of being a different (misspelled) token.

query = "Explain the Myth of Sisyphus ?"
similar_paragraphs = find_similar_paragraphs(query, paragraph_vectors, 1)
print(similar_paragraphs)

["Albert Camus's The Myth of Sisyphus is a philosophical essay that explores the absurdity of human existence. The central figure, Sisyphus, is condemned to eternally roll a boulder up a hill, only for it to roll back down, forcing him to repeat the task endlessly. Camus argues that this futile labor mirrors the human condition, filled with meaningless tasks and the inevitable absurdity of death. However, he suggests that Sisyphus can find meaning and defiance in his suffering by acknowledging the absurdity of his situation and choosing to rebel against it. Camus proposes that the essence of human existence lies in this constant struggle against the absurd, and it is through this defiance that we can find a sense of purpose and dignity."]


In [105]:
# Retrieve the single paragraph closest to a one-topic query
query = "Explain cricket ?"
similar_paragraphs = find_similar_paragraphs(query, paragraph_vectors, top_n=1)
print(similar_paragraphs)

['Cricket is a bat-and-ball game played between two teams of eleven players on a field with a 22-yard (20-meter) pitch at the center. The objective is to score more runs than the opposing team. One team bats while the other fields. The batting team tries to hit the ball bowled by the fielding team with the bat, scoring runs by running between wickets or hitting the ball to the boundary. The fielding team tries to dismiss the batting team by catching the ball or bowling the batsman out. The game is often played over multiple days, with each team taking turns to bat and field.']
