# Setup and Load dataset



In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Tue Feb 14 06:48:05 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   30C    P0    51W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [11]:
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Your runtime has 27.3 gigabytes of available RAM

You are using a high-RAM runtime!


In [2]:

!pip install faiss-gpu

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [2]:
#!pip install transformers

In [3]:
!pip install sentence_transformers 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 KB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<5.0.0,>=4.6.0
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m89.5 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m75.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub>=0.4.0
  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3

In [4]:
!pip install --upgrade --no-cache-dir gdown

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gdown
  Downloading gdown-4.6.3-py3-none-any.whl (14 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 4.4.0
    Uninstalling gdown-4.4.0:
      Successfully uninstalled gdown-4.4.0
Successfully installed gdown-4.6.3


In [19]:
!gdown --id 1_ila0qFpe28YFFp6-drABuq-eHutzfOz

Downloading...
From: https://drive.google.com/uc?id=1_ila0qFpe28YFFp6-drABuq-eHutzfOz
To: /content/wiki_movie.zip
100% 31.4M/31.4M [00:00<00:00, 60.8MB/s]


In [20]:
!unzip -o "wiki_movie.zip"  -d  "/content"

Archive:  wiki_movie.zip
  inflating: /content/wiki_movie_plots_deduped.csv  


## Data Loading

In [21]:
from datetime import datetime
import os
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

from scipy import sparse
from scipy.sparse import csc_matrix
from sklearn.decomposition import TruncatedSVD

np.random.seed(0)

#### Name of the file which contain all the item properties

In [22]:
file="wiki_movie_plots_deduped.csv"

###### Run below cell

In [23]:
content_df  = pd.read_csv( file)

In [24]:
content_df=content_df[content_df['Release Year']>2015]

## Details about dataset

In [25]:
itemid="Title"

In [26]:
features=['Plot']

In [27]:
allcols=[itemid]
for i in features:
  allcols.append(i)

# Setup

In [28]:
content_df['NewTag']=""
for i in features:
  content_df[i] = content_df[i].fillna(' ')
for i in features:
  content_df['NewTag']+=(" "+content_df[i])
content_df['NewTag']=content_df['NewTag'].astype(str)

In [29]:
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [30]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re

In [31]:
def clean_text(text):
    
    text = text.lower()  # lowercase text
    # replace the matched string with ' '
    text = re.sub( re.compile("\'s"), ' ', text)
    text = re.sub(re.compile("\\r\\n"), ' ', text)
    text = re.sub(re.compile(r"[^\w\s]"), ' ', text)
    return text

In [32]:
stopwords=set(stopwords.words('english'))

In [33]:
def tokenizer(sentence, min_words=4, max_words=200, stopwords=stopwords, lemmatize=True):
    
    if lemmatize:
        stemmer = WordNetLemmatizer()
        tokens = [stemmer.lemmatize(w) for w in word_tokenize(sentence)]
    else:
        tokens = [w for w in word_tokenize(sentence)]
    token = [w for w in tokens if (len(w) > min_words and len(w) < max_words
                                                        and w not in stopwords)]
    return tokens    

In [34]:
content_df['clean'] = content_df['NewTag'].apply(clean_text)
# content_df['token_lem_sentence'] = content_df['clean'].apply(
#         lambda x: tokenizer(x))

# Model train

In [48]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('msmarco-distilbert-base-dot-prod-v3')


Downloading (…)b6d67/.gitattributes:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)/2_Dense/config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

Downloading (…)13d78b6d67/README.md:   0%|          | 0.00/2.35k [00:00<?, ?B/s]

Downloading (…)d78b6d67/config.json:   0%|          | 0.00/554 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/265M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)b6d67/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading (…)13d78b6d67/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)78b6d67/modules.json:   0%|          | 0.00/341 [00:00<?, ?B/s]

In [49]:
import torch
#Select the device

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: DistilBertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Dense({'in_features': 768, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
)

In [50]:
corpus_embeddings = model.encode(content_df.clean.values,show_progress_bar=True, convert_to_numpy=True)

Batches:   0%|          | 0/48 [00:00<?, ?it/s]

# Search with FAISS

In [51]:
import faiss
from sentence_transformers import SentenceTransformer
encoded_data = corpus_embeddings
encoded_data = np.asarray(encoded_data.astype('float32'))
index = faiss.IndexIDMap(faiss.IndexFlatIP(corpus_embeddings.shape[1]))
index.add_with_ids(encoded_data, np.array(range(0, len(content_df))))
faiss.write_index(index, 'item.index')

### fetch details of the document

In [55]:

def search(query, k, index, model):
    qv = model.encode([query])
    top_k = index.search(qv, k)
    top_k_ids = top_k[1].tolist()[0]
    top_k_ids = list(np.unique(top_k_ids))
    results =  [content_df.iloc[idx][itemid] for idx in top_k_ids]
    return results

In [56]:
from pprint import pprint
query="movie about action and drama"
results=search(query, k=10, index=index, model=model)
content_df[content_df[itemid].isin(results)][allcols]

Unnamed: 0,Title,Plot
18037,Dhaka Attack,"The story centers on the Dhaka Metropolitan Police (DMP) Elite Forces' fierce operation against a terrorist organization, the attack is part of a plot by few foreign intelligence agencies to destabilize the country, in order to destroy the country's defense layers.[5] The film stars Arifin Shuvo as the Assistant Deputy commissioner of DMP, and the in-charge of Bomb Disposal Unit. Sumon played the role of a commander of SWAT Force. The film also features Afzal Hossain as the DMP Commissioner, Shatabdi Wadud as Intelligence officer, and Mahiya Mahi as a news reporter.[6][7]"
27538,Sa...,"SA Kannada Movie is a thriller action cinema starring Vijay Suriya , Karthik Jayaram, Samyuktha Hornad, Hemanth Hegde, Directed by Hemanth Hegde. The story takes place in Coorg in the background of a Home stay. A personal conflict in the family turns to be serious when an external intruder enter the family. The story has very unusual touch of terror and thriller. How ever the forte of the filmmaker is his screenplay and it is well depicted in this movie too. It was in Story for the controversial Poster that was set in Bangalore [1]"
27546,Mandya to Mumbai,"The movie begins in Mandya where the Hero leads a happy life with his parents. A shocking incident changes his life. His parents are murdered by an anti-social and Hero is forced to go to prison, where he is tortured. He comes across four criminals in the prison. These hardcore criminals break loose from the prison and help Hero take revenge on his parent's killers. They decide to go to Mumbai where they plan to become big gangsters. But fate has other plans."
27563,Tiger,The movie Tiger is a Kannada action packed thriller film led by the young star Pradeep (protagonist) who dreams to become a police officer which is against the wish of his father played by the well known actor and also a very famous Ex IAS officer K Shivram. Why is the father against the wish and the dream of his son is the crux of the story.
28608,Marupadi,"The film is based on a real life incident which took place in North India and it portrays the story of a family which is permanently chased by the law and power. The film is centered around the family of Aby (Rahman), a bank employee, his spouse Sara (Bhama), an orphan and their teenaged daughter Riya (Baby Nayanthara). While leading a peaceful life, Aby gets a punishment transfer to Kolkata due to some issues in the bank he worked. On their journey to the new residence at Kolkata, the anti-narcotic squad finds some drugs in their car, which led them to jail, where Sara and Riya faces sexual onsets from the police officers and inmates.[3][4] The film points finger against the prevailing social and legal systems which makes the life of innocent people tragic. Moreover, it picturize the pathetic conditions of women and children in the present day society.[5][6]"
28619,Aakashathinum Bhoomikkumidayil,The movie revolves around a village in northern Kerala where political murders happen and in such one incident a guy is murdered mistaking him to be someone else. The families of both are left stranded and the film takes us through the incidents that happen to the children of both families.
28657,Tharangam,"The movie tells about Pappan (Tovino Thomas) a cop along with the struggles led with Joy (Balu Varghese). They land in trouble while investigating a business man's wife Ommana (Neha Iyer). They cross path with a smuggler Siju (Saiju Kurup), a dreaded gangster Raghu (Unni Mukundan) which leads to a chaotic situation."
28805,Aadesh - Power Of Law,Film is a Journey of Public Prosecutor and it is a court drama.
31460,Yaanum Theeyavan,"This movie claims to represent a number of true events that have actually happened. Put together under one shell with imaginative screenplay and characters, the story revolves around 3 primary characters Michael (Ashwin Jerome), Soumya (Varsha) and Pasupathy (Raju Sundaram). The plot forms around how these 3 characters meet and what happens after their meeting. However the complete plot will be known only after the release of the movie."
34058,The Farm: En Veettu Thottathil,"Inspired by the popular tale of Red Riding Hood, this Malaysian Tamil slasher film, which gives a dark twist to the story, tells of the painful journey of a girl. The Farm follows a hearing-impaired girl and her encounter with the various kinds of men in her life; one whose nice intentions hide a darker motive, one with bad intentions initially who later repents, and one who abuses women, which the director portrays as ‘an animal’."


In [57]:
from pprint import pprint
query="a movie about robots"
results=search(query, k=10, index=index, model=model)
content_df[content_df[itemid].isin(results)][allcols]

Unnamed: 0,Title,Plot
17937,Scare Campaign,"Scare Campaign is a hidden camera prank show that has been scaring its targets with old school scares for the last 5 years. Their latest target (John Brumpton) nearly ends up shooting an actress thinking she's a real zombie until the crew reveals to him that it's all on TV. Emma (Meegan Warner), another actress on the show, speaks to her director and ex-boyfriend Marcus (Ian Meadows) about being more careful who they prank in case they end up scaring ""the wrong guy"", but Marcus reassures her. Later that morning, their manager, Vicki (Sigrid Thornton) shows them a web series called Masked Freaks that involve a bunch of costumed people killing other people gruesomely. The contents of this web series threatens Scare Campaign's popularity and so Vicki encourages the team to up the ante to increase their ratings.\r\nHiring a new and young actress named Abby (Olivia DeJonge), Marcus and the team take their next prank to an abandoned psychiatric hospital where they intend to a prank a new groundskeeper, Rohan (Josh Quong Tart). As the prank goes along as planned, Emma starts having second thoughts about it and leaves Rohan alone, where he explores the asylum through many of the team's special effects, despite Emma's pleas to end the prank. When Abby is signalled to surprise him, Rohan stabs her to death with a letter opener, strangles cameraman Tony (Steve Mouzakis) and slits the throat of background actress Suze (Cassandra Magrath). Emma and special effects specialist J.D. (Patrick Harvey) barricade themselves in a room and begs Marcus and camera operator Dick (Jason Geary) to call the police before Rohan breaks in and kills Marcus and Dick, then smiles into the camera. Emma and J.D. make it to the van to escape, but J.D. runs back inside to find the keys. Rohan appears in the van, but Emma stabs his hand with a screwdriver into the car seat and runs inside. It's then revealed that the whole predicament was a prank and Emma was their new stooge. Trent, the actor playing Rohan, berates Marcus and quits.\r\nSuddenly, Masked Freaks appear with weapons equipped to their cameras and kills Trent, revealing themselves as actual serial killers and not actors as the web series made it look. They proceed to murder the other members of Scare Campaign, while Marcus runs back inside after finding Trent dead. He tries to warn Emma, who yells at him and ignores his warning after discovering she was the real stooge of their prank. She finally believes him and they both run inside and find Suze's body and witness Tony getting killed. Masked Freaks hack into Scare Campaign's computer system and reveal they do what they do for the new generation of online entertainment. They then bury Abby alive and give Marcus and Emma five minutes to save her before she runs out of oxygen. Marcus and Emma run out to save Abby and end up killing one of the Masked Freaks, who is revealed to be merely a teenager. After saving Abby, the three are surrounded by the Masked Freaks and their boss tells Emma she may leave with either Marcus or Abby. She chooses Abby and kisses Marcus before leaving with Abby. While they're driving away, the Masked Freaks reveal to Marcus that Abby was their spy the whole time before wheeling Marcus on a stretcher into a furnace. On the drive back to town, Emma notices one of the Masked Freaks cameras in the van pointing at her, leaving her to wonder about Abby's involvement."
22892,The Thousand Faces of Dunjia,The film follows a group of swordsmen's adventures to secretly protect humankind by hunting some mysterious creatures from the outer space.
27538,Sa...,"SA Kannada Movie is a thriller action cinema starring Vijay Suriya , Karthik Jayaram, Samyuktha Hornad, Hemanth Hegde, Directed by Hemanth Hegde. The story takes place in Coorg in the background of a Home stay. A personal conflict in the family turns to be serious when an external intruder enter the family. The story has very unusual touch of terror and thriller. How ever the forte of the filmmaker is his screenplay and it is well depicted in this movie too. It was in Story for the controversial Poster that was set in Bangalore [1]"
27543,Mukunda Murari,"The film follows the story of an atheist, Mukunda, who sues God after losing his shop in an earthquake. Religious organizations revolt against him and Murari visits him as his human guide.\r\nAtheist shopowner Mukunda (Upendra) talks about selling bulk statues of Hindu gods, and later tricks a devotee from Andhra Pradesh into buying a statue by claiming it's famous. The lack of respect, as his family sees it, makes his wife sad and she makes their son write the name of Rama for at least 10 pages."
31384,Uriyadi,"The story revolves around three sets of characters: 1) Four carefree and adventurous friends who are college students in their final year of study, including Lenin Vijay, Suresh, Akil, and Karthi; 2) a politician named Kumar who is eager to begin his own caste-based political party; and 3) Ramanathan, a part-time engineering student. The movie opens by showcasing the daily lives of the four student friends. The students soon meet the politician, Kumar, who also runs a roadside dhaba frequently visited by the students.\r\nIn many subsequent scenes, the students get into trouble with the local residents. In one such instance in the dhaba, the politician comes to their rescue and hence they get in touch with each other. Later that night, however, one of the students is attacked by some of the local residents. The other students become furious, chase the locals, and a fight ensues. Meanwhile, Vijay’s ex-girlfriend is harassed by Ramanathan. This is witnessed by Karthi, who confronts and physically assaults Ramanathan and then narrates these events to Vijay. Later, to their surprise, Ramanathan’s friends wait in the dhaba to ambush Quarter. Ramanathan, who has little luck with love, yearns for one college student who seems to be the ex-girlfriend of Lenin Vijay. One night, the four friends get very drunk and accidentally annoy some of the caste fanatics. Kumar, along with his uncle, happens to see this and contemplates how to take advantage of this tense situation. While all this is happening, the politician drafts plans to initiate a caste-based political party about which he has been dreaming. The plot reaches a climax when all three sets of characters meet and each of them discovers their mutual interconnections."
33850,Tantei Opera Milky Holmes the Movie: Milky Holmes' Counterattack,"Set in the near future in the Great Era of Detectives, chosen people are born with supernatural abilities known as Toys (トイズ, Toizu). Those who use these Toys for evil are responsible for a wave of crimes and necessitate employing Toy-using detectives to help solve them. In the Yokohama District, Opera Kobayashi runs a detective agency named Milky Holmes, made up of four budding young detectives, Sherlock Shellingford, Nero Yuzurizaki, Hercule Burton and Cordelia Glauca, who each wield their own unique toys and train to become detectives at Holmes Detective Academy.\r\nThe video game casts players in the role of Kobayashi, who must utilise Milky Holmes' unique abilities to stop the Thieves' Empire, a band of thieves led by the mysterious Arséne. The second puts players in the role of another detective, Ellery Himeyuri. The Alternative TV specials also take place in this universe, in which Kobayashi and Milky Holmes are assisted by another Toy user, Lily Adler, in fighting against the Thieves' Empire whilst visiting London.\r\nIn the first two anime television series, the four girls of Milky Holmes, Sherlock, Nero, Hercule and Cordelia, end up losing their Toys during an encounter with the Thieves' Empire. Taken away from their rich lifestyle and thrown into an attic, the girls must try and regain use of their Toys or else face expulsion from Holmes Detective Academy. Futari wa Milky Holmes, which takes place two years after the other series, follows two young girls, Alice and Kazumi, who, inspired by Milky Holmes, form their own detective unit, Feathers, and fight against a group of thieves known as the Color the Phantom. Tantei Kageki Milky Holmes TD sees Milky Holmes help an idol named Marine Amagi recover her Toys, the seven Elements, which have been stolen from her."
33960,Kamen Rider Ex-Aid the Movie: True Ending,"The film is set a few days after the end of the TV Series. A new strain of the Bugster Virus spreads among the population and a group of Ninja Players led by Kamen Rider Fuma invades the Seito University Hospital. Emu, Poppy, Hiiro and Taiga transform into Kamen Riders to fight back, but are defeated, and everybody except for Emu is infected and put into a coma, with Fuma using his Gashacon Bugvisor on a child patient with brain tumor called Madoka Hoshi and rendering her also unconscious before leaving. Once she awakes, Poppy is surprised to see her friends participating in a School Festival, as if nothing had happened. Meanwhile, Kiriya and Kuroto start to investigate the origin of this new virus and discover through Genm Corp's new president Tsukuru Koboshi that Masamune once negociated with Johnny Maxima, the CEO of the game company Machina Vision regarding the contents of Kamen Rider Chronicle, but the deal was never completed, and both a Rider Gashat and Gamer Driver was stolen a few days later, along part of the company's data.\r\nKiriya then confronts both Maxima and Fuma who is revealed as Kagenari Nagumo, Madoka's father. Kagenari then reveals that he sent Madoka and the others to the VR world for her sake, and does the same with Maxima and Kiriya. Kuroto then instructs Emu to enter the VR world to rescue them using a special VR system he developed and gives him a new Gashat called ""Mighty Creator VRX"". As Emu enters the game, Kuroto creates a distraction for Fuma with Parad's help. Once learning their plans, Kagenari chases after Emu to the VR world but is defeated by him. Emu then uses his new powers to create an exit for everybody to escape, but Madoka refuses to leave with them and stays alone in the VR world, until Maxima appears before her and uses the power she obtained for himself.\r\nBack to the real world, the riders confront Kagenari about his actions and learn that his intention was to let Madoka live happily in the VR world as there was little chance for her to survive her treatment and even if she does, she would suffer from complications afterwards. Maxima then appears before them, revealing that he obtained the power to transform into Gamedeus and will use it to destroy the world. The riders then confront Maxima, but are overpowered by him, until Kuroto and Parad sacrifice themselves to restrain his powers with their own, forcing him to flee. Back at the CR, Emu and the others learn that Madoka's condition had worsened, and Hiiro, Emu and Asuna perform an emergency operation to save her. Meanwhile, Taiga and Kiriya decide to storm the headquarters of Machina Vision. Kagenari confronts them just to be defeated, and Maxima appears to attack them. Emu and Hiiro join the battle after the operation is completed and Emu reveals to Kagenari that he realized that Madoka's true wish was his own happiness. He also informs him that the operation was a success but she needs him by her side for her consciousness to return to the real world. As Kiriya leaves with Kagenari, Maxima ejects both Parad and Kuroto from his body to regain his full powers as Gamedeus and proceeds to destroy the world, until Emu transforms into Hyper Muteki to destroy him for good, while Kagenari, once reunited with his daughter, greets her as she wakes up.\r\nWith the incident solved, and the Bugster threat eradicated from Earth, the riders then return to their daily lives. In the post-credits, Emu and Parad confront a group of Bugsters when they are intercepted by Kamen Rider Build, who defeats Parad and extracts Ex-Aid's essence from Emu before leaving."
34058,The Farm: En Veettu Thottathil,"Inspired by the popular tale of Red Riding Hood, this Malaysian Tamil slasher film, which gives a dark twist to the story, tells of the painful journey of a girl. The Farm follows a hearing-impaired girl and her encounter with the various kinds of men in her life; one whose nice intentions hide a darker motive, one with bad intentions initially who later repents, and one who abuses women, which the director portrays as ‘an animal’."
34261,The Heritage of Love,"The film is set in 2016. Andrey Kulikov is a young machinist who is going to Paris to visit an old lady, and to buy the oldest Russian-made car, the Russo-Balt. While walking through Paris, Andrey sees a woman, Vera, and so starts the tale of two love stories, separated by three generations and one hundred years.\r\nLater, when Andrey visits a cemetery he finds the grave of a young lady similar to the one he just met in Paris.\r\nDuring his search to the past it becomes clear that Vera's and Andrey's fates are connected. His great-grandfather, Andrey Dolmatov, had been an officer in the White Army during the Russian Revolution and felt in love with Duchess Vera Chernisheva in the last days of the Russian Empire and in the Russian civil war."
34276,Attraction,"The storyline revolves around colonel Valentin Lebedev, who is in charge of the military operation, his daughter Yulia, who develops a romantic relationship with the alien Hekon, and her former boyfriend Artyom who is the main antagonist.\r\nHekon is a representative of a technologically-advanced humanoid race who arrives to Earth incognito for research purposes. His spaceship is damaged by a meteor shower. The Russian Air Force mistakes his spaceship for a NATO spy satellite/spacecraft and damages it by firing missiles into it, causing it to crash into several buildings in Moscow and kill hundreds of people. It is on that same day when Yulia and Artyom have finished attending a meteor-watching event, that Yulia's friend Svetlana is killed while watching some meteors.\r\nThe Russian government decides not to enter into a contact with the ship and let it fix itself on its own. The landing area is evacuated, fenced, and guarded, and Moscow is placed under a curfew. Meanwhile, Yulia, Artyom, and his friends Khariton, Ruslan, and Piton sneak into the crash site to investigate the alien: after they beat him up and cause him to crash down a building, they retrieve his armor before an army patrol may find them. On another day, Yulia sneaks again to retrieve the alien, placing him to a nerdy classmate known as ""Google"" to help the alien recover. While examining his body, she notices a wristband forming on her right wrist, which manipulates water.\r\nThe alien, who introduces himself as ""Hekon"", was initially unsure about Earth and its people, but tells them that he is looking for a device known as ""Shilk"", that allows him to travel through space without destroying his body. Shilk, like the spaceship, appears to be attracting water. As he goes out to the streets to look for the device, the police mistake him for a drug addict and send him off to a police station to be interrogated.\r\nMeanwhile, Artyom and his friends drive in their car to inform Colonel Lebedev about the alien's armor that they had hidden in their garage. and stumble upon a crowd who is angry over rationed water, as the spaceship appears to absorb water in order to repair itself. Ruslan decides to provoke the crowd into becoming a disorderly riot. Artyom is hit by a policeman's baton as he tries to convince people to stop rioting, and is taken to the police station but he forgets to tell Colonel Lebedev about his discovery.\r\nIn order to get Shilk, Yulia goes to see her father, who interrupts his meeting with his officers when she informed him that she is pregnant with Hekon's child. She actually says this to distract him, so Hekon can disguise himself as a scientist and take Shilk from its container, and also because she was upset that he did not spend much time with her after her mother passed away.\r\nYulia confesses her true feelings for Hekon in a phone call to Artyom, while Artyom arrives with his friends to beat up the alien. Hekon fights back, and when one of Artyom's friends tries to shoot Hekon, Ruslan is shot by accident instead. Artyom and one of his friends flee before the police would arrest them. He reports this incident to gather many supporters and form a group to attack Hekon's spaceship, and avenge Ruslan's death, whom he blames the alien for causing it. In spite of his initial support, Piton sometimes expresses doubt on Artyom's goals.\r\nArtyom's group breaks through a police barricade, triggering several other robotic suits to come out and fight against the mob. Yulia and Hekon drive through the police barricade and the mob using a military car to return him and Shilk back to their ship, while fending off against Artyom, who angrily rampages using Hekon's suit. Hekon defeats Artyom, but Artyom shoots him and Yulia with a dropped military rifle. Saddened by this, Colonel Lebedev follows several walking suits as they carry Yulia and Hekon to the spaceship, where its machinery uses water to nurse Yulia's injuries. The spaceship's computer also responds to Lebedev's questions about Hekon's mission to observe the Earth, as its warlike civilizations and history had made it very unsafe for interstellar contact. However, Yulia's love for Hekon, and her willingness to protect him, have caused the computer's authorities to rewrite the results of their study on Earth.\r\nYulia, her father, and everyone else return to their daily lives, as the spaceship departs and releases excess water. Meanwhile, Artyom is arrested and presumably sent to a prison camp.\r\nThe fabula of the movie is vocalized by Yulia at the end: The truth is that one alien from far away trusted us more than we trust ourselves."


In [58]:
from pprint import pprint
query="a movie about Artificial Intelligence"
results=search(query, k=10, index=index, model=model)
content_df[content_df[itemid].isin(results)][allcols]

Unnamed: 0,Title,Plot
17286,Armed Response,"A team of trained operatives find themselves trapped inside an isolated military compound after its artificial intelligence is suddenly shut down. There, they begin to experience strange and horrific phenomena as they attempt to uncover what killed the previous team."
17327,Geostorm,"In 2019, following many catastrophic natural disasters, an international coalition commissions a system of climate-controlling satellites called ""Dutch Boy"". After Dutch Boy neutralizes a typhoon in Shanghai, a Senate sub-committee reprimands chief architect Jake Lawson for acting without authorization and replaces him with his brother Max, who works under Secretary of State Leonard Dekkom.\r\nThree years later in 2022, as a UN team stationed in the Registan Desert comes across a frozen village, Makmoud Habib, an engineer working on the International Climate Space Station (ICSS) copies data from the Afghanistan satellite onto a hard drive before being ejected into space. After convincing President Andrew Palma to conduct an investigation, Max persuades Jake to go to the ICSS to investigate. Another satellite increases temperatures in Hong Kong, causing a firenado that nearly kills Max's college friend Cheng Long, the head of Dutch Boy's Hong Kong department.\r\nJake arrives at ICSS to examine the malfunctioning satellites (which are damaged afterwards and their data erased) with station commander Ute Fassbinder and her crew. Outside the ICSS, they retrieve the hard drive, but hide it from the crew, suspecting a traitor, and recover the data, discovering that a virus has wiped out everyone's login access to the satellite and is causing the malfunctions. Suspecting Palma is using Dutch Boy as a weapon, Jake tells Max he needs to reboot the system, which requires the kill code, held by Palma. The ICSS staff neutralize malfunctioning satellites by deliberately knocking them offline via collisions with replacement satellites.\r\nBack on Earth, Cheng discovers he and Max have lost login access and warns Max of a global cataclysm known as a ""Geostorm"" if the malfunction continues. Cheng is pursued to Washington, D.C. by a team of rogue government agents, who ultimately cause his death in a traffic incident, but not before he says ""Zeus"", Discovering Project Zeus simulates extreme weather patterns to create a Geostorm, Max enlists his girlfriend, Secret Service agent Sarah Wilson, to acquire the code.\r\nDuring the Democratic National Convention in Orlando, Max discovers Orlando is next to be targeted after a massive hailstorm hits Tokyo and part of Rio de Janeiro freezes over. He requests Dekkom's help, but Dekkom instead tries unsuccessfully to kill Max, unveiling himself as the saboteur; Max immediately informs Sarah. The two kidnap Palma to protect him from Dekkom's agents and secure the kill code, which is Palma's biometrics. As they escape from the DNC stadium before a lightning storm destroys it, Max reveals their activities and Dekkom's treachery to Palma. After outsmarting Dekkom's mercenaries, the three arrest and confront Dekkom about his intentions: to decimate America's fellow governments and line of succession, dominating the world.\r\nThe ICSS team loses control of all operations as the virus initiates the self-destruct program. As more disasters strike around the world (including tornadoes in Mumbai, a heatwave in Moscow and a tsunami in Dubai), Jake realizes software engineer Duncan is the traitor. In the ensuing confrontation, Duncan accidentally ejects himself into space while Jake escapes. As the crew evacuates, Jake stays behind to ensure the reboot completes. Max and Sarah escort Palma to the Kennedy Space Center, where they transmit the code but learn that the self-destruct sequence can't be stopped. Jake and Ute, who also stayed behind, work together to reboot the system, transferring satellite control to the world's space agencies and preventing the Geostorm at the last second. The two take shelter in a replacement satellite as the self-destruction sequence completes and use its thrusters as a beacon. A nearby shuttle piloted by crew member Hernandez picks them up. Six months later, Jake works as the head engineer for a new system of satellites, which is now administered by an international committee."
21613,The White King,"The film follows Djata (Lorenzo Allchurch), a 12-year-old boy growing up in a dystopian territory, called Homeland, under a dictatorship and without access to the rest of the world. His father, Peter (Ross Partridge), tells him of the true nature of Homeland as well as of a treasure guarded by a man named Pickaxe (Ólafur Darri Ólafsson). Peter is later taken away by two government agents, leading to him and Djata's mother Hannah (Agyness Deyn) being labelled traitors by the government and the citizenry alike as Djata works to discover the whereabouts of his father."
22830,I Love That Crazy Little Thing,"Jiang Yang (William Chan) is an editor who dreams of becoming a movie director. In efforts to pursue his passion, however, Yang encounters several challenges. These lead him to crazy yet memorable adventures alongside his girlfriend, Qianqian (Jessica Jung).[3]"
23878,Barcelona: A Love Untold,"The movie follows the story of Ely (Daniel Padilla), a boy who can't get over his past love. In Barcelona he juggles work with his studies as he aims to get his master's degree. He then meets Mia (Kathryn Bernardo), a girl who sees Spain as a fresh start. After a series of mistakes in the past, she abruptly uprooted herself from her life in the Philippines to move forward and start anew. But the question is how? Together, they will go on a journey to find the answers to their lifelong questions, such as proving their worth, confronting the past, finding their way, and self-forgiveness."
27538,Sa...,"SA Kannada Movie is a thriller action cinema starring Vijay Suriya , Karthik Jayaram, Samyuktha Hornad, Hemanth Hegde, Directed by Hemanth Hegde. The story takes place in Coorg in the background of a Home stay. A personal conflict in the family turns to be serious when an external intruder enter the family. The story has very unusual touch of terror and thriller. How ever the forte of the filmmaker is his screenplay and it is well depicted in this movie too. It was in Story for the controversial Poster that was set in Bangalore [1]"
28878,Vaapsi,"Vaapsi film is the story of those youngsters, who left their country after 1984 massacre to survive and settle in other countries. Now all of them just want to come back to their native homes. They are eagerly waiting for the moment when they will come back to their country.[5][6]\r\nThe main protagonist of the story, Ajit Singh (Harish Verma), resides with his parents and sister"
31489,Chennai 2 Singapore,"The film is a comedy about a struggling Indian film maker, Harish, who makes his way to Singapore to look for investors to finance his dream movie. However, a series of misfortunes foils his plans and he also ends up losing his passport. At a loss, Harish meets Vaanambaadi, an eccentric Singaporean cameraman. With Vaanambaadi's advice and help, Harish almost realises his dream. But then he meets a girl who is dying from cancer and gets swept up in a number of hilarious but serious situations. Without the cliched romance in Indian films, the story takes a light and humorous turn into a madcap journey about life, death and everything else.[4]"
34261,The Heritage of Love,"The film is set in 2016. Andrey Kulikov is a young machinist who is going to Paris to visit an old lady, and to buy the oldest Russian-made car, the Russo-Balt. While walking through Paris, Andrey sees a woman, Vera, and so starts the tale of two love stories, separated by three generations and one hundred years.\r\nLater, when Andrey visits a cemetery he finds the grave of a young lady similar to the one he just met in Paris.\r\nDuring his search to the past it becomes clear that Vera's and Andrey's fates are connected. His great-grandfather, Andrey Dolmatov, had been an officer in the White Army during the Russian Revolution and felt in love with Duchess Vera Chernisheva in the last days of the Russian Empire and in the Russian civil war."
34276,Attraction,"The storyline revolves around colonel Valentin Lebedev, who is in charge of the military operation, his daughter Yulia, who develops a romantic relationship with the alien Hekon, and her former boyfriend Artyom who is the main antagonist.\r\nHekon is a representative of a technologically-advanced humanoid race who arrives to Earth incognito for research purposes. His spaceship is damaged by a meteor shower. The Russian Air Force mistakes his spaceship for a NATO spy satellite/spacecraft and damages it by firing missiles into it, causing it to crash into several buildings in Moscow and kill hundreds of people. It is on that same day when Yulia and Artyom have finished attending a meteor-watching event, that Yulia's friend Svetlana is killed while watching some meteors.\r\nThe Russian government decides not to enter into a contact with the ship and let it fix itself on its own. The landing area is evacuated, fenced, and guarded, and Moscow is placed under a curfew. Meanwhile, Yulia, Artyom, and his friends Khariton, Ruslan, and Piton sneak into the crash site to investigate the alien: after they beat him up and cause him to crash down a building, they retrieve his armor before an army patrol may find them. On another day, Yulia sneaks again to retrieve the alien, placing him to a nerdy classmate known as ""Google"" to help the alien recover. While examining his body, she notices a wristband forming on her right wrist, which manipulates water.\r\nThe alien, who introduces himself as ""Hekon"", was initially unsure about Earth and its people, but tells them that he is looking for a device known as ""Shilk"", that allows him to travel through space without destroying his body. Shilk, like the spaceship, appears to be attracting water. As he goes out to the streets to look for the device, the police mistake him for a drug addict and send him off to a police station to be interrogated.\r\nMeanwhile, Artyom and his friends drive in their car to inform Colonel Lebedev about the alien's armor that they had hidden in their garage. and stumble upon a crowd who is angry over rationed water, as the spaceship appears to absorb water in order to repair itself. Ruslan decides to provoke the crowd into becoming a disorderly riot. Artyom is hit by a policeman's baton as he tries to convince people to stop rioting, and is taken to the police station but he forgets to tell Colonel Lebedev about his discovery.\r\nIn order to get Shilk, Yulia goes to see her father, who interrupts his meeting with his officers when she informed him that she is pregnant with Hekon's child. She actually says this to distract him, so Hekon can disguise himself as a scientist and take Shilk from its container, and also because she was upset that he did not spend much time with her after her mother passed away.\r\nYulia confesses her true feelings for Hekon in a phone call to Artyom, while Artyom arrives with his friends to beat up the alien. Hekon fights back, and when one of Artyom's friends tries to shoot Hekon, Ruslan is shot by accident instead. Artyom and one of his friends flee before the police would arrest them. He reports this incident to gather many supporters and form a group to attack Hekon's spaceship, and avenge Ruslan's death, whom he blames the alien for causing it. In spite of his initial support, Piton sometimes expresses doubt on Artyom's goals.\r\nArtyom's group breaks through a police barricade, triggering several other robotic suits to come out and fight against the mob. Yulia and Hekon drive through the police barricade and the mob using a military car to return him and Shilk back to their ship, while fending off against Artyom, who angrily rampages using Hekon's suit. Hekon defeats Artyom, but Artyom shoots him and Yulia with a dropped military rifle. Saddened by this, Colonel Lebedev follows several walking suits as they carry Yulia and Hekon to the spaceship, where its machinery uses water to nurse Yulia's injuries. The spaceship's computer also responds to Lebedev's questions about Hekon's mission to observe the Earth, as its warlike civilizations and history had made it very unsafe for interstellar contact. However, Yulia's love for Hekon, and her willingness to protect him, have caused the computer's authorities to rewrite the results of their study on Earth.\r\nYulia, her father, and everyone else return to their daily lives, as the spaceship departs and releases excess water. Meanwhile, Artyom is arrested and presumably sent to a prison camp.\r\nThe fabula of the movie is vocalized by Yulia at the end: The truth is that one alien from far away trusted us more than we trust ourselves."


#Query generation

In [None]:
!pip install tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import AutoTokenizer, AutoModel
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

In [None]:
tokenizer = T5Tokenizer.from_pretrained('BeIR/query-gen-msmarco-t5-large-v1')
model = T5ForConditionalGeneration.from_pretrained('BeIR/query-gen-msmarco-t5-large-v1')
model.eval()

#Select the device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=1024, out_features=4096, bias=False)
              (wo): Linear(in_features=4096, out_features=1024, bias=False)
              (d

In [None]:
content_df['clean']=content_df['clean'].astype(str)

In [None]:
import tqdm

# Parameters for generation
batch_size = 5 #Batch size
num_queries = 5 #Number of queries to generate for every paragraph
max_length_paragraph = 512 #Max length for paragraph
max_length_query = 64   #Max length for output query

paragraphs = list( content_df['clean'].values)
def _removeNonAscii(s): return "".join(i for i in s if ord(i) < 128)
with open('generated_queries_all.tsv', 'w') as fOut:
    for start_idx in range(0, len(content_df['clean'].values), batch_size):
        sub_paragraphs =paragraphs[start_idx:start_idx+batch_size]
        inputs = tokenizer.prepare_seq2seq_batch(sub_paragraphs, max_length=max_length_paragraph, truncation=True, return_tensors='pt').to(device)
        outputs = model.generate(
            **inputs,
            max_length=max_length_query,
            do_sample=True,
            top_p=0.95,
            num_return_sequences=num_queries)

        for idx, out in enumerate(outputs):
            query = tokenizer.decode(out, skip_special_tokens=True)
            query = _removeNonAscii(query)
            para = sub_paragraphs[int(idx/num_queries)]
            para = _removeNonAscii(para)
            fOut.write("{}\t{}\n".format(query.replace("\t", " ").strip(), para.replace("\t", " ").strip()))

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and targets.

Here is a short example:

model_inputs = tokenizer(src_texts, text_target=tgt_texts, ...)

If you either need to use different keyword arguments for the source and target texts, you should do two calls like
this:

model_inputs = tokenizer(src_texts, ...)
labels = tokenizer(text_target=tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



#Finetune

In [6]:
""
from sentence_transformers import SentenceTransformer, InputExample, losses, models, datasets
import os


train_examples = []
with open('generated_queries_all.tsv') as fIn:
    for line in fIn:
        query, paragraph = line.strip().split('\t', maxsplit=1)
        train_examples.append(InputExample(texts=[query, paragraph]))

# To use MultipleNegativesRankingLoss, it is important
# that the batch does not contain duplicate entries, i.e.
# no two equal queries and no two equal paragraphs.
# To ensure this, we use a special data loader
train_dataloader = datasets.NoDuplicatesDataLoader(train_examples, batch_size=32)

# Now we create a SentenceTransformer model from scratch
word_emb = models.Transformer('distilbert-base-uncased')
pooling = models.Pooling(word_emb.get_word_embedding_dimension())
model = SentenceTransformer(modules=[word_emb, pooling])

# MultipleNegativesRankingLoss requires input pairs (query, relevant_passage)
# and trains the model so that is is suitable for semantic search
train_loss = losses.MultipleNegativesRankingLoss(model)


#Tune the model
num_epochs = 3
warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=num_epochs, warmup_steps=warmup_steps, show_progress_bar=True)

os.makedirs('output', exist_ok=True)
model.save('output/programming-model')

Downloading (…)lve/main/config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/238 [00:00<?, ?it/s]

Iteration:   0%|          | 0/238 [00:00<?, ?it/s]

Iteration:   0%|          | 0/238 [00:00<?, ?it/s]

search

In [10]:

from sentence_transformers import SentenceTransformer, util
import gzip
import json
import os

# Load the model we trained in 2_programming_train_bi-encoder.py
model = SentenceTransformer('output/programming-model')



In [8]:

# # Load the corpus
# docs = []
# corpus_filepath = 'wiki-programmming-20210101.jsonl.gz'
# if not os.path.exists(corpus_filepath):
#     util.http_get('https://sbert.net/datasets/wiki-programmming-20210101.jsonl.gz', corpus_filepath)

# with gzip.open(corpus_filepath, 'rt') as fIn:
#     for line in fIn:
#         data = json.loads(line.strip())
#         title = data['title']
#         for p in data['paragraphs']:
#             if len(p) > 100:    #Only take paragraphs with at least 100 chars
#                 docs.append((title, p))

# paragraph_emb = model.encode([d[1] for d in docs], convert_to_tensor=True)

# print("Available Wikipedia Articles:")
# print(", ".join(sorted(list(set([d[0] for d in docs])))))

  0%|          | 0.00/173k [00:00<?, ?B/s]

Available Wikipedia Articles:
Assembly language, C (programming language), C Sharp (programming language), C++, Go (programming language), Java (programming language), JavaScript, Keras, Laravel, MATLAB, Matplotlib, MongoDB, MySQL, Natural Language Toolkit, NumPy, PHP, Pandas (software), Perl, PostgreSQL, PyTorch, Python (programming language), R (programming language), Rust (programming language), Scala (programming language), SciPy, Scikit-learn, Swift (programming language), TensorFlow, Vue.js


In [35]:
corpus_embeddings = model.encode(content_df.clean.values,show_progress_bar=True, convert_to_numpy=True)

Batches:   0%|          | 0/48 [00:00<?, ?it/s]

semantic search

In [37]:
query = "movie about action and drama"
query_emb = model.encode(query, convert_to_tensor=True)
hits = util.semantic_search(query_emb, corpus_embeddings, top_k=10)[0]
all_hits_ids = set([hit['corpus_id'] for hit in hits])

In [40]:
pd.set_option('display.max_colwidth', None)
itemids=[]
for i in all_hits_ids:
  itemids.append(content_df.iloc[i][itemid])
content_df[content_df[itemid].isin(itemids)][allcols]

Unnamed: 0,Title,Plot
22809,Brothers,The film centers on the relationship between two brothers who used to have a bond for life but it is ripped apart through a civil war in 1936.
27064,Laali Ki Shaadi Mein Laaddoo Deewana,"The movie revolves around a royal marriage and quirky characters, who are trapped in their own tragedies."
27540,Sipaayi,"The protagonist, a TV reporter fights to stop illegal activities around the city."
27546,Mandya to Mumbai,"The movie begins in Mandya where the Hero leads a happy life with his parents. A shocking incident changes his life. His parents are murdered by an anti-social and Hero is forced to go to prison, where he is tortured. He comes across four criminals in the prison. These hardcore criminals break loose from the prison and help Hero take revenge on his parent's killers. They decide to go to Mumbai where they plan to become big gangsters. But fate has other plans."
27553,Chowka,"The film is the narrative which emerges through five different timelines and geographies within the state of Karnataka, encapsulating the lives of four people with a common friend and their love lives while sending across a strong message relating to the common man. Four individuals, each with their own dreams and aspirations, get framed for sins they did not commit and land up in jail. They meet a school teacher who not just shows them hope but also stirs up a passion towards reform.\r\nThe story takes place in the cities of Bangalore in 1986, followed by 1995 in Mysore, 2000 in Mangalore and 2007 in Bijapur. The present day is depicted in 2017, in the Central Jail of Bellary and ends in the Suvarna Vidhana Soudha premises in Belagavi. It ends on a happy but tragic note as all five are proved innocent, but the four heroes get shot by police personnel."
28619,Aakashathinum Bhoomikkumidayil,The movie revolves around a village in northern Kerala where political murders happen and in such one incident a guy is murdered mistaking him to be someone else. The families of both are left stranded and the film takes us through the incidents that happen to the children of both families.
28647,Lechmi,"The main theme of the movie revolves around four bachelors, Vineeth, Ikku, Stephen and Sudhi. The story advances on an incident that made unforeseen, dramatic changes in their lives. Once a spirit comes to their flat. This Spirit (Lechmi) doesn't remember anything, where she comes from, her real name, who killed her, and so on. With the help of Baba Swami (Biju Sopanam), an exorcist they came to know more about the spirit. The story of the film revolves around the incidents happen after it."
28652,Parava,"The story revolves in Fort Cochin about the life of two tween boys. After a half hour, it changes to the life of a young boy his mentor and his friends, of which at the end a tragedy happens. After that the story comes back to the two boys, and at the end, revenge is taken on the tragedy and the movie has a feel-good happy ending with everything back to normal."
28797,Gaon Thor Pudhari Chor,"The film’s screenplay does not spare the ruling party as well as opposition party, presenting their gimmicks to remain in limelight, while pulling each other’s legs. In short, all the politicians will be target in this film. And the makers of this film are confident that their film would offer total entertain to the Marathi audience .[5]"
28805,Aadesh - Power Of Law,Film is a Journey of Public Prosecutor and it is a court drama.


In [44]:
query = "a movie about Artificial Intelligence"
query_emb = model.encode(query, convert_to_tensor=True)
hits = util.semantic_search(query_emb, corpus_embeddings, top_k=10)[0]
all_hits_ids = set([hit['corpus_id'] for hit in hits])

In [45]:
pd.set_option('display.max_colwidth', None)
itemids=[]
for i in all_hits_ids:
  itemids.append(content_df.iloc[i][itemid])
content_df[content_df[itemid].isin(itemids)][allcols]

Unnamed: 0,Title,Plot
17211,Ghost in the Shell,"In the near future, humans are augmented with cybernetic improvements to traits such as vision, strength, and intelligence. Augmentation developer Hanka Robotics establishes a secret project to develop an artificial body, or ""shell"", that can integrate a human brain rather than an AI. Mira Killian, the sole survivor of a cyberterrorist attack which killed her parents, is chosen as the test subject after her body is damaged beyond repair. Over the objections of her designer, Dr. Ouelet, Hanka CEO Cutter decides to use Killian as a counter-terrorism operative.\r\nA year later, Killian has attained the rank of Major in the anti-terrorist bureau Section 9, working alongside operatives Batou and Togusa under Chief Daisuke Aramaki. Killian, who experiences hallucinations that Ouelet dismisses as glitches, is troubled by how little she remembers of her past. The team thwarts a terrorist attack on a Hanka business conference, and Killian destroys a robotic geisha after it murders a hostage. After learning that the geisha was hacked by an unknown entity known as Kuze, Killian breaks protocol and ""dives"" into its AI for answers. The entity attempts a counter-hack, and Batou is forced to disconnect her. They trace the hacker to a yakuza nightclub, where they are lured into a trap. The explosion destroys Batou's eyes and damages Killian's body. Cutter is enraged by Killian's actions, and threatens to have Section 9 shut down unless Aramaki keeps her in line.\r\nKuze tracks down Section 9's Hanka consultant, Dr. Dahlin, and kills her. The team links her murder to the deaths of other senior company researchers and realize that Ouelet is the next target. Kuze takes control of two sanitation workers and sends them to kill Ouelet. Batou, now with cybernetic eyes, kills one while the repaired Killian subdues the other. While they interrogate the worker, Kuze speaks through him before compelling him to commit suicide. Togusa traces the hack to a secret location, where the team discovers a large number of humans mentally linked as a makeshift signal network. Killian is captured and Kuze reveals that he is a failed Hanka test subject from the same project that created Killian. He urges her to question her own memories, then frees her and escapes.\r\nKillian confronts Ouelet, who admits that 98 test subjects died before Killian, and that her memories are implanted. Cutter has decided that Killian is a liability and orders Ouelet to kill her after she returns to Hanka Robotics. Instead, Ouelet gives Killian an address and helps her escape. Cutter kills Ouelet, but blames Killian, saying that she has gone rogue. He informs Aramaki and the team that Killian must be terminated.\r\nKillian follows the address to an apartment occupied by a widowed mother, who reveals that her daughter, Motoko Kusanagi, ran away from home a year ago and was arrested. Motoko took her own life while in custody. Killian leaves and contacts Aramaki, who allows Cutter to remotely eavesdrop on their conversation. Batou, Togusa, and Aramaki eliminate Cutter's men trying to ambush them, while Killian follows her memories to the hideaway where Motoko was last seen. There, she and Kuze meet and recall their past lives as anti-augmentation radicals who were abducted by Hanka as test subjects.\r\nCutter deploys a ""spider-tank"" to kill them. Kuze nearly dies before Killian is able to tear off the tank's motor, losing an arm in the process. Mortally wounded, Kuze offers to merge his ""ghost"" with Killian's, but Killian refuses. Kuze fades out, then a Hanka sniper kills him. Batou and the team rescue Killian, while Aramaki executes Cutter with Killian's consent. The next day, Killian, now repaired and embracing her true identity as the Japanese Motoko, reconnects with her mother and returns to work with Section 9."
17286,Armed Response,"A team of trained operatives find themselves trapped inside an isolated military compound after its artificial intelligence is suddenly shut down. There, they begin to experience strange and horrific phenomena as they attempt to uncover what killed the previous team."
22872,China Salesman,A young Chinese engineer discovers a conspiracy and is the only one who can stop it.
27059,Machine,"Machine depicts the story of racing enthusiasts Sarah (Kiara Advani) daughter of very rich businessman Balraj Thapar (Ronit Roy) and Ransh (Mustafa Burmawalla), who meet each other due to circumstances. Sarah is beaten by Ransh at a car race event, Sarah later finds out Ransh; new in college, is her new classmate. As their bond becomes stronger, they eventually fall in love and after the accidental death of her friend/secret lover Aditya (Eshan Shanker) their common friend, they get married. However, Ransh deceived Sarah as he is told to and kills her for her money and property by throwing her off a cliff. She is eventually saved by her college lover Aditya's twin brother Raj (Eshan Shanker) who is a Commando in the Indian Army. Then Sarah and Raj come to know the exact reason about her killing when they find out Ransh was doing all this on someone's orders and he is duping another rich businessman's daughter. They trace him and find that her own father had planned all these due to a very convoluted reason. He is not Sarah's biological father and they come to know that he only raised her as his own daughter for her wealth. He even kills Sarah's parents by planning it like an accident. When he was about to pull his gun to shoot her, Sarah shoots him and burns him alive with all the money he looted so far by shooting the main power socket. They both eventually try take revenge from Ransh ,but Sarah tells that she is deeply in love with him and also, how can she kill him as he is only a ""MACHINE"". HE does whatever is fitted into his mind, whatever instructed, he never realized her love for him, and tried to murder her. After this when Ransh understands, he comes forward, Raj sees the gun lying just beside Ransh and throws a knife for safety of Sara, and Ransh realizes what he actually is. He realizes that he has his own brain and heart. At the end Ransh understands he is truly in love with Sara and dies in the end committing his true love for Sarah leaving Sarah sad.[5]"
27088,Shab,"This film is about lives of people who live on the edge of what society finds acceptable. It is about coming to terms with oneself and accepting other people as they are. The film is set in the cosmopolitan city of Delhi where people from all over the country come to fulfill their dreams. For some, these dreams are realized, while others remain in an endless pursuit of other elusive dreams. Perhaps it is the hope of ultimately reaching that destination that keeps one going. And then there are others who surrender to the overwhelming power of the city of dreams and get lost in the maze. This film is the story of a coffee shop girl Raina (Arpita Chatterjee) and the man who loves her Afzar (Ashish Bisht) an aspiring model. Sonal (Raveena Tandon) is a fashion patron who becomes his mentor. The film centers around their intense relationship. Love, hurt and power. Into their lives comes in Benoit (Simon Frenay) a French expat who teaches French in Delhi."
27109,The Final Exit,It is the story of a guy who begins to hallucinate and is seeing things which are not for real. Things get worst when he tries to shoot those things through his camera.[6][7]
28552,Style,"Tom, a young auto mechanic falls in love with Diya whom he sees one day from the bus. The love got blossomed and at one point a psychopath named Edger came into the life of this couple after an unfortunate accident. The incident lead to a scenario where Edger got humiliated and the movie is basically about the rivalry between Tom and Edger."
28805,Aadesh - Power Of Law,Film is a Journey of Public Prosecutor and it is a court drama.
33896,Galactic Armored Fleet Majestic Prince: Genetic Awakening,"In the year 2110, humanity has expanded its frontier into space in its drive for new resources. Advances in genetic engineering research led to the establishment of the MJP (Military Junior Pre-Academy) project, which saw the creation of genetically enhanced humans known as the Evolved Children, developed with the intention of allowing humans to adapt to the new frontier. However, when Earth finds itself under threat by an alien race called the Wulgaru, human forces decide to have the evolved children deployed as soldiers to fight on the front lines.\r\nTeam Rabbits, a quintet of evolved children whose troubling lack of teamwork and common sense prevents them from reaching their true potential, are selected as test pilots of cutting edge mobile battle suits called AHSMB (Advanced High Standard Multipurpose Battle Device), powered by the Juria System, a cutting edge technology whose effectiveness in combat is increased according to the survival instincts of whoever uses it.Ep. 4 As they join forces to overcome their personal weaknesses and achieve their true potential, the members of Team Rabbits (nicknamed Majestic Princes because of their association with MJPEp. 2) eventually assume a key role in mankind's effort to thwart the alien invasion of Earth."
34884,Non-Transferable,"The film centres around a young woman named Amy Tyler, who books a surprise holiday to Europe with her boyfriend Josh Merit.[4] However, the two separate before they are able to go onto the trip; therefore, she advertises her tickets online in search for another man with an identical name to her ex-boyfriend to go on the tour instead.[3][1] The story, although written in 2012, bore similarities[1] to a 2014 case of a 28-year-old Toronto man named Jordan Axani, who offered up free tickets to on Reddit a girl named Elizabeth Gallagher, when he broke up with his girlfriend of the same name. Although his ticket had a strict no-transfer policy, as passport information was not required when booking, the ticket could be used by anyone with the same name.[5]\r\nWhen a BuzzFeed article about the story became viral,[6] the script was given ""new life"".[2] Brendan Bradley said that ""I wrote this script five years ago"", Bradley said, ""and everyone told me ... 'This would never happen! This is too unrealistic!' And the project didn’t get any traction because everyone thought the premise was too crazy. And then it happened in real life.""[1]"


In [46]:
query = "a movie about robots"
query_emb = model.encode(query, convert_to_tensor=True)
hits = util.semantic_search(query_emb, corpus_embeddings, top_k=10)[0]
all_hits_ids = set([hit['corpus_id'] for hit in hits])

In [47]:
pd.set_option('display.max_colwidth', None)
itemids=[]
for i in all_hits_ids:
  itemids.append(content_df.iloc[i][itemid])
content_df[content_df[itemid].isin(itemids)][allcols]

Unnamed: 0,Title,Plot
17211,Ghost in the Shell,"In the near future, humans are augmented with cybernetic improvements to traits such as vision, strength, and intelligence. Augmentation developer Hanka Robotics establishes a secret project to develop an artificial body, or ""shell"", that can integrate a human brain rather than an AI. Mira Killian, the sole survivor of a cyberterrorist attack which killed her parents, is chosen as the test subject after her body is damaged beyond repair. Over the objections of her designer, Dr. Ouelet, Hanka CEO Cutter decides to use Killian as a counter-terrorism operative.\r\nA year later, Killian has attained the rank of Major in the anti-terrorist bureau Section 9, working alongside operatives Batou and Togusa under Chief Daisuke Aramaki. Killian, who experiences hallucinations that Ouelet dismisses as glitches, is troubled by how little she remembers of her past. The team thwarts a terrorist attack on a Hanka business conference, and Killian destroys a robotic geisha after it murders a hostage. After learning that the geisha was hacked by an unknown entity known as Kuze, Killian breaks protocol and ""dives"" into its AI for answers. The entity attempts a counter-hack, and Batou is forced to disconnect her. They trace the hacker to a yakuza nightclub, where they are lured into a trap. The explosion destroys Batou's eyes and damages Killian's body. Cutter is enraged by Killian's actions, and threatens to have Section 9 shut down unless Aramaki keeps her in line.\r\nKuze tracks down Section 9's Hanka consultant, Dr. Dahlin, and kills her. The team links her murder to the deaths of other senior company researchers and realize that Ouelet is the next target. Kuze takes control of two sanitation workers and sends them to kill Ouelet. Batou, now with cybernetic eyes, kills one while the repaired Killian subdues the other. While they interrogate the worker, Kuze speaks through him before compelling him to commit suicide. Togusa traces the hack to a secret location, where the team discovers a large number of humans mentally linked as a makeshift signal network. Killian is captured and Kuze reveals that he is a failed Hanka test subject from the same project that created Killian. He urges her to question her own memories, then frees her and escapes.\r\nKillian confronts Ouelet, who admits that 98 test subjects died before Killian, and that her memories are implanted. Cutter has decided that Killian is a liability and orders Ouelet to kill her after she returns to Hanka Robotics. Instead, Ouelet gives Killian an address and helps her escape. Cutter kills Ouelet, but blames Killian, saying that she has gone rogue. He informs Aramaki and the team that Killian must be terminated.\r\nKillian follows the address to an apartment occupied by a widowed mother, who reveals that her daughter, Motoko Kusanagi, ran away from home a year ago and was arrested. Motoko took her own life while in custody. Killian leaves and contacts Aramaki, who allows Cutter to remotely eavesdrop on their conversation. Batou, Togusa, and Aramaki eliminate Cutter's men trying to ambush them, while Killian follows her memories to the hideaway where Motoko was last seen. There, she and Kuze meet and recall their past lives as anti-augmentation radicals who were abducted by Hanka as test subjects.\r\nCutter deploys a ""spider-tank"" to kill them. Kuze nearly dies before Killian is able to tear off the tank's motor, losing an arm in the process. Mortally wounded, Kuze offers to merge his ""ghost"" with Killian's, but Killian refuses. Kuze fades out, then a Hanka sniper kills him. Batou and the team rescue Killian, while Aramaki executes Cutter with Killian's consent. The next day, Killian, now repaired and embracing her true identity as the Japanese Motoko, reconnects with her mother and returns to work with Section 9."
17286,Armed Response,"A team of trained operatives find themselves trapped inside an isolated military compound after its artificial intelligence is suddenly shut down. There, they begin to experience strange and horrific phenomena as they attempt to uncover what killed the previous team."
21592,Native,2 pilots in space are drawn towards a distant music.
22410,Operation Avalanche,"Two CIA agents, part of the Agency’s (then) small Audio Visual (AV) Department, infiltrate NASA to expose a potential mole working for the Soviet Union. There, they find that NASA has kept its inability to reach the 1969 deadline for the Apollo 11 Moon landing a secret, with a plan to cover the problem by shooting down the Apollo 11 capsule (killing the real astronauts) and blaming the Soviets. When the young agents learn that the Moon mission can execute everything but the lunar landing itself, they become involved in a plot to use their AV skills to fake the landing portion. As their operation continues to a successful conclusion, the team becomes increasingly paranoid that they are being watched, by either the mole or by the CIA itself. As the Apollo 11 mission nears, the team buries copies of what becomes the found footage in a verdant rural field."
22892,The Thousand Faces of Dunjia,The film follows a group of swordsmen's adventures to secretly protect humankind by hunting some mysterious creatures from the outer space.
27059,Machine,"Machine depicts the story of racing enthusiasts Sarah (Kiara Advani) daughter of very rich businessman Balraj Thapar (Ronit Roy) and Ransh (Mustafa Burmawalla), who meet each other due to circumstances. Sarah is beaten by Ransh at a car race event, Sarah later finds out Ransh; new in college, is her new classmate. As their bond becomes stronger, they eventually fall in love and after the accidental death of her friend/secret lover Aditya (Eshan Shanker) their common friend, they get married. However, Ransh deceived Sarah as he is told to and kills her for her money and property by throwing her off a cliff. She is eventually saved by her college lover Aditya's twin brother Raj (Eshan Shanker) who is a Commando in the Indian Army. Then Sarah and Raj come to know the exact reason about her killing when they find out Ransh was doing all this on someone's orders and he is duping another rich businessman's daughter. They trace him and find that her own father had planned all these due to a very convoluted reason. He is not Sarah's biological father and they come to know that he only raised her as his own daughter for her wealth. He even kills Sarah's parents by planning it like an accident. When he was about to pull his gun to shoot her, Sarah shoots him and burns him alive with all the money he looted so far by shooting the main power socket. They both eventually try take revenge from Ransh ,but Sarah tells that she is deeply in love with him and also, how can she kill him as he is only a ""MACHINE"". HE does whatever is fitted into his mind, whatever instructed, he never realized her love for him, and tried to murder her. After this when Ransh understands, he comes forward, Raj sees the gun lying just beside Ransh and throws a knife for safety of Sara, and Ransh realizes what he actually is. He realizes that he has his own brain and heart. At the end Ransh understands he is truly in love with Sara and dies in the end committing his true love for Sarah leaving Sarah sad.[5]"
28656,Sherlock Toms,"Young Thomas Joy is an avid follower of Sir Arthur Conan Doyle's world-famous investigative stories having Sherlock Holmes as the lead character. His friends call him 'Sherlock Toms' due to his keen observation skills. Even after facing some hiccups in life at a very young age he grows up to be an IRS officer but ends up having a troubled relationship with his wife Rekha. Film shows tom getting accidentally married to Rekha who doesn't adjust in anyway with Tom's difficulty and also doesn't show respect to Tom and his father. The sharp and shrewd Toms hits a new low when a raid and an operation planned by him sinks without a trace.Tom get suspended due to failure of the raid. Tom quarrels with wife due to the failure. But she bursts in tears and hurts Tom more and gives complaint to police to arrest Tom under Domestic Violence. Now tom has been filed case against by wife and the chit fund company where he had conducted raid. Tom gets arrested by police. During the hearing in court, Tom shouts seeing his advocate fail to prove his innocence, that he is going to commit suicide. The chief judge orders Tom to undergo psychiatric treatment as he tells of his plan to suicide and then only truth will come out. Film shows Tom fed up with life and going to suicide. During the commotion he accidentally enters Vijyaragavan's room and standing by window side, attracts police men and the media people and makes demands. First his suspension to be cancelled. Then his wife to sign a long waited divorce petition. Meanwhile Tom discover cancelled currencies in Vijayaraghavan's room for which he demand news reporter to show in the media and get him good name. By the time rescue team capture Tom from the window preventing his suicide. Then while in police custody, he confesses that all the act was his drama for a secret raid which his local friends did after previous scene in the film showing that they quarrelled and don't have contact anymore. The film ends with news reporter family search for a guy to marry the reporter from a newspaper and finds Tom's advertisement in search of a girl and the reporter telling he is rather good guy to get for her."
33864,Terra Formars and Nicholas and Gordon’s Stranger Corps: The Fake Wave,"In the 21st century, overpopulation has become such a problem that scientists begin preparing for the colonization of Mars. Their first steps in the terraforming of Mars involve seeding the planet with moss in order to absorb the sunlight and create a hospitable atmosphere as well as increase the surface temperature. They also introduce cockroaches in order to spread the moss.\r\n500 years later, in the year 2599, a manned mission is sent to the red planet to clear out the cockroach colony in preparation for the human colonization of Mars. The crew is an international team of misfits and criminals who are given the opportunity to take part in the mission to gain forgiveness for their crimes.\r\nThey enter the new atmosphere aboard the BUGS 2 and find the terraforming to have been successful. They release a roach bomb but when they walk onto the surface they do not find any dead roaches among the moss. Eventually they come upon human-sized anthropomorphic cockroaches wielding clubs that quickly kill two of the humans with their superior speed and physical strength. They determine that these creatures are the result of the accelerated evolution of the original small roaches sent by humans to aid in the terraforming efforts 500 years earlier.\r\nIn order to combat the creatures the team is given a range of specially concocted DNA infusions individually designed to give each member of the team special powers. God Lee is eager to accept the infusion, believing that his powers will also give him added bargaining power when he returns to Earth. He walks on the surface of Mars and injects the DNA when he encounters a giant cockroach, transforming into a miidera beetle (Pheropsophus jessoensis) with the power to emit fire. The cockroach survives the fire blast and kills God Lee.\r\nA giant cockroach climbs on top of the spacecraft breaks through its transparent shell, killing two members of the team as it falls. Captain Dojima injects the DNA and transforms into a bullet ant (Paraponera clavata) with the power to lift one hundred times his own weight and kills the giant cockroach.\r\nDozens of giant cockroaches swarm into the spacecraft as an expedition team prepares to venture out to grab parts from the BUGS 1, the vessel from the previous mission 10 years earlier, to get the current vessel BUGS 2 into shape to return to Earth. Tezuka injects the DNA and transforms into a Steninae MacLeay then sits on the back of the Mars rover and blows a jet of fire out to propel it across the terrain and kill giant cockroaches at the same time. The captain and Ichiro remain behind to battle the swarm of giant cockroaches. Ichiro injects the DNA and transforms into a sleeping chironomid (Polypedilum vanderplanki) then removes the oxygen from the vessel to cause the cockroaches to die of asphyxiation.\r\nThe expedition team encounters a wave of giant cockroaches but they push through it. A second larger wave approaches and the team fears that Tezuka will not be able to survive this one so Osako volunteers to go out to help him. She injects the DNA and transforms into a black weevil Pachyrhynchus infernalis, an insect strong enough to survive being stepped on by an elephant. Mary joins her, injecting the DNA to become a rainbow stag beetle Phalacrognathus muelleri with optical camouflage capabilities. They protect Tezuka as he propels the rover through the second wave. The three are thrown from the vehicle and killed by the giant cockroaches as the rover rolls over and enters into autopilot mode to continue its journey.\r\nThe remaining members discover and enter the BUGS 1. They find that it was already attempting to send something back to Earth. Outside two more members of the team are shot by giant cockroaches wielding large firearms. It is revealed that Ko Honda, the person who sent the team on this mission, is in communication with the giant cockroaches, who appear to have human intelligence. Keisuke Dojima injects the DNA and transforms into a deadly Asian giant hornet while Jim Muto injects the DNA and transforms into a desert locust with flying and powerful kicking and jumping abilities. Zhang Ming-Ming injects the DNA and transforms into an orchid mantis, a carnivorous insect. They then discover giant pyramids on the surface of Mars.\r\nAsuka Moriki appears alive aboard the BUGS 2 and transformed into an emerald cockroach wasp, capable of entering the bodies of giant cockroaches and manipulating them. She revives Ichiro with water and they launch the BUGS 2 back toward Earth with a giant cockroach egg on board. The egg hatches and the giant cockroaches inside resist Asuka's manipulation and kill her.\r\nSurprised at the new evolutionary advantage of the giant cockroaches, Ko Honda commands the BUGS 2 to fall back to the surface of Mars. The cockroaches aboard the BUGS 2 call for the assistance of the cockroaches on the surface, who sprout wings and ascend into the sky to soften the landing of the BUGS 2.\r\nBack on the surface they face the remaining members of the human expedition team, all of whom inject another dose of DNA to transform and do battle with the hordes of cockroaches. Over the course of the battle they discover that taking a second dose will create an enhanced transformation with increased powers. Jim injects a third time, causing irreversible transformation. Shokichi attempts to defend him and is injured.\r\nNanao appears, transformed into a silk moth, and spreads her dust over the cockroach horde. A cockroach fires a weapon at her, causing an explosion that destroys the cockroach horde as well as Nanao. Jim and Shokichi escape the explosion and fly away, only to fall back to the surface where Jim dies in his permanent insect form. Shokichi boards the BUGS 2 again and confronts Ichiro, who saves him from a cockroach attack then suggests that they return to Earth quickly on a two-person spacecraft. As they blast off the cockroaches decide against flying after them.\r\nBack on Earth, Sakakibara fears what the reaction will be when Earth learns of their actions. Ko Honda admits that it would be their downfall and suggests preventing the vessel from returning to Earth and then disappearing from Japanese society. She attempts to shoot him but he stops the bullet and tosses a spider at her which releases a toxic purple spray and kills her.\r\nThe film ends with Ichiro and Shokichi in the small vessel discussing their plans for the future. Shokichi plans to build graveyards for Nanao and every member. Besides, he decides to return to Mars."
33890,GANTZ: O and Morrison Steel: The Greatest Years of My Life,"While fighting invading monsters, Kei Kurono rescues his friend, Reika. Though she protests, he attacks the monsters' leader, dying after killing it. Reika and her surviving teammates are teleported back to their base. Elsewhere, Masaru Kato dies attempting to save someone from a knife attack in a subway. He wakes up in a room with Reika and her teammates: Yoshikazu Suzuki, an old man; Joichiro Nishi, a surly teenager; and an angry, unidentified man. Suzuki explains that they have each died and woken in the room, and have been forced to fight against waves of monsters ever since. Before Suzuki can explain further, a black orb identified as ""Gantz"" announces that their next mission is about to begin. The angry man refuses to participate and is killed by Nishi, who reasons that he would have been a liability.\r\nAfter putting on skintight suits containing advanced technology, the group is teleported to Osaka. When Kato presses for answers, Nishi threatens to kill him. Suzuki quickly explains the ""game"": the team is given two hours to kill every monster in the city; if they fail, the team members die. After Kato and his team meet another team, some of whom have exotic weapons, Suzuki tells him that players accrue points for killing monsters. Anyone who scores 100 points can choose from three bonuses: upgraded weaponry, resurrection of a fallen teammate, or freedom from the ""game"".\r\nNishi turns invisible to stealthily wait for high point monsters. The others watch as the better-equipped Osaka team easily slaughters monsters. Reika and Suzuki urge Kato to not engage the monsters needlessly, but he insists that they rescue the city's citizens. They refuse, saying they are too weak.\r\nAnzu Yamasaki, a member of the Osaka team, follows Kato, trying to figure out his motivation. Kato says he is a student who has been living alone with his younger brother since the death of their parents. Yamasaki reveals that she has a young son and says they have something in common: both must stay alive to help someone who depends on them. Although Yamasaki initially believes that Kato is showing off, she eventually concludes that he genuinely wants to help others and joins him in killing a monster that was menacing civilians.\r\nA gigantic monster rises from the water, and an equally large mecha appears, piloted by seven-time winner Hachiro Oka. Oka kills the monster but loses his mecha. In the mean time, another monster kills the two most competent players on the Osaka team.\r\nOka, wounded and weaponless, engages the lead monster, who appears in a series of increasingly deadly forms. Oka appears to win but tells the team it is not dead; it must be taken by surprise to be truly killed. After rising, the demon easily defeats the remaining players. Suzuki is heavily wounded after sacrificing himself to save Kato, and Nishi's arm is ripped off. Instead of killing them, it goes after Oka, who has left to retrieve better weapons.\r\nKnowing someone must defeat the demon and Oka may be too wounded, Kato decides to act as a decoy while Reika and Yamasaki snipe the monster. Before the plan begins, Yamasaki proposes that, if she and Kato survive, they begin living together with her son and Kato's brother. Although surprised, Kato agrees, and they take their positions.\r\nThe demon, having killed Oka, attacks Kato, and the others shoot it from a distance. Before dying, the demon kills Yamasaki and maims Kato. Kato collapses beside Yamasaki's body, and the others assume him dead. Reika, Suzuki, and Nishi are surprised when he rejoins them at the base, where all are fully healed. When the scores are tallied up, Kato has 100 points; Reika and Suzuki encourage him to choose his freedom, but he resurrects Yamasaki. After Gantz allows them to leave, Kato races to his brother. The others reveal that this is Kato's second time in Gantz' game: after winning a previous game, he was granted his freedom and had his memories wiped. Although they do not know how he returned, they state that his self-sacrificing nature remains the same."
33896,Galactic Armored Fleet Majestic Prince: Genetic Awakening,"In the year 2110, humanity has expanded its frontier into space in its drive for new resources. Advances in genetic engineering research led to the establishment of the MJP (Military Junior Pre-Academy) project, which saw the creation of genetically enhanced humans known as the Evolved Children, developed with the intention of allowing humans to adapt to the new frontier. However, when Earth finds itself under threat by an alien race called the Wulgaru, human forces decide to have the evolved children deployed as soldiers to fight on the front lines.\r\nTeam Rabbits, a quintet of evolved children whose troubling lack of teamwork and common sense prevents them from reaching their true potential, are selected as test pilots of cutting edge mobile battle suits called AHSMB (Advanced High Standard Multipurpose Battle Device), powered by the Juria System, a cutting edge technology whose effectiveness in combat is increased according to the survival instincts of whoever uses it.Ep. 4 As they join forces to overcome their personal weaknesses and achieve their true potential, the members of Team Rabbits (nicknamed Majestic Princes because of their association with MJPEp. 2) eventually assume a key role in mankind's effort to thwart the alien invasion of Earth."


In [41]:
!zip -r /content/model.zip /content/output

  adding: content/output/ (stored 0%)
  adding: content/output/programming-model/ (stored 0%)
  adding: content/output/programming-model/sentence_bert_config.json (deflated 4%)
  adding: content/output/programming-model/config.json (deflated 44%)
  adding: content/output/programming-model/tokenizer_config.json (deflated 41%)
  adding: content/output/programming-model/tokenizer.json (deflated 71%)
  adding: content/output/programming-model/vocab.txt (deflated 53%)
  adding: content/output/programming-model/1_Pooling/ (stored 0%)
  adding: content/output/programming-model/1_Pooling/config.json (deflated 47%)
  adding: content/output/programming-model/modules.json (deflated 53%)
  adding: content/output/programming-model/config_sentence_transformers.json (deflated 26%)
  adding: content/output/programming-model/README.md (deflated 58%)
  adding: content/output/programming-model/special_tokens_map.json (deflated 42%)
  adding: content/output/programming-model/pytorch_model.bin (deflated 8%