In [2]:
!pip install flair
!pip install transformers==3.1.0

# Zero Shot Learning (ZSL)

In [77]:
import pandas as pd
import flair
import tqdm

from flair.models import SequenceTagger
from itertools import islice


In [78]:
df = pd.read_csv("to_ZSL.csv")

In [79]:
df.head()

Unnamed: 0,book_authors,book_title,book_desc,genre
0,Suzanne Collins,The Hunger Games,Winning will make you famous. Losing means cer...,Young Adult
1,J.K. Rowling|Mary GrandPré,Harry Potter and the Order of the Phoenix,There is a door at the end of a silent corrido...,Fantasy
2,Harper Lee,To Kill a Mockingbird,The unforgettable novel of a childhood in a sl...,Classics
3,Stephenie Meyer,Twilight,About three things I was absolutely positive.F...,Young Adult
4,Markus Zusak,The Book Thief,Trying to make sense of the horrors of World W...,Historical


In [80]:
print(df.shape)


(43793, 4)


### Genre prediction

In [81]:
from flair.models import TARSClassifier
from flair.data import Sentence

In [82]:
# TARS: Task-aware representation of sentences
tars = TARSClassifier.load('tars-base')
existing_tasks = tars.list_existing_tasks()
existing_tasks

2022-05-24 15:44:58,325 loading file /root/.flair/models/tars-base-v8.pt


{'AGNews',
 'Amazon',
 'DBPedia',
 'GO_EMOTIONS',
 'IMDB',
 'NEWS_CATEGORY',
 'SST',
 'TREC_6',
 'Yelp'}

In [181]:
# we try a different - smaller - taxonomy
#genres_list = ['Books', 'Travel', "Children's", 'Science', 'Medical', 'Health,', 'Fitness', 'Dieting', 'Fiction', 'Business', 'Money', 'Crafts,', 'Hobbies', 'Home', 'Math', 'Christian', 'Bibles', 'Cookbooks,', 'Food', 'Wine', 'Computers', 'Technology', 'Literature', 'Religion', 'Spirituality', 'Teen', 'Young', 'Adult', 'Law', 'Humor', 'Entertainment', 'History', 'Arts', 'Photography', 'Sports', 'Outdoors', 'Romance', 'Biographies', 'Memoirs', 'Fantasy', 'Politics', 'Social', 'Sciences', 'Reference', 'Comics', 'Graphic', 'Novels', 'Test', 'Preparation', 'Self-Help', 'Engineering', 'Transportation', 'Calendars', 'Parenting', 'Relationships', 'Mystery,', 'Thriller', 'Suspense', 'Education', 'Teaching', 'Gay', 'Lesbian']

In [83]:
genre = df.genre.unique()

In [89]:
df_pred_genres = df[:5].copy()
df_pred_genres

Unnamed: 0,book_authors,book_title,book_desc,genre
0,Suzanne Collins,The Hunger Games,Winning will make you famous. Losing means cer...,Young Adult
1,J.K. Rowling|Mary GrandPré,Harry Potter and the Order of the Phoenix,There is a door at the end of a silent corrido...,Fantasy
2,Harper Lee,To Kill a Mockingbird,The unforgettable novel of a childhood in a sl...,Classics
3,Stephenie Meyer,Twilight,About three things I was absolutely positive.F...,Young Adult
4,Markus Zusak,The Book Thief,Trying to make sense of the horrors of World W...,Historical


In [184]:
def genre_tagger(summary, genre, tars):
  s = Sentence(summary)
  tars.predict_zero_shot(s, genre, multi_label=True)
  dict_tag = {}
  for label in s.labels:
      dict_tag[label.value] = label.score
  sorted_tag = sorted(dict_tag, key=dict_tag.get, reverse=True)[:3]
  return [sorted_tag[0], sorted_tag[1],sorted_tag[2]]

In [185]:
df_pred_genres['tag_1'], df_pred_genres['tag_2'], df_pred_genres['tag_3'] = zip(*df_pred_genres["book_desc"].apply(lambda x: genre_tagger(x, genre, tars)))

In [186]:
df_pred_genres

Unnamed: 0,book_authors,book_title,book_desc,genre,tag_1,tag_2,tag_3
0,Suzanne Collins,The Hunger Games,Winning will make you famous. Losing means cer...,Young Adult,Politics,Retellings,Law
1,J.K. Rowling|Mary GrandPré,Harry Potter and the Order of the Phoenix,There is a door at the end of a silent corrido...,Fantasy,True Story,Realistic Fiction,Magical Realism
2,Harper Lee,To Kill a Mockingbird,The unforgettable novel of a childhood in a sl...,Classics,True Story,Book Club,Writing
3,Stephenie Meyer,Twilight,About three things I was absolutely positive.F...,Young Adult,True Story,Love Inspired,Love


### Another ZSL model

In [85]:
from transformers import pipeline

classifier = pipeline("zero-shot-classification")


In [90]:
def genre_tagger_transformer(summary, genre, classifier):
  genres = classifier(summary, candidate_labels)["labels"]
  return [genres[0], genres[1],genres[2]]

In [91]:
df_pred_genres['tag_1'], df_pred_genres['tag_2'], df_pred_genres['tag_3'] = zip(*df_pred_genres["book_desc"].apply(lambda x: genre_tagger_transformer(x, genre, classifier)))

In [92]:
df_pred_genres

Unnamed: 0,book_authors,book_title,book_desc,genre,tag_1,tag_2,tag_3
0,Suzanne Collins,The Hunger Games,Winning will make you famous. Losing means cer...,Young Adult,young adult,politics,novel
1,J.K. Rowling|Mary GrandPré,Harry Potter and the Order of the Phoenix,There is a door at the end of a silent corrido...,Fantasy,young adult,novel,fantasy
2,Harper Lee,To Kill a Mockingbird,The unforgettable novel of a childhood in a sl...,Classics,novel,young adult,politics
3,Stephenie Meyer,Twilight,About three things I was absolutely positive.F...,Young Adult,novel,young adult,fantasy
4,Markus Zusak,The Book Thief,Trying to make sense of the horrors of World W...,Historical,novel,young adult,fantasy


# NER & ZSL

In [11]:
tagger = SequenceTagger.load('ner')

Downloading:   0%|          | 0.00/432M [00:00<?, ?B/s]

2022-05-24 14:50:29,451 loading file /root/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4
2022-05-24 14:50:31,905 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>


In [52]:
def ner_extraction(summary): 
  sentence = Sentence(summary)
  tagger.predict(sentence)
  misc = []
  loc = []
  org = []
  per = []
  for label in sentence.get_labels('ner'):
    if label.value == "MISC": 
      misc.append(label.data_point.text)
    elif label.value == "LOC":
      loc.append(label.data_point.text)
    elif label.value == "ORG":
      org.append(label.data_point.text)
    elif label.value == "PER":
      per.append(label.data_point.text)
 
  return [misc, loc, org, per]

In [53]:
df_pred_genres['MISC'], df_pred_genres['LOC'], df_pred_genres['ORG'], df_pred_genres["PER"] = zip(*df_pred_genres["book_desc"].apply(lambda x: ner_extraction(x)))

In [54]:
df_pred_genres

Unnamed: 0,book_authors,book_title,book_desc,genre,ner,MISC,LOC,ORG,PER
0,Suzanne Collins,The Hunger Games,Winning will make you famous. Losing means cer...,Young Adult,"{'PER': ['Katniss', 'Prim', 'Katniss', 'Peeta'...","[Panem, Hunger Games, Reaping]","[North America, Capitol, Capitol]","[Capitol, District 12]","[Katniss, Prim, Katniss, Peeta, Katniss]"
1,J.K. Rowling|Mary GrandPré,Harry Potter and the Order of the Phoenix,There is a door at the end of a silent corrido...,Fantasy,"{'PER': ['Harry Pottter', 'Harry', 'Harry'], '...",[He-Who-Must-Not-Be-Named],[Hogwarts],"[Hogwarts, Gryffindor Quidditch team]","[Harry Pottter, Harry, Harry]"
2,Harper Lee,To Kill a Mockingbird,The unforgettable novel of a childhood in a sl...,Classics,"{'PER': ['Harper Lee'], 'ORG': [], 'MISC': ['S...","[Southern, To Kill A Mockingbird, Pulitzer Pri...",[Alabama],[],[Harper Lee]
3,Stephenie Meyer,Twilight,About three things I was absolutely positive.F...,Young Adult,"{'PER': ['Edward', 'Stephenie Meyer', 'Bella S...",[Twilight Saga],[],[],"[Edward, Stephenie Meyer, Bella Swan, Edward C..."
