# Zero Shot Learning (ZSL)

In [1]:
import pandas as pd
import flair
import re
from flair.models import SequenceTagger
from itertools import islice


In [46]:
df = pd.read_csv("data/books_def_small.csv")

In [47]:
df.head()

Unnamed: 0,book_authors,book_desc,book_rating,book_title,genres
0,Suzanne Collins,Winning will make you famous. Losing means cer...,4.33,The Hunger Games,Young Adult
1,J.K. Rowling|Mary GrandPré,There is a door at the end of a silent corrido...,4.48,Harry Potter and the Order of the Phoenix,Fantasy
2,Harper Lee,The unforgettable novel of a childhood in a sl...,4.27,To Kill a Mockingbird,Classics
3,Stephenie Meyer,About three things I was absolutely positive.F...,3.58,Twilight,Young Adult
4,Markus Zusak,Trying to make sense of the horrors of World W...,4.36,The Book Thief,Historical


In [48]:
print(df.shape)


(26484, 5)


In [49]:
df.book_desc = df.book_desc.astype(str)
df.genres = df.genres.astype(str)

In [50]:
#genre subsets
df_gb = [x for _, x in df.groupby('genres')]
df_fantasy = df_gb[1].copy()
df_mistery = df_gb[4].copy()
df_scifi = df_gb[7].copy()
df_romance = df_gb[6].copy()


In [51]:
df_fantasy

Unnamed: 0,book_authors,book_desc,book_rating,book_title,genres
1,J.K. Rowling|Mary GrandPré,There is a door at the end of a silent corrido...,4.48,Harry Potter and the Order of the Phoenix,Fantasy
5,C.S. Lewis|Pauline Baynes,"Journeys to the end of the world, fantastic cr...",4.25,The Chronicles of Narnia,Fantasy
23,Cassandra Clare|Franca Fritz|Heinrich Koop|Fra...,When fifteen-year-old Clary Fray heads out to ...,4.11,City of Bones,Fantasy
34,William Goldman,What happens when the most beautiful girl in t...,4.26,The Princess Bride,Fantasy
38,Madeleine L'Engle,"It was a dark and stormy night; Meg Murry, her...",4.02,A Wrinkle in Time,Fantasy
...,...,...,...,...,...
26453,Lev Grossman,"Two novels from ""New York Times"" bestselling a...",4.26,The Magicians and the Magician King,Fantasy
26465,Sherrilyn Kenyon|Dianna Love,The Medb coven sends a team of deadly Svart Tr...,4.23,The Curse,Fantasy
26467,Sherrilyn Kenyon|Dianna Love,In this explosive new world of betrayals and s...,4.19,Alterant,Fantasy
26469,Sherrilyn Kenyon,Two enemy Were-Panthers discover that fate has...,4.27,Winter Born,Fantasy


### Genre prediction

In [8]:
from flair.models import TARSClassifier
from flair.data import Sentence

In [24]:
# TARS: Task-aware representation of sentences
tars = TARSClassifier.load('tars-base')
existing_tasks = tars.list_existing_tasks()
existing_tasks

2022-05-31 19:02:46,059 loading file C:\Users\Erica\.flair\models\tars-base-v8.pt


{'AGNews',
 'Amazon',
 'DBPedia',
 'GO_EMOTIONS',
 'IMDB',
 'NEWS_CATEGORY',
 'SST',
 'TREC_6',
 'Yelp'}

In [25]:
genre = df.genres.unique()

In [26]:
genre

array(['Young Adult', 'Fantasy', 'Classics', 'Historical',
       'Science Fiction', 'Fiction', 'Nonfiction', 'Romance', 'Mystery',
       'Sequential Art'], dtype=object)

In [27]:
df_pred_genres = df[:5].copy()
df_pred_genres

Unnamed: 0,book_authors,book_desc,book_rating,book_title,genres
0,Suzanne Collins,Winning will make you famous. Losing means cer...,4.33,The Hunger Games,Young Adult
1,J.K. Rowling|Mary GrandPré,There is a door at the end of a silent corrido...,4.48,Harry Potter and the Order of the Phoenix,Fantasy
2,Harper Lee,The unforgettable novel of a childhood in a sl...,4.27,To Kill a Mockingbird,Classics
3,Stephenie Meyer,About three things I was absolutely positive.F...,3.58,Twilight,Young Adult
4,Markus Zusak,Trying to make sense of the horrors of World W...,4.36,The Book Thief,Historical


In [30]:
def genre_tagger(summary, genre, tars):
    s = Sentence(summary)
    tars.predict_zero_shot(s, genre, multi_label=True)
    dict_tag = {}
    
    for label in s.labels:
        dict_tag[label.value] = label.score
        print (label)
    sorted_tag = sorted(dict_tag, key=dict_tag.get, reverse=True)[:3]
    
    return [sorted_tag[0], sorted_tag[1],sorted_tag[2]]

In [31]:
df_pred_genres['tag_1'], df_pred_genres['tag_2'], df_pred_genres['tag_3'] = zip(*df_pred_genres["book_desc"].apply(lambda x: genre_tagger(x, genre, tars)))

IndexError: list index out of range

### Another ZSL model

In [9]:
import torch
torch.cuda.is_available()

False

In [10]:
from transformers import pipeline

classifier = pipeline("zero-shot-classification")


No model was supplied, defaulted to facebook/bart-large-mnli (https://huggingface.co/facebook/bart-large-mnli)


In [11]:
def genre_tagger_transformer(summary, genre, classifier):
    genres = classifier(summary, genre)["labels"]
    return [genres[0], genres[1],genres[2]]

In [22]:
df_seq_art['tag_1'], df_seq_art['tag_2'], df_seq_art['tag_3'] = zip(*df_seq_art["book_desc"].apply(lambda x: genre_tagger_transformer(x, genre, classifier)))

KeyboardInterrupt: 

In [48]:
df_classics.to_csv("data/zsl_seq_art.csv")

# NER & ZSL

In [52]:
tagger = SequenceTagger.load('ner')

2022-05-31 19:19:18,573 loading file C:\Users\Erica\.flair\models\ner-english\4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4
2022-05-31 19:19:21,027 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>


In [55]:
def ner_extraction(summary): 
    sentence = Sentence(summary)
    tagger.predict(sentence)
    misc = []
    loc = []
    org = []
    per = []
    for label in sentence.get_labels('ner'):
        if label.value == "MISC": 
            misc.append(label.data_point.text)
        elif label.value == "LOC":
            loc.append(label.data_point.text)
        elif label.value == "ORG":
            org.append(label.data_point.text)
        elif label.value == "PER":
            per.append(label.data_point.text)
    return [misc, loc, org, per]

In [None]:
df_fantasy['MISC'], df_fantasy['LOC'], df_fantasy['ORG'], df_fantasy["PER"] = zip(*df_fantasy["book_desc"].apply(lambda x: ner_extraction(x)))

In [None]:
df_fantasy

In [None]:
df_fantasy.to_csv("data/ner_fantasy.csv")