# Importing Libraries


In [1]:
# Install all necessary libraries
import sys
#!{sys.executable} -m pip install setuptools-rust # https://www.rust-lang.org/tools/install
#!{sys.executable} -m pip install transformers # https://huggingface.co/docs/transformers/installation

# For the Hugging Face pipeline to work, you have to install either PyTorch or Tensorflow!
# For PyTorch:
#!{sys.executable} -m pip install torch torchvision torchaudio 
# For Tensorflow: https://www.tensorflow.org/install/pip

# Import the Transformers pipeline library
from transformers import pipeline

# Build the zero-shot classification pipeline from the facebook/bart-large-mnli checkpoint
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [2]:
import pandas as pd
import numpy as np
import os
import pandas as pd
import warnings
# Never truncate long text cells when a frame is displayed
pd.set_option("display.max_colwidth", None)
# Silence every warning for the rest of the session
warnings.filterwarnings(action="ignore")

# Processing the Data


In [3]:
# Load the titles table from the CSV in the working directory and preview two rows
df = pd.read_csv(filepath_or_buffer="netflix_titles.csv")
df.head(n=2)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmmaker Kirsten Johnson stages his death in inventive and comical ways to help them both face the inevitable."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thabang Molaba, Dillon Windvogel, Natasha Thahane, Arno Greeff, Xolile Tshabalala, Getmore Sithole, Cindy Mahlangu, Ryle De Morny, Greteli Fincham, Sello Maake Ka-Ncube, Odwa Gwanya, Mekaila Mathys, Sandi Schultz, Duane Williams, Shamilla Miller, Patrick Mofokeng",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town teen sets out to prove whether a private-school swimming star is her sister who was abducted at birth."


In [4]:
# Keep only the two columns we need. The explicit .copy() makes the selection an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning (chained assignment on a selection of the loaded frame).
df = df[['listed_in', 'description']].copy()
# Count the whitespace-separated tokens in each description
df['text_length'] = df['description'].str.split().str.len()
# Order the rows from the longest description to the shortest
df2 = df.sort_values(by='text_length', ascending=False)
df2.head()

Unnamed: 0,listed_in,description,text_length
4948,"Dramas, International Movies","An Indian village is abuzz with news that a major movie star is coming to town – even more so because the town barber went to school with him. But as the day of the actor's visit approaches, the humble barber begins to worry he won't be recognized.",48
6221,Children & Family Movies,"Half-dog, half-wolf Balto (voiced by Maurice LaMarche) and his wife proudly put their pups up for adoption to humans, but when nobody takes daughter Aleu because she looks too much like a wolf, she runs away to search for her place in the world.",44
215,"Action & Adventure, Dramas, International Movies","Based on a true story, this action film follows an incident that stunned a nation in the early 1990s. In Mumbai, India, the notorious gangster Maya holds off veteran cop Khan and a force of more than 200 policemen in a six-hour bloody gunfight.",44
593,"Children & Family Movies, Comedies","When a snow day shuts down the whole town, the Wheeler family cuts loose. Hal makes a play for the most popular girl in his school, 10-year-old Natalie takes on the dreaded snowplow man, and Dad gets into a showdown with a rival meteorologist.",44
5990,"Action & Adventure, International Movies","In the age of the Ming Dynasty, Quinglong is the best of the Jinyiwei, an elite assassin squad made up of highly trained former street urchins. When evil eunuch Jia unseats the emperor, Quinglong is called to action but is quickly betrayed.",42


In [5]:
# Keep only the rows whose description is longer than 40 words,
# then renumber the surviving rows from 0
is_long = df2['text_length'] > 40
df2 = df2.loc[is_long].reset_index(drop=True)
df2

Unnamed: 0,listed_in,description,text_length
0,"Dramas, International Movies","An Indian village is abuzz with news that a major movie star is coming to town – even more so because the town barber went to school with him. But as the day of the actor's visit approaches, the humble barber begins to worry he won't be recognized.",48
1,Children & Family Movies,"Half-dog, half-wolf Balto (voiced by Maurice LaMarche) and his wife proudly put their pups up for adoption to humans, but when nobody takes daughter Aleu because she looks too much like a wolf, she runs away to search for her place in the world.",44
2,"Action & Adventure, Dramas, International Movies","Based on a true story, this action film follows an incident that stunned a nation in the early 1990s. In Mumbai, India, the notorious gangster Maya holds off veteran cop Khan and a force of more than 200 policemen in a six-hour bloody gunfight.",44
3,"Children & Family Movies, Comedies","When a snow day shuts down the whole town, the Wheeler family cuts loose. Hal makes a play for the most popular girl in his school, 10-year-old Natalie takes on the dreaded snowplow man, and Dad gets into a showdown with a rival meteorologist.",44
4,"Action & Adventure, International Movies","In the age of the Ming Dynasty, Quinglong is the best of the Jinyiwei, an elite assassin squad made up of highly trained former street urchins. When evil eunuch Jia unseats the emperor, Quinglong is called to action but is quickly betrayed.",42
5,"Action & Adventure, International Movies","Leader of the Ten Kwangtung Tigers, Tieh Chiao-san works with his kung fu students to protect his village from crime. But when an opium dealer starts to ruin the town, Tieh Chiao-san struggles to fight back, thanks to his growing opium addiction.",42
6,Dramas,"After one of his high school students attacks him, dedicated teacher Trevor Garfield grows weary of the gang warfare in the New York City school system and moves to California to teach there, thinking it must be a less hostile environment.",41
7,"Action & Adventure, Anime Features, Sci-Fi & Fantasy","When a group of vicious Sinners plots to escape from this eternal prison in Hell, they discover that Substitute Soul Reaper Ichigo is the key to their freedom. The Sinners launch an attack and in the process kidnap Ichigo's younger sister.",41
8,"Action & Adventure, Comedies, Dramas","Paul Newman proves he's still got prodigious acting chops, playing an aging bank robber who may have one last job in him. Faking a stroke, the incarcerated Newman is transferred to a nursing home and the care of a skeptical nurse.",41


# Code Implementation

In [6]:
# Candidate labels the classifier scores the text against
labels = ["Actions", "Adventure", "Crime", "Violence", "Finance", "Food"]

# Text to classify: the description stored in the third row of df2
sequences = df2["description"].iloc[2]

# Run the classifier on this one description in multi-label mode
classifier(sequences, labels, multi_label=True)

{'sequence': 'Based on a true story, this action film follows an incident that stunned a nation in the early 1990s. In Mumbai, India, the notorious gangster Maya holds off veteran cop Khan and a force of more than 200 policemen in a six-hour bloody gunfight.',
 'labels': ['Actions', 'Violence', 'Crime', 'Adventure', 'Finance', 'Food'],
 'scores': [0.8735317587852478,
  0.8443195819854736,
  0.4216251075267792,
  0.21568214893341064,
  0.004167867824435234,
  0.0024876173119992018]}

In [7]:
# Creating a function to call the ZSTC iteratively for all rows

def zeroshot(dataset, labels):
    """
    Score every description in ``dataset`` against ``labels`` with the zero-shot classifier.

    Each value of ``dataset["description"]`` is passed, one at a time, to the
    module-level ``classifier`` with ``multi_label=True``. The label/score pairs
    it returns become one new column per label.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame with a ``"description"`` column holding the texts to classify.
        It is not modified.
    labels : list of str
        Candidate labels handed to the classifier.

    Returns
    -------
    pandas.DataFrame
        The columns of ``dataset`` followed by one score column per label.
        Rows keep the index of ``dataset``; the score columns appear in the
        descending-score order of the first row.

    Raises
    ------
    KeyError
        If ``dataset`` has no ``"description"`` column.
    """
    score_rows = []
    for text in dataset["description"]:
        result = classifier(text, labels, multi_label=True)
        label_scores = dict(zip(result["labels"], result["scores"]))
        # Highest score first; the first row's ranking fixes the column order
        ranked = sorted(label_scores.items(), key=lambda item: item[1], reverse=True)
        score_rows.append(dict(ranked))
    # Build the scores on the input's own index: concat(axis=1) aligns on index
    # labels, so a fresh 0..n-1 index would misalign (and NaN-pad) any dataset
    # that was filtered or sorted without reset_index().
    scores = pd.DataFrame(score_rows, index=dataset.index)
    return pd.concat([dataset, scores], axis=1)
    

In [8]:
# Score every description in df2 against the candidate labels
df3 = zeroshot(dataset=df2, labels=labels)

In [9]:
# Display the descriptions together with their per-label scores
df3

Unnamed: 0,listed_in,description,text_length,Adventure,Actions,Crime,Violence,Food,Finance
0,"Dramas, International Movies","An Indian village is abuzz with news that a major movie star is coming to town – even more so because the town barber went to school with him. But as the day of the actor's visit approaches, the humble barber begins to worry he won't be recognized.",48,0.32844,0.212973,0.055224,0.044739,0.003632,0.002722
1,Children & Family Movies,"Half-dog, half-wolf Balto (voiced by Maurice LaMarche) and his wife proudly put their pups up for adoption to humans, but when nobody takes daughter Aleu because she looks too much like a wolf, she runs away to search for her place in the world.",44,0.757345,0.349966,0.05372,0.030573,0.002486,0.004124
2,"Action & Adventure, Dramas, International Movies","Based on a true story, this action film follows an incident that stunned a nation in the early 1990s. In Mumbai, India, the notorious gangster Maya holds off veteran cop Khan and a force of more than 200 policemen in a six-hour bloody gunfight.",44,0.215682,0.873532,0.421625,0.84432,0.002488,0.004168
3,"Children & Family Movies, Comedies","When a snow day shuts down the whole town, the Wheeler family cuts loose. Hal makes a play for the most popular girl in his school, 10-year-old Natalie takes on the dreaded snowplow man, and Dad gets into a showdown with a rival meteorologist.",44,0.885316,0.900145,0.084415,0.589101,0.002241,0.010849
4,"Action & Adventure, International Movies","In the age of the Ming Dynasty, Quinglong is the best of the Jinyiwei, an elite assassin squad made up of highly trained former street urchins. When evil eunuch Jia unseats the emperor, Quinglong is called to action but is quickly betrayed.",42,0.394322,0.87105,0.213171,0.901674,0.002098,0.000886
5,"Action & Adventure, International Movies","Leader of the Ten Kwangtung Tigers, Tieh Chiao-san works with his kung fu students to protect his village from crime. But when an opium dealer starts to ruin the town, Tieh Chiao-san struggles to fight back, thanks to his growing opium addiction.",42,0.40152,0.566734,0.698439,0.393664,0.001336,0.000523
6,Dramas,"After one of his high school students attacks him, dedicated teacher Trevor Garfield grows weary of the gang warfare in the New York City school system and moves to California to teach there, thinking it must be a less hostile environment.",41,0.168099,0.420861,0.073064,0.869795,0.000919,0.003404
7,"Action & Adventure, Anime Features, Sci-Fi & Fantasy","When a group of vicious Sinners plots to escape from this eternal prison in Hell, they discover that Substitute Soul Reaper Ichigo is the key to their freedom. The Sinners launch an attack and in the process kidnap Ichigo's younger sister.",41,0.452236,0.532758,0.681314,0.903291,0.010378,0.015898
8,"Action & Adventure, Comedies, Dramas","Paul Newman proves he's still got prodigious acting chops, playing an aging bank robber who may have one last job in him. Faking a stroke, the incarcerated Newman is transferred to a nursing home and the care of a skeptical nurse.",41,0.177433,0.520176,0.56866,0.055478,0.00433,0.004166
