# Predicción del Género de una Película

## Solución: Zero-Shot Classification

In [1]:
# Optional setup: uncomment the next line to install the `transformers`
# package when it is not already available in the environment.
#!pip install transformers --quiet

In [2]:
import pandas as pd
# Locations of the train/test splits (parquet files hosted in the course repository).
TRAIN_URL = "https://github.com/amiune/freecodingtour/raw/main/cursos/espanol/deeplearning/data/train.parquet"
TEST_URL = "https://github.com/amiune/freecodingtour/raw/main/cursos/espanol/deeplearning/data/test.parquet"

# Read both splits with the pyarrow engine.
df_train = pd.read_parquet(TRAIN_URL, engine="pyarrow")
df_test = pd.read_parquet(TEST_URL, engine="pyarrow")

In [3]:
# Distinct genre labels found in the training split, in order of first appearance;
# these become the candidate labels for the zero-shot classifier.
unique_genres = df_train["genre"].unique()
genres = list(unique_genres)
genres

['fantasy',
 'horror',
 'family',
 'scifi',
 'action',
 'crime',
 'adventure',
 'mystery',
 'romance',
 'thriller']

In [4]:
# Build the classifier input: lower-cased title followed by the lower-cased synopsis.
train_title = df_train["movie_name"].str.lower()
train_synopsis = df_train["synopsis"].str.lower()
df_train["text"] = train_title + " " + train_synopsis

# Show the first combined text as a quick sanity check.
df_train["text"].iloc[0]

'super me a young scriptwriter starts bringing valuable objects back from his short nightmares of being chased by a demon. selling them makes him rich.'

## Zero shot classification

In [5]:
from transformers import pipeline

In [13]:
# Zero-shot classification pipeline backed by the facebook/bart-large-mnli checkpoint.
# device=-1 asks the pipeline to run on the CPU.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=-1,
)

In [11]:
# Try the classifier on the first training example, scoring it against every genre.
sequence_to_classify = df_train["text"].iloc[0]
candidate_labels = genres
classifier(sequence_to_classify, candidate_labels)

{'sequence': 'super me a young scriptwriter starts bringing valuable objects back from his short nightmares of being chased by a demon. selling them makes him rich.',
 'labels': ['action',
  'scifi',
  'adventure',
  'thriller',
  'fantasy',
  'horror',
  'mystery',
  'crime',
  'family',
  'romance'],
 'scores': [0.27660876512527466,
  0.16218248009681702,
  0.15024444460868835,
  0.12439803034067154,
  0.10515356808900833,
  0.10134987533092499,
  0.04093746095895767,
  0.01950187422335148,
  0.010038221254944801,
  0.009585333988070488]}

In [8]:
# Quick evaluation on the first N training rows.
# `accuracy` counts the hits; it is divided by N in a later cell.
N = 10
accuracy = 0
for i in range(N):
    row = df_train.iloc[i]
    result = classifier(row["text"], genres)
    # Labels come back sorted by score, so the first one is the predicted genre.
    top_label = result["labels"][0]
    print(f"{i}: Real:{row['genre']} Predicted:{top_label}")
    accuracy += int(row["genre"] == top_label)

0: Real:fantasy Predicted:action
1: Real:horror Predicted:mystery
2: Real:family Predicted:family
3: Real:scifi Predicted:action
4: Real:action Predicted:action
5: Real:horror Predicted:mystery
6: Real:horror Predicted:action
7: Real:crime Predicted:crime
8: Real:adventure Predicted:action
9: Real:mystery Predicted:mystery


In [9]:
# Fraction of the N evaluated rows whose top predicted label matched the real genre.
hit_ratio = accuracy / N
print(f"Total accuracy:{hit_ratio}")

Total accuracy:0.4


In [45]:
# Same input construction as for the training split:
# lower-cased title followed by the lower-cased synopsis.
test_title = df_test["movie_name"].str.lower()
test_synopsis = df_test["synopsis"].str.lower()
df_test["text"] = test_title + " " + test_synopsis
df_test.head()

Unnamed: 0,id,movie_name,synopsis,genre,text
0,16863,A Death Sentence,"12 y.o. ida's dad'll die without a dkk1,500,00...",action,a death sentence 12 y.o. ida's dad'll die with...
1,48456,Intermedio,a group of four teenage friends become trapped...,action,intermedio a group of four teenage friends bec...
2,41383,30 Chua Phai Tet,a guy left his home for 12 years till he came ...,action,30 chua phai tet a guy left his home for 12 ye...
3,84007,Paranoiac,a man long believed dead returns to the family...,action,paranoiac a man long believed dead returns to ...
4,40269,Ordinary Happiness,"after a deadly accident, paolo comes back on e...",action,"ordinary happiness after a deadly accident, pa..."


In [None]:
# Classify every test text and keep only the top-ranked genre.
# Results are appended one at a time, so `prediction` holds the partial
# output if the loop is interrupted.
prediction = []
for text in df_test["text"]:
    top_label = classifier(text, genres)["labels"][0]
    prediction.append(top_label)

In [None]:
# Sanity check: there must be exactly one predicted genre per test row,
# otherwise the submission frame built later cannot line up with the ids.
assert len(prediction) == len(df_test), (
    f"expected {len(df_test)} predictions, got {len(prediction)}"
)
len(prediction)

In [67]:
import pickle

# Cache the predictions on disk so they can be reloaded without re-running inference.
# The context manager closes the file handle once the dump finishes.
with open("zeroshot_pred.pkl", "wb") as pred_file:
    pickle.dump(prediction, pred_file)

In [None]:
# Reload the cached predictions, closing the file handle as soon as it is read.
# NOTE: only unpickle files you created yourself; pickle.load can execute
# arbitrary code from an untrusted file.
with open("zeroshot_pred.pkl", "rb") as pred_file:
    pred = pickle.load(pred_file)
len(pred)

In [None]:
# Preview only the first few predictions instead of dumping the whole list
# into the cell output.
pred[:10]

In [None]:
# Submission table: one row per test movie, pairing its id with the predicted genre.
submission_columns = {
    "id": df_test["id"],
    "genre": prediction,
}
df_submission = pd.DataFrame(submission_columns)

In [None]:
# Write the submission file without the DataFrame index column.
SUBMISSION_PATH = "submission3.csv"
df_submission.to_csv(SUBMISSION_PATH, index=False)

# Fin: [Volver al contenido del curso](https://www.freecodingtour.com/cursos/espanol/deeplearning/deeplearning.html)