In [89]:
import re
import pickle
import pandas as pd
from flask import Flask, jsonify
from flask_restx import Api, Resource, fields
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from flask_cors import CORS

# Cargar los modelos y objetos necesarios
model = pickle.load(open('modelo_clasificacion_entrenado.pkl', 'rb'))
vect = pickle.load(open('vect.pkl', 'rb'))
tfidf = pickle.load(open('tfidf_transformer.pkl', 'rb'))

# Géneros disponibles
cols = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Family',
        'Fantasy', 'Film-Noir', 'History', 'Horror', 'Music', 'Musical', 'Mystery', 'News', 'Romance',
        'Sci-Fi', 'Short', 'Sport', 'Thriller', 'War', 'Western']

# Función de expansión de contracciones
def decontracted(phrase):
    # Específicas
    phrase = re.sub(r"won't", "will not", phrase)
    phrase = re.sub(r"can't", "can not", phrase)

    # Generales
    phrase = re.sub(r"n't", " not", phrase)
    phrase = re.sub(r"'re", " are", phrase)
    phrase = re.sub(r"'s", " is", phrase)
    phrase = re.sub(r"'d", " would", phrase)
    phrase = re.sub(r"'ll", " will", phrase)
    phrase = re.sub(r"'t", " not", phrase)
    phrase = re.sub(r"'ve", " have", phrase)
    phrase = re.sub(r"'m", " am", phrase)
    return phrase

stopwords= set(['br', 'the', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've",\
            "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', \
            'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their',\
            'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', \
            'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', \
            'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', \
            'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after',\
            'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',\
            'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',\
            'most', 'other', 'some', 'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', \
            's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', \
            've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn',\
            "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',\
            "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", \
            'won', "won't", 'wouldn', "wouldn't"])

# Crear la aplicación Flask
app = Flask(__name__)
CORS(app) 
#api = Api(app)

api = Api(
    app, 
    version='1.0', 
    title='Clasificación de generos de películas',
    description='Prediction API')


# Definir el namespace y el parser para los endpoints
namespace = api.namespace('predict', description='Clasificación de películas')

parser = api.parser()
parser.add_argument('plot', type=str, required=True, help='Trama de la película', location='args')

resource_fields = api.model('Resource', {
    'genres': fields.List(fields.String),
})

@namespace.route('/')
class ClasificationApi(Resource):

    @api.doc(parser=parser)
    def get(self):
        # Obtener los parámetros de entrada
        args = parser.parse_args()
        plot = args['plot']
        
        
        # Preprocesar los datos de entrada
        preprocessed_plot = decontracted(plot)
        preprocessed_plot = re.sub('[^A-Za-z]+', ' ', preprocessed_plot)
        preprocessed_plot = ' '.join(e.lower() for e in preprocessed_plot.split() if e.lower() not in stopwords)
        
        # Crear el DataFrame con los datos preprocesados
        df = pd.DataFrame({'Plot': [preprocessed_plot]})
        
        # Transformar los datos de entrada utilizando el vectorizador y el transformador TF-IDF
        X_test_dtm_1 = vect.transform(df['Plot'])
        X_test_dtm = tfidf.transform(X_test_dtm_1)
        
        # Realizar la predicción utilizando el modelo entrenado
        predicted_genres = model.predict_proba(X_test_dtm)
        
        # Obtener los géneros basados en las columnas proporcionadas
        movie_genres = [col for pred, col in zip(predicted_genres[0], cols) if pred >= 0.25]
                
        if not movie_genres:
            movie_genres = ["Sin Clasificación"]
        
        # Devolver los géneros como una lista
        return jsonify({'Genres': movie_genres})


if __name__ == '__main__':
    app.run(debug=True, use_reloader=False, host='0.0.0.0', port=5000)


 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Running on all addresses.
 * Running on http://172.20.10.3:5000/ (Press CTRL+C to quit)
172.20.10.3 - - [28/May/2023 13:21:08] "GET / HTTP/1.1" 200 -
172.20.10.3 - - [28/May/2023 13:21:09] "GET /swagger.json HTTP/1.1" 200 -
172.20.10.3 - - [28/May/2023 13:21:20] "GET /predict/?plot=american%20maxwell%20smart%20works%20for%20a%20government%20spy%20agency%20in%20an%20administrative%20capacity%20.%20%20when%20the%20agency%20%27%20s%20head%20office%20is%20attacked%20%2C%20%20the%20chief%20decides%20to%20assign%20maxwell%20as%20a%20spy%20and%20partners%20him%20with%20sexy%20agent%20%20N%20%20%2C%20%20much%20to%20her%20chagrin%20.%20%20the%20duo%20nevertheless%20set%20off%20to%20combat%20their%20attackers%20by%20first%20parachuting%20off%20an%20airplane%20and%20landing%20in%20russian%20territory%20%20-%20%20followed%20closely%20by%20an%20over%20seven%20feet%20tall%20%2C%20%20%20N%20%20pound%20goon%20%2C%20%20known%20simply%20as%20dalip%20.%20%20the%20duo%20%2C%20%20handicapped%20by%20maxw

In [5]:
# Procesamiento de los datos de Entrada
#Importación del modelo a utilizar 
model = pickle.load(open('modelo__clasificacion_entrenado.pkl', 'rb'))
vect = pickle.load(open('vect.pkl', 'rb'))
tfidf = pickle.load(open('tfidf_transformer.pkl', 'rb'))

# Función de expansión de contracciones
def decontracted(phrase):
    # Específicas
    phrase = re.sub(r"won't", "will not", phrase)
    phrase = re.sub(r"can't", "can not", phrase)

    # Generales
    phrase = re.sub(r"n't", " not", phrase)
    phrase = re.sub(r"'re", " are", phrase)
    phrase = re.sub(r"'s", " is", phrase)
    phrase = re.sub(r"'d", " would", phrase)
    phrase = re.sub(r"'ll", " will", phrase)
    phrase = re.sub(r"'t", " not", phrase)
    phrase = re.sub(r"'ve", " have", phrase)
    phrase = re.sub(r"'m", " am", phrase)
    return phrase

stopwords= set(['br', 'the', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've",\
            "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', \
            'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their',\
            'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', \
            'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', \
            'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', \
            'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after',\
            'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',\
            'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',\
            'most', 'other', 'some', 'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', \
            's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', \
            've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn',\
            "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',\
            "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", \
            'won', "won't", 'wouldn', "wouldn't"])


# Preprocesar los datos de entrada
preprocessed_plot = decontracted(plot)
preprocessed_plot = re.sub('[^A-Za-z]+', ' ', preprocessed_plot)
preprocessed_plot = ' '.join(e.lower() for e in preprocessed_plot.split() if e.lower() not in stopwords)
        
        # Crear el DataFrame con los datos preprocesados
df = pd.DataFrame({'Plot': [preprocessed_plot]})


In [70]:
plot = "in the distant future the world is in the grip of another ice age .  a city originally built to house five million people is now in its death throes as the relentlessly advancing glacier is slowly crushing the metropolis ' s steel infrastructure .  the relatively few surviving fur - clad inhabitants ,  perhaps thousands ,  perhaps only hundreds ,  drift aimlessly in their grim ,  drab world ,  awaiting their inevitable fate as they try to survive from day to day with scavenged firewood and minimal diet .  their only solaces are booza ,  an alcoholic drink distilled from moss ,  and quintet ,  a seemingly innocuous board game for six players .  the only other surviving mammals are roving packs of hungry mastiffs which roam the city ' s corridors and quickly dispose of the remains of the dead .  newly arrived from the south is essex with his pregnant wife vivia ,  seeking shelter in the doomed city only to find it populated by people middle - aged or older .  they had supported themselves by hunting seals ,  but now that the last of the aquatic mammals has been killed off ,  they seek shelter in the apartment of essex ' s brother ,  a renowned quintet player .  the new arrivals quickly learn that the game has a more sinister side ."


preprocessed_plot = decontracted(plot)
preprocessed_plot = re.sub('[^A-Za-z]+', ' ', preprocessed_plot)
preprocessed_plot = ' '.join(e.lower() for e in preprocessed_plot.split() if e.lower() not in stopwords)

In [71]:
preprocessed_plot

'distant future world grip another ice age city originally built house five million people death throes relentlessly advancing glacier slowly crushing metropolis steel infrastructure relatively surviving fur clad inhabitants perhaps thousands perhaps hundreds drift aimlessly grim drab world awaiting inevitable fate try survive day day scavenged firewood minimal diet solaces booza alcoholic drink distilled moss quintet seemingly innocuous board game six players surviving mammals roving packs hungry mastiffs roam city corridors quickly dispose remains dead newly arrived south essex pregnant wife vivia seeking shelter doomed city find populated people middle aged older supported hunting seals last aquatic mammals killed seek shelter apartment essex brother renowned quintet player new arrivals quickly learn game sinister side'

In [72]:
# Procesamiento de los datos de Entrada
#Importación del modelo a utilizar 
model = pickle.load(open('modelo__clasificacion_entrenado.pkl', 'rb'))
vect = pickle.load(open('vect.pkl', 'rb'))
tfidf = pickle.load(open('tfidf_transformer.pkl', 'rb'))

In [73]:
        # Crear el DataFrame con los datos preprocesados
df = pd.DataFrame({'Plot': [preprocessed_plot]})
        
        # Transformar los datos de entrada utilizando el vectorizador y el transformador TF-IDF
X_test_dtm_1 = vect.transform(df['Plot'])
X_test_dtm = tfidf.transform(X_test_dtm_1)
        
        # Realizar la predicción utilizando el modelo entrenado
predicted_genres = model.predict_proba(X_test_dtm)

In [74]:
print(predicted_genres)

[[0.13220793 0.10337998 0.03000895 0.03553452 0.16020853 0.10902483
  0.05605391 0.64903071 0.0579415  0.06861865 0.01861578 0.03170106
  0.11811061 0.02944152 0.02343625 0.28558367 0.00083139 0.09073283
  0.34142068 0.01198829 0.03094736 0.22478141 0.034449   0.02263806]]


In [75]:
cols = ['p_Action', 'p_Adventure', 'p_Animation', 'p_Biography', 'p_Comedy', 'p_Crime', 'p_Documentary', 'p_Drama', 'p_Family',
        'p_Fantasy', 'p_Film-Noir', 'p_History', 'p_Horror', 'p_Music', 'p_Musical', 'p_Mystery', 'p_News', 'p_Romance',
        'p_Sci-Fi', 'p_Short', 'p_Sport', 'p_Thriller', 'p_War', 'p_Western']

In [76]:
movie_genres = [col for pred, col in zip(predicted_genres[0], cols) if pred >= 0.25]

In [77]:
movie_genres

['p_Drama', 'p_Mystery', 'p_Sci-Fi']

In [49]:
class_indices = np.argmax(movie_genres)

In [50]:
class_indices

1

In [47]:
import numpy as np