---

# Imports

In [291]:
import pandas as pd
import numpy as np
import requests
import time
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from wordcloud import WordCloud
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer, SilhouetteVisualizer
from pycaret.clustering import *
import nltk


# Read Data

In [292]:
path_to_export = '/Users/henricobela/Desktop/Estudos/Github/Faculdade/FIAP/Segundo_ano/2o_semestre/AUTOML/Checkpoint_2_webapp_recomendacao/data' #mac
df = pd.read_csv(f'{path_to_export}/all_movies.csv', sep = ';')

df = df[["title_pt", "title_en", "year", "rating", "genre", "sinopse"]]

print(df.shape)
df.head()

(357, 6)


Unnamed: 0,title_pt,title_en,year,rating,genre,sinopse
0,Besouro Azul,1. Besouro Azul,2023,6.8 (17K),Ação,"Jaime Reyes, um adolescente de origem mexicana..."
1,Barbie,2. Barbie,2023,7.4 (255K),Aventura,Viver na Terra da Barbie é ser um ser perfeito...
2,Oppenheimer,3. Oppenheimer,2023,8.6 (381K),Biografia,A história do cientista americano J. Robert Op...
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023,7.5 (19K),Animação,Os irmãos Tartaruga trabalham para conquistar ...
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023,6.4 (3.2K),Animação,Um cachorro abandonado se une a outros vira-la...


# Processing the Data

In [293]:
df['year'] = df['year'].astype('int64')
df["rating"] = df["rating"].apply(lambda x: x.split("\xa0")[0] if not isinstance(x, float) else x)
idx = df[df.rating.isin(["Rate"])].index.tolist()
df.loc[idx, "rating"] = 6.0
df['rating'] = df['rating'].astype('float64')
df.rating.fillna(6.0, inplace = True)
df.dropna(inplace = True)
df

Unnamed: 0,title_pt,title_en,year,rating,genre,sinopse
0,Besouro Azul,1. Besouro Azul,2023,6.8,Ação,"Jaime Reyes, um adolescente de origem mexicana..."
1,Barbie,2. Barbie,2023,7.4,Aventura,Viver na Terra da Barbie é ser um ser perfeito...
2,Oppenheimer,3. Oppenheimer,2023,8.6,Biografia,A história do cientista americano J. Robert Op...
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023,7.5,Animação,Os irmãos Tartaruga trabalham para conquistar ...
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023,6.4,Animação,Um cachorro abandonado se une a outros vira-la...
...,...,...,...,...,...,...
352,Meus Sogros Tão pro Crime,Meus Sogros Tão pro Crime,2023,5.4,Ação,Um conservador gerente de banco prestes a se c...
353,Dunkirk,Dunkirk,2017,7.8,Ação,"Soldados aliados da Bélgica, do Império Britân..."
354,Guardiões da Galáxia,Guardiões da Galáxia,2014,8.0,Ação,Um grupo de criminosos intergalácticos deve se...
355,Titanic,Titanic,1997,7.9,Drama,Uma aristocrata de dezessete anos se apaixona ...


In [294]:
df_processed = df.copy()
df_processed['sinopse'] = df_processed['sinopse'].str.lower()
df_processed['sinopse'].head()


0    jaime reyes, um adolescente de origem mexicana...
1    viver na terra da barbie é ser um ser perfeito...
2    a história do cientista americano j. robert op...
3    os irmãos tartaruga trabalham para conquistar ...
4    um cachorro abandonado se une a outros vira-la...
Name: sinopse, dtype: object

In [295]:
def qty_words(text):
    words = text.split()
    word_count = len(words)
    return word_count

In [296]:
df_processed['word_count'] = df_processed['sinopse'].apply(qty_words).astype('int64')
df_processed.head()

Unnamed: 0,title_pt,title_en,year,rating,genre,sinopse,word_count
0,Besouro Azul,1. Besouro Azul,2023,6.8,Ação,"jaime reyes, um adolescente de origem mexicana...",24
1,Barbie,2. Barbie,2023,7.4,Aventura,viver na terra da barbie é ser um ser perfeito...,29
2,Oppenheimer,3. Oppenheimer,2023,8.6,Biografia,a história do cientista americano j. robert op...,17
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023,7.5,Animação,os irmãos tartaruga trabalham para conquistar ...,19
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023,6.4,Animação,um cachorro abandonado se une a outros vira-la...,15


In [297]:
nltk.download('stopwords')
stopwords = nltk.corpus.stopwords.words('portuguese')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/henricobela/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [298]:
df_processed['sinopse_no_stopwords'] = df_processed['sinopse'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stopwords)]))

In [299]:
df_processed

Unnamed: 0,title_pt,title_en,year,rating,genre,sinopse,word_count,sinopse_no_stopwords
0,Besouro Azul,1. Besouro Azul,2023,6.8,Ação,"jaime reyes, um adolescente de origem mexicana...",24,"jaime reyes, adolescente origem mexicana encon..."
1,Barbie,2. Barbie,2023,7.4,Aventura,viver na terra da barbie é ser um ser perfeito...,29,viver terra barbie perfeito lugar perfeito. me...
2,Oppenheimer,3. Oppenheimer,2023,8.6,Biografia,a história do cientista americano j. robert op...,17,história cientista americano j. robert oppenhe...
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023,7.5,Animação,os irmãos tartaruga trabalham para conquistar ...,19,irmãos tartaruga trabalham conquistar amor cid...
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023,6.4,Animação,um cachorro abandonado se une a outros vira-la...,15,cachorro abandonado une outros vira-latas ving...
...,...,...,...,...,...,...,...,...
352,Meus Sogros Tão pro Crime,Meus Sogros Tão pro Crime,2023,5.4,Ação,um conservador gerente de banco prestes a se c...,33,conservador gerente banco prestes casar amor v...
353,Dunkirk,Dunkirk,2017,7.8,Ação,"soldados aliados da bélgica, do império britân...",25,"soldados aliados bélgica, império britânico fr..."
354,Guardiões da Galáxia,Guardiões da Galáxia,2014,8.0,Ação,um grupo de criminosos intergalácticos deve se...,20,grupo criminosos intergalácticos deve unir imp...
355,Titanic,Titanic,1997,7.9,Drama,uma aristocrata de dezessete anos se apaixona ...,20,aristocrata dezessete anos apaixona artista ge...


# Preparing Data to Model

In [300]:
vectorizer = TfidfVectorizer(sublinear_tf=True, min_df=5, max_df=0.95)
X = vectorizer.fit_transform(df_processed['sinopse_no_stopwords'])
X.shape

(355, 154)

In [301]:
X.toarray()

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.75043195, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [302]:
df_all = pd.concat([
                    df_processed,
                    pd.DataFrame(X.toarray())
                   ]
                   , axis = 1
                   )
df_all

Unnamed: 0,title_pt,title_en,year,rating,genre,sinopse,word_count,sinopse_no_stopwords,0,1,...,144,145,146,147,148,149,150,151,152,153
0,Besouro Azul,1. Besouro Azul,2023.0,6.8,Ação,"jaime reyes, um adolescente de origem mexicana...",24.0,"jaime reyes, adolescente origem mexicana encon...",0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
1,Barbie,2. Barbie,2023.0,7.4,Aventura,viver na terra da barbie é ser um ser perfeito...,29.0,viver terra barbie perfeito lugar perfeito. me...,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.750432,0.0,0.0
2,Oppenheimer,3. Oppenheimer,2023.0,8.6,Biografia,a história do cientista americano j. robert op...,17.0,história cientista americano j. robert oppenhe...,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023.0,7.5,Animação,os irmãos tartaruga trabalham para conquistar ...,19.0,irmãos tartaruga trabalham conquistar amor cid...,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023.0,6.4,Animação,um cachorro abandonado se une a outros vira-la...,15.0,cachorro abandonado une outros vira-latas ving...,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354,Guardiões da Galáxia,Guardiões da Galáxia,2014.0,8.0,Ação,um grupo de criminosos intergalácticos deve se...,20.0,grupo criminosos intergalácticos deve unir imp...,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
355,Titanic,Titanic,1997.0,7.9,Drama,uma aristocrata de dezessete anos se apaixona ...,20.0,aristocrata dezessete anos apaixona artista ge...,,,...,,,,,,,,,,
356,Você Não Tá Convidada pro Meu Bat Mitzvá!,Você Não Tá Convidada pro Meu Bat Mitzvá!,2023.0,6.0,Comédia,stacey friedman se prepara para seu bat mitzva...,19.0,"stacey friedman prepara bat mitzvah, planos de...",,,...,,,,,,,,,,
275,,,,,,,,,0.0,0.0,...,0.0,0.0,0.301708,0.0,0.0,0.0,0.0,0.000000,0.0,0.0


In [303]:
df_all['movieID'] = df_all.index
df_all.columns = df_all.columns.astype(str)

In [304]:
df_all

Unnamed: 0,title_pt,title_en,year,rating,genre,sinopse,word_count,sinopse_no_stopwords,0,1,...,145,146,147,148,149,150,151,152,153,movieID
0,Besouro Azul,1. Besouro Azul,2023.0,6.8,Ação,"jaime reyes, um adolescente de origem mexicana...",24.0,"jaime reyes, adolescente origem mexicana encon...",0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0
1,Barbie,2. Barbie,2023.0,7.4,Aventura,viver na terra da barbie é ser um ser perfeito...,29.0,viver terra barbie perfeito lugar perfeito. me...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.750432,0.0,0.0,1
2,Oppenheimer,3. Oppenheimer,2023.0,8.6,Biografia,a história do cientista americano j. robert op...,17.0,história cientista americano j. robert oppenhe...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,2
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023.0,7.5,Animação,os irmãos tartaruga trabalham para conquistar ...,19.0,irmãos tartaruga trabalham conquistar amor cid...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,3
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023.0,6.4,Animação,um cachorro abandonado se une a outros vira-la...,15.0,cachorro abandonado une outros vira-latas ving...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354,Guardiões da Galáxia,Guardiões da Galáxia,2014.0,8.0,Ação,um grupo de criminosos intergalácticos deve se...,20.0,grupo criminosos intergalácticos deve unir imp...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,354
355,Titanic,Titanic,1997.0,7.9,Drama,uma aristocrata de dezessete anos se apaixona ...,20.0,aristocrata dezessete anos apaixona artista ge...,,,...,,,,,,,,,,355
356,Você Não Tá Convidada pro Meu Bat Mitzvá!,Você Não Tá Convidada pro Meu Bat Mitzvá!,2023.0,6.0,Comédia,stacey friedman se prepara para seu bat mitzva...,19.0,"stacey friedman prepara bat mitzvah, planos de...",,,...,,,,,,,,,,356
275,,,,,,,,,0.0,0.0,...,0.0,0.301708,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,275


In [305]:
df_all.dropna(inplace = True)
df_all

Unnamed: 0,title_pt,title_en,year,rating,genre,sinopse,word_count,sinopse_no_stopwords,0,1,...,145,146,147,148,149,150,151,152,153,movieID
0,Besouro Azul,1. Besouro Azul,2023.0,6.8,Ação,"jaime reyes, um adolescente de origem mexicana...",24.0,"jaime reyes, adolescente origem mexicana encon...",0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0
1,Barbie,2. Barbie,2023.0,7.4,Aventura,viver na terra da barbie é ser um ser perfeito...,29.0,viver terra barbie perfeito lugar perfeito. me...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.750432,0.0,0.0,1
2,Oppenheimer,3. Oppenheimer,2023.0,8.6,Biografia,a história do cientista americano j. robert op...,17.0,história cientista americano j. robert oppenhe...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,2
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023.0,7.5,Animação,os irmãos tartaruga trabalham para conquistar ...,19.0,irmãos tartaruga trabalham conquistar amor cid...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,3
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023.0,6.4,Animação,um cachorro abandonado se une a outros vira-la...,15.0,cachorro abandonado une outros vira-latas ving...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350,Nefarious,Nefarious,2023.0,6.4,Terror,"no dia de sua execução, um assassino em série ...",35.0,"dia execução, assassino série passa avaliação ...",0.0,0.0,...,0.0,0.507967,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,350
351,Beau Tem Medo,Beau Tem Medo,2023.0,6.8,Comédia,"após a morte repentina de sua mãe, um homem ge...",32.0,"após morte repentina mãe, homem gentil atormen...",0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,351
352,Meus Sogros Tão pro Crime,Meus Sogros Tão pro Crime,2023.0,5.4,Ação,um conservador gerente de banco prestes a se c...,33.0,conservador gerente banco prestes casar amor v...,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,352
353,Dunkirk,Dunkirk,2017.0,7.8,Ação,"soldados aliados da bélgica, do império britân...",25.0,"soldados aliados bélgica, império britânico fr...",0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,353


In [306]:
df_all = df_all.drop(columns = ["sinopse", "sinopse_no_stopwords"])

In [307]:
df_all

Unnamed: 0,title_pt,title_en,year,rating,genre,word_count,0,1,2,3,...,145,146,147,148,149,150,151,152,153,movieID
0,Besouro Azul,1. Besouro Azul,2023.0,6.8,Ação,24.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0
1,Barbie,2. Barbie,2023.0,7.4,Aventura,29.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.750432,0.0,0.0,1
2,Oppenheimer,3. Oppenheimer,2023.0,8.6,Biografia,17.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,2
3,As Tartarugas Ninja: Caos Mutante,4. As Tartarugas Ninja: Caos Mutante,2023.0,7.5,Animação,19.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,3
4,Ruim Pra Cachorro,5. Ruim Pra Cachorro,2023.0,6.4,Animação,15.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350,Nefarious,Nefarious,2023.0,6.4,Terror,35.0,0.0,0.0,0.0,0.0,...,0.0,0.507967,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,350
351,Beau Tem Medo,Beau Tem Medo,2023.0,6.8,Comédia,32.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,351
352,Meus Sogros Tão pro Crime,Meus Sogros Tão pro Crime,2023.0,5.4,Ação,33.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,352
353,Dunkirk,Dunkirk,2017.0,7.8,Ação,25.0,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,353


In [308]:
df_all = pd.get_dummies(df_all)
df_all

Unnamed: 0,year,rating,word_count,0,1,2,3,4,5,6,...,genre_Comédia,genre_Drama,genre_Fantasia,genre_Faroeste,genre_Ficção científica,genre_Filme noir,genre_Mistério,genre_Policial,genre_Suspense,genre_Terror
0,2023.0,6.8,24.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,0
1,2023.0,7.4,29.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,0
2,2023.0,8.6,17.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,0
3,2023.0,7.5,19.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,0
4,2023.0,6.4,15.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350,2023.0,6.4,35.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,1
351,2023.0,6.8,32.0,0.0,0.0,0.0,0.0,0.401654,0.401654,0.0,...,1,0,0,0,0,0,0,0,0,0
352,2023.0,5.4,33.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,0
353,2017.0,7.8,25.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,0,0,0,0


# Setup Model

In [309]:
s = setup(data = df_all,
          session_id = 123,
          normalize = True,
          normalize_method = 'minmax',)

Unnamed: 0,Description,Value
0,Session id,123
1,Original data shape,"(353, 878)"
2,Transformed data shape,"(353, 878)"
3,Numeric features,878
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,Normalize,True
9,Normalize method,minmax


In [310]:
mdl_kmeans_all = create_model('kmeans')

Unnamed: 0,Silhouette,Calinski-Harabasz,Davies-Bouldin,Homogeneity,Rand Index,Completeness
0,0.0668,14.8723,3.6433,0,0,0


In [311]:
df_kmeans_all = assign_model(mdl_kmeans_all)
df_kmeans_all.rename(columns = {'Cluster': 'cluster'}, inplace = True)
df_kmeans_all.columns.tolist()

['year',
 'rating',
 'word_count',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '60',
 '61',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67',
 '68',
 '69',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '80',
 '81',
 '82',
 '83',
 '84',
 '85',
 '86',
 '87',
 '88',
 '89',
 '90',
 '91',
 '92',
 '93',
 '94',
 '95',
 '96',
 '97',
 '98',
 '99',
 '100',
 '101',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '120',
 '121',
 '122',
 '123',
 '124',
 '125',
 '126',
 '127',
 '128',
 '129',
 '130',
 '131',
 '132',
 '133',
 '1

In [312]:
df_kmeans_all['genre'] = df_processed['genre'].copy()
df_kmeans_all['sinopse_no_stopwords'] = df_processed['sinopse_no_stopwords'].copy()
df_kmeans_all['title_pt'] = df_processed['title_pt'].copy()
# df_kmeans_all['movieID'] = df_all.index

df_kmeans_all

Unnamed: 0,year,rating,word_count,0,1,2,3,4,5,6,...,genre_Ficção científica,genre_Filme noir,genre_Mistério,genre_Policial,genre_Suspense,genre_Terror,cluster,genre,sinopse_no_stopwords,title_pt
0,2023.0,6.8,24.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 0,Ação,"jaime reyes, adolescente origem mexicana encon...",Besouro Azul
1,2023.0,7.4,29.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Aventura,viver terra barbie perfeito lugar perfeito. me...,Barbie
2,2023.0,8.6,17.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Biografia,história cientista americano j. robert oppenhe...,Oppenheimer
3,2023.0,7.5,19.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Animação,irmãos tartaruga trabalham conquistar amor cid...,As Tartarugas Ninja: Caos Mutante
4,2023.0,6.4,15.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Animação,cachorro abandonado une outros vira-latas ving...,Ruim Pra Cachorro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350,2023.0,6.4,35.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,1,Cluster 1,Terror,"dia execução, assassino série passa avaliação ...",Nefarious
351,2023.0,6.8,32.0,0.0,0.0,0.0,0.0,0.401654,0.401654,0.0,...,0,0,0,0,0,0,Cluster 1,Comédia,"após morte repentina mãe, homem gentil atormen...",Beau Tem Medo
352,2023.0,5.4,33.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 0,Ação,conservador gerente banco prestes casar amor v...,Meus Sogros Tão pro Crime
353,2017.0,7.8,25.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 0,Ação,"soldados aliados bélgica, império britânico fr...",Dunkirk


In [313]:
mdl_kmeans_all.feature_names_in_

array(['year', 'rating', 'word_count', '0', '1', '2', '3', '4', '5', '6',
       '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17',
       '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28',
       '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39',
       '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50',
       '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61',
       '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72',
       '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83',
       '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94',
       '95', '96', '97', '98', '99', '100', '101', '102', '103', '104',
       '105', '106', '107', '108', '109', '110', '111', '112', '113',
       '114', '115', '116', '117', '118', '119', '120', '121', '122',
       '123', '124', '125', '126', '127', '128', '129', '130', '131',
       '132', '133', '134', '135', '136', '137', '138', '139', 

In [314]:
df_kmeans_all

Unnamed: 0,year,rating,word_count,0,1,2,3,4,5,6,...,genre_Ficção científica,genre_Filme noir,genre_Mistério,genre_Policial,genre_Suspense,genre_Terror,cluster,genre,sinopse_no_stopwords,title_pt
0,2023.0,6.8,24.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 0,Ação,"jaime reyes, adolescente origem mexicana encon...",Besouro Azul
1,2023.0,7.4,29.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Aventura,viver terra barbie perfeito lugar perfeito. me...,Barbie
2,2023.0,8.6,17.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Biografia,história cientista americano j. robert oppenhe...,Oppenheimer
3,2023.0,7.5,19.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Animação,irmãos tartaruga trabalham conquistar amor cid...,As Tartarugas Ninja: Caos Mutante
4,2023.0,6.4,15.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 1,Animação,cachorro abandonado une outros vira-latas ving...,Ruim Pra Cachorro
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350,2023.0,6.4,35.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,1,Cluster 1,Terror,"dia execução, assassino série passa avaliação ...",Nefarious
351,2023.0,6.8,32.0,0.0,0.0,0.0,0.0,0.401654,0.401654,0.0,...,0,0,0,0,0,0,Cluster 1,Comédia,"após morte repentina mãe, homem gentil atormen...",Beau Tem Medo
352,2023.0,5.4,33.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 0,Ação,conservador gerente banco prestes casar amor v...,Meus Sogros Tão pro Crime
353,2017.0,7.8,25.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0,0,0,0,0,0,Cluster 0,Ação,"soldados aliados bélgica, império britânico fr...",Dunkirk


In [315]:
def recommend_movies(input_movie_name, data, num_recommends):
    movies_recommended = []
    movie_index = data[data['title_pt'] == input_movie_name]

    for recommend in range(num_recommends):
        previsoes = predict_model(mdl_kmeans_all, data = movie_index)
        filmes_recomendados = previsoes['movieID'].tolist()
        movies_recommended.append(filmes_recomendados[0])
    
    print("Filmes recomendados com base em", input_movie_name, ":")
    for i, movie_id in enumerate(movies_recommended):
        print(f"{i + 1}. Filme ID: {movie_id}")

input_movie = "Besouro Azul"
recommend_movies(input_movie, data = df_kmeans_all, num_recommends = 3)


Filmes recomendados com base em Besouro Azul :
1. Filme ID: 0.0
2. Filme ID: 0.0
3. Filme ID: 0.0


# Save Model

In [316]:
save_model(mdl_kmeans_all, "./model/model")

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=FastMemory(location=/var/folders/0x/lgbrscr56sq1gtx792kyzgvc0000gn/T/joblib),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['year', 'rating', 'word_count',
                                              '0', '1', '2', '3', '4', '5', '6',
                                              '7', '8', '9', '10', '11', '12',
                                              '13', '14', '15', '16', '17', '18',
                                              '19', '20', '21', '22', '23', '24',
                                              '25', '26', ...],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=[],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('normalize', TransformerWrapper(transformer=MinMaxScaler())),
                 ('trained_model', KMeans(n_clusters=4, random_state=1