In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.metrics import silhouette_score, adjusted_rand_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error
from umap import UMAP
import plotly.express as px
import dearpygui.dearpygui as dpg
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from scipy import stats
import nltk
import warnings
#warnings.filterwarnings("ignore")
nltk.download('punkt')
nltk.download('stopwords')
warnings.filterwarnings("ignore")
# Sample card types for selection
card_types = ["Creature", "Sorcery", "Instant", "Enchantment", "Planeswalker", "Land", "Artifact", "Artifact Creature"]

columns_by_type = {
    'Creature': ['Total Mana Cost', 'Power', 'Toughness', 'Rarity Category','Jaccard Score'],
    'Sorcery': ['Total Mana Cost', 'Rarity Category','Jaccard Score'],
    'Instant': ['Total Mana Cost', 'Rarity Category','Jaccard Score'],
    'Enchantment': ['Total Mana Cost', 'Rarity Category','Jaccard Score'],
    'Planeswalker': ['Total Mana Cost', 'Rarity Category', 'loyalty','Jaccard Score'],
    'Land': ['Rarity Category','Jaccard Score'],
    'Artifact': ['Total Mana Cost', 'Rarity Category','Jaccard Score'],
    'Artifact Creature': ['Total Mana Cost', 'Power', 'Toughness', 'Rarity Category','Jaccard Score']
}
# Sample rarity mapping
rarity_mapping = {'common': 0, 'uncommon': 1, 'rare': 2, 'mythic': 3}

class DataPreprocessing:
    def __init__(self):
        return None

    def merging_data(self, data1, data2):
        self.data1 = data1
        self.data2 = data2
        return data1.merge(data2[['Name', 'rarity', 'loyalty']], on='Name', how='left')
    
    def preprocess(self, data: pd.DataFrame, Card_Type: str):
        self.data = data
        data.dropna()
        data.drop_duplicates(subset='Name', inplace=True)
        data.drop(columns=["Unnamed: 0","Unnamed: 0.1"], inplace=True)
        data.drop(columns=["Expansion Name","Expansion Code"], inplace=True)
        data[['Price1', 'Price2', 'Price3']] = data[['Price1', 'Price2', 'Price3']].replace('[R$]', '', regex=True).replace(',','.',regex=True).apply(pd.to_numeric, errors='coerce')
        # Convert the 'rarity' column to numeric categories
        rarity_mapping = {'common': 0, 'uncommon': 1, 'rare': 2, 'mythic': 3}
        data['Rarity Category'] = data['rarity'].map(rarity_mapping).fillna(-1).astype(int)
        creatures = data[data['Card Type'].str.contains('Creature', na=False)].drop(columns=["Release Date",'Color', 'Mana Cost','loyalty'], inplace=False)
        creatures[['Total Mana Cost', 'Power', 'Toughness']] = creatures[['Total Mana Cost', 'Power', 'Toughness']].apply(pd.to_numeric, errors='coerce')
        creatures = creatures[(creatures['Power'] != -1) & (creatures['Toughness'] != -1)].dropna(subset=['Power', 'Toughness', 'Oracle Text'])
        sorcery = data[data['Card Type'].str.contains('Sorcery', na=False)].drop(columns=["Release Date", 'Color', 'Mana Cost','Power','Toughness','loyalty'], inplace=False)
        instant = data[data['Card Type'].str.contains('Instant', na=False)].drop(columns=["Release Date", 'Color', 'Mana Cost','Power','Toughness','loyalty'], inplace=False)
        enchantment = data[data['Card Type'].str.contains('Enchantment', na=False)].drop(columns=["Release Date", 'Color', 'Mana Cost','Power','Toughness','loyalty'], inplace=False)
        planeswalker = data[data['Card Type'].str.contains('Planeswalker', na=False)].drop(columns=["Release Date", 'Color', 'Mana Cost','Power','Toughness'], inplace=False)
        terr = data[data['Card Type'].str.contains('Land', na=False)].drop(columns=["Release Date", 'Color','Total Mana Cost', 'Mana Cost','Power','Toughness','loyalty'], inplace=False)
        art = data[data['Card Type'].str.contains('Artifact', na=False)].drop(columns=["Release Date", 'Color', 'Mana Cost','loyalty'], inplace=False)
        art_creatures = art[art['Card Type'].str.contains('Creature', na=False)]
        art_creatures = art_creatures[(art_creatures['Power'] != -1) & (art_creatures['Toughness'] != -1)].dropna(subset=['Power', 'Toughness', 'Oracle Text'])
        art_creatures[['Total Mana Cost', 'Power', 'Toughness']] = art_creatures[['Total Mana Cost', 'Power', 'Toughness']].apply(pd.to_numeric, errors='coerce')
        art = art[~art['Card Type'].str.contains('Creature', na=False)].drop(columns=['Power','Toughness'], inplace=False)
        art = art[~art['Card Type'].str.contains('Land', na=False)]

        if(Card_Type == 'Creature'):
            return creatures
        elif(Card_Type == 'Sorcery'):
            return sorcery
        elif(Card_Type == 'Instant'):
            return instant
        elif(Card_Type == 'Enchantment'):
            return enchantment
        elif(Card_Type == 'Planeswalker'):
            return planeswalker
        elif(Card_Type == 'Land'):
            return terr
        elif(Card_Type == 'Artifact'):
            return art
        elif(Card_Type == 'Artifact Creature'):
            return art_creatures
        else:
            return pd.DataFrame()  # Return an empty DataFrame for invalid Card Type

class CardAnalyzer:
    def __init__(self, df):
        self.df = df
        self.stop_words = set(stopwords.words('english'))
        self.clustering_model = None
        self.pipe = None
        self.scale = MinMaxScaler()
        self.rfr = RandomForestRegressor(n_estimators=100, random_state=42)
        self.knn_regressor = KNeighborsRegressor(n_neighbors=5)

    def clean_data(self, Card_Type: str):
        print(self.df.columns)

        """Clean and filter the dataframe for creatures."""
        if(Card_Type == "Creature" or Card_Type == "Artifact Creature"):
            self.df = self.df[(self.df['Power'] != -1) & (self.df['Toughness'] != -1)]
            self.df = self.df.dropna(subset=['Power', 'Toughness', 'Oracle Text'])
        elif(Card_Type == "Planeswalker"):
            self.df = self.df.dropna(subset=['loyalty'])
            self.df = self.df[self.df['loyalty'] != 'X']
            self.df = self.df[self.df['Total Mana Cost'] != 'X']
        elif(Card_Type == "Sorcery" or Card_Type == "Instant" or Card_Type == "Enchantment"):
            self.df = self.df.dropna(subset=['Oracle Text'])
            self.df = self.df.dropna(subset=['Price1','Price2','Price3'])
        elif(Card_Type == "Land"):
            self.df = self.df.dropna(subset=['Price1','Price2','Price3'])
        elif(Card_Type == "Artifact"):
            self.df = self.df.dropna(subset=['Oracle Text'])
            self.df = self.df.dropna(subset=['Price1','Price2','Price3'])
        else:
            print("Tipo de carta não suportado")
            return None


    def preprocess_text(self, text):
        """Tokenize and preprocess the text for Jaccard similarity."""
        tokens = word_tokenize(text.lower())
        tokens = [token for token in tokens if token.isalnum() and token not in self.stop_words]
        return set(tokens)
    
    def compute_jaccard_similarity(self, query):
        """Compute Jaccard similarity between a query and Oracle Text in the dataframe."""
        tokenized_query = self.preprocess_text(query)
        self.df['Tokenized Oracle Text'] = self.df['Oracle Text'].dropna().apply(self.preprocess_text)
        jaccard_scores = self.df['Tokenized Oracle Text'].apply(
            lambda doc: self.jaccard_similarity(tokenized_query, doc)
        )
        self.df['Jaccard Score'] = 0
        self.df.loc[self.df['Oracle Text'].notna(), 'Jaccard Score'] = jaccard_scores

    @staticmethod
    def jaccard_similarity(query_set, doc_set):
        """Compute Jaccard similarity between two sets of tokens."""
        intersection = len(query_set.intersection(doc_set))
        union = len(query_set.union(doc_set))
        return intersection / union if union != 0 else 0

    def perform_clustering(self):
        """Cluster the dataset based on Jaccard scores."""
        self.clustering_model = KMeans(n_clusters=len(self.df['Jaccard Score'].unique()), random_state=42)
        self.clustering_model.fit(self.df[['Jaccard Score']])
        self.df["predicted_cluster"] = self.clustering_model.labels_
        return self.df, self.clustering_model

    def train_pipe(self, feature, target):
        """Train the preprocessing pipeline and clustering model."""
        # Encode target labels
        label_encoder = LabelEncoder()
        true_labels = label_encoder.fit_transform(target)
        n_clusters = len(label_encoder.classes_)
        print(f"Number of clusters: {n_clusters}")

        # Define preprocessing and clustering pipelines
        preprocessor = Pipeline(
            [
                ("scaler", MinMaxScaler()),
            ]
        )
        clusterer = Pipeline(
            [
                (
                    "kmeans",
                    KMeans(
                        n_clusters=n_clusters,
                        init="k-means++",
                        n_init=100,
                        max_iter=10000,
                        random_state=42,
                    )
                ),
            ]
        )
        self.pipe = Pipeline([("preprocessor", preprocessor), ("clusterer", clusterer)])
        self.pipe.fit(feature)

        # Transform data and obtain clustering results
        preprocessed_data = self.pipe.named_steps['preprocessor'].transform(feature)
        predicted_labels = self.pipe.named_steps['clusterer'].named_steps['kmeans'].labels_

        # Evaluate clustering performance
        silhouette = silhouette_score(preprocessed_data, predicted_labels)
        ari = adjusted_rand_score(true_labels, predicted_labels)
        print(f"Silhouette Score: {silhouette}")
        print(f"Adjusted Rand Index: {ari}")

        # Assign clusters to the dataframe
        self.df["predicted_cluster"] = predicted_labels

        # Store the clustering model
        self.clustering_model = self.pipe.named_steps['clusterer'].named_steps['kmeans']

        return self.df, self.clustering_model

    def predict_and_filter(self, clustering_model,busca):
        """Predict the cluster for a test text and filter the dataset accordingly."""
        test_text_jaccard_score = 1.0  # Assuming a given score
        assigned_cluster = clustering_model.predict([[test_text_jaccard_score]])[0]
        busca = np.append(busca,assigned_cluster)
        
        if self.pipe is None:
            raise ValueError("Pipeline 'pipe' is not initialized.")
        
        predicted = self.pipe.predict(busca.reshape(1, -1))
        filtered_predict = self.df.loc[self.df['predicted_cluster'] == predicted.item()]
        return filtered_predict

    def visualize_3d(self, filtered_predict, Card_Type: str):

        print('Carta mais semelhante: ',
              filtered_predict.iloc[filtered_predict['Jaccard Score'].argmax()]['Name'],
              'Texto da carta: ',
              filtered_predict.iloc[filtered_predict['Jaccard Score'].argmax()]['Oracle Text'])
        """Visualize filtered predictions in a 3D scatter plot."""
        features = filtered_predict.drop(columns=['Oracle Text', 'Name', 'Card Type', 'Rarity Category', 'rarity', 'Tokenized Oracle Text'])
        features = features.dropna()

        umap_3d = UMAP(n_components=3, init="random", random_state=42)
        proj_3d = umap_3d.fit_transform(features)
        y = filtered_predict["Total Mana Cost"] if Card_Type != "Land" else filtered_predict["Rarity Category"]
        fig_3d = px.scatter_3d(
            proj_3d,
            y=y,
            x=filtered_predict["Jaccard Score"],
            z=filtered_predict["Price1"],
            color=filtered_predict["Jaccard Score"],
            labels={
                "Jaccard Score": "Jaccard",
                "Total Mana Cost": "CMC",
                "Price1": "Menor Preço",
                "color": "Jaccard Score"
            },
            hover_data={
                'Name': filtered_predict['Name']
            },
            title="Cartas Semelhantes",
        )
        fig_3d.update_layout(
        scene=dict(
            xaxis_title='Jaccard',
            yaxis_title='CMC',
            zaxis_title='Menor Preço'
            )
        )

        fig_3d.update_traces(marker=dict(size=5))
        fig_3d.show()

    def bayesian_analysis(self, filtered_predict):
        """Perform Bayesian analysis on the filtered data."""
        value_list = filtered_predict[["Price1"]]
        print('Média de Preço Mínimo para a carta: ',stats.bayes_mvs(value_list, 0.908))
        value_list = filtered_predict[["Price2"]]
        print('Média de Preço Médio para a carta: ',stats.bayes_mvs(value_list, 0.908))
        value_list = filtered_predict[["Price3"]]
        print('Média de Preço Máximo para a carta: ',stats.bayes_mvs(value_list, 0.908))
        

    def scale_data(self, features, target):
        """Scale the data using RobustScaler."""
        self.scale = MinMaxScaler()

        return self.scale.fit_transform(features), self.scale.fit_transform(target.values.reshape(-1, 1))

    def train_random_forest(self, X_train, y_train):
        """Train a Random Forest Regressor."""
        self.rfr.fit(X_train, y_train)

    def train_knn(self, X_train, y_train):
        """Train a K-Nearest Neighbors Regressor."""
        try:
            self.knn_regressor.fit(X_train, y_train)
        except:
            print('Poucas amostras para treino')

    def evaluate_model(self, X_test, y_test):
        """Evaluate Random Forest and KNN models on test data."""
        no_knn = False
        y_pred_rfr = self.rfr.predict(X_test)
        try:
            y_pred_knn = self.knn_regressor.predict(X_test)
        except:
            print('Poucas amostras para teste')
            no_knn = True
        
        if(no_knn):
            mse_knn = 0
        else:
            mse_knn = mean_absolute_error(y_test, y_pred_knn)
            
        mse_rfr = mean_absolute_error(y_test, y_pred_rfr)
        
        return mse_rfr, mse_knn, no_knn

    def predict_value(self, busca, no_knn):
        """Predict values using both models."""
        test_predict_rfr = self.scale.inverse_transform(self.rfr.predict(busca.reshape(1, -1)).reshape(-1, 1))
        if(no_knn):
            test_predict_knn = 0
        else:
            test_predict_knn = self.scale.inverse_transform(self.knn_regressor.predict(busca.reshape(1, -1)).reshape(-1, 1))
        
        return test_predict_rfr, test_predict_knn   

def create_busca(Card_Type: str, Power: int, Toughness: int, CMC: int, Rarity: str, Loyalty: int):
    if(Card_Type == "Planeswalker"):
        busca = np.array([CMC,Rarity,Loyalty,])
    elif(Card_Type == "Creature" or Card_Type == "Artifact Creature"):
        busca = np.array([CMC,Power,Toughness,Rarity])
    elif(Card_Type == "Sorcery" or Card_Type == "Instant" or Card_Type == "Enchantment"):
        busca = np.array([CMC,Rarity])
    elif(Card_Type == "Land"):
        busca = np.array([Rarity])
    elif(Card_Type == "Artifact"):
        busca = np.array([CMC,Rarity])
    else:
        print("Tipo de carta não suportado")
        return None
    return busca

def run(Card_Type: str, Texto_a_procurar: str, Power: int, Toughness: int, CMC: int, Rarity: str, Loyalty: int):
    data1 = pd.read_csv(r'C:\Users\drodm\OneDrive\Documentos\GitHub\Mystic-Speculation-Clusterization-and-Semantic-search-for-Price-aproximation-on-Magic-Cards\updated_merged_cleaned_with_expansions.csv')
    data2 = pd.read_csv(r'C:\Users\drodm\OneDrive\Documentos\GitHub\Mystic-Speculation-Clusterization-and-Semantic-search-for-Price-aproximation-on-Magic-Cards\expanded_expansions_with_cards.csv')        
    # Initialize DataPreprocessing and preprocess data
    data_preprocessor = DataPreprocessing()
    data = data_preprocessor.merging_data(data1, data2)
    Card_Type = Card_Type
    busca = create_busca(Card_Type, Power, Toughness, CMC, Rarity, Loyalty)
    
    df = data_preprocessor.preprocess(data, Card_Type)
    # Initialize CardAnalyzer
    analyzer = CardAnalyzer(df)
    analyzer.clean_data(Card_Type)
    Texto_a_procurar = Texto_a_procurar
    
    analyzer.compute_jaccard_similarity(Texto_a_procurar)
    df, clustering_model = analyzer.perform_clustering()
    if Card_Type == "Planeswalker":
        # Train the pipeline
        analyzer.train_pipe(df[['Total Mana Cost', 'Rarity Category','predicted_cluster','loyalty']], df["Jaccard Score"])
    elif Card_Type == "Creature" or Card_Type == "Artifact Creature":
        # Train the pipeline
        analyzer.train_pipe(df[['Total Mana Cost', 'Power', 'Toughness', 'Rarity Category','predicted_cluster']], df["Jaccard Score"])
    elif Card_Type == "Sorcery" or Card_Type == "Instant" or Card_Type == "Enchantment":
        # Train the pipeline
        analyzer.train_pipe(df[['Total Mana Cost', 'Rarity Category','predicted_cluster']], df["Jaccard Score"])
    elif Card_Type == "Land":
        # Train the pipeline
        analyzer.train_pipe(df[['Rarity Category','predicted_cluster']], df["Jaccard Score"])
    elif Card_Type == "Artifact":
        # Train the pipeline
        analyzer.train_pipe(df[['Total Mana Cost', 'Rarity Category','predicted_cluster']], df["Jaccard Score"])
    # Predict and filter
    else:
        print("Tipo de carta não suportado")
        return None

    
    filtered_predict = analyzer.predict_and_filter(clustering_model,busca)
    features, target = analyzer.scale_data(filtered_predict[columns_by_type[Card_Type]].dropna(), filtered_predict["Price1"])
    X_train_Minimum , X_test_Minimum, y_train_Minimum, y_test_Minimum = train_test_split(features, target, test_size=0.1, random_state=42)
    features, target = analyzer.scale_data(filtered_predict[columns_by_type[Card_Type]].dropna(), filtered_predict["Price2"])
    X_train_Medium  , X_test_Medium, y_train_Medium, y_test_Medium = train_test_split(features, target, test_size=0.1, random_state=42)
    features, target = analyzer.scale_data(filtered_predict[columns_by_type[Card_Type]].dropna(), filtered_predict["Price3"])
    X_train_Maximum, X_test_Maximum, y_train_Maximum, y_test_Maximum = train_test_split(features, target, test_size=0.1, random_state=42)
    # Continue with visualization and analysis

    analyzer.visualize_3d(filtered_predict, Card_Type)
    analyzer.bayesian_analysis(filtered_predict)
    analyzer.train_random_forest(X_train_Minimum, y_train_Minimum)
    analyzer.train_knn(X_train_Minimum, y_train_Minimum)
    mse_rfr, mse_knn, no_knn = analyzer.evaluate_model(X_test_Minimum, y_test_Minimum)

    print(f"Mea Absolute Error (Random Forest): {mse_rfr}, Mean Absolute Error (KNN): {mse_knn}, Menor Preço")
    test_predict_rfr, test_predict_knn = analyzer.predict_value(busca=np.append(busca,np.array([1.0])), no_knn=no_knn)
    print(test_predict_rfr, test_predict_knn)

    analyzer.train_random_forest(X_train_Medium, y_train_Medium)
    analyzer.train_knn(X_train_Medium, y_train_Medium)
    mse_rfr, mse_knn, no_knn = analyzer.evaluate_model(X_test_Medium, y_test_Medium)

    print(f"Mea Absolute Error (Random Forest): {mse_rfr}, Mean Absolute Error (KNN): {mse_knn}, Preço Médio")
    test_predict_rfr, test_predict_knn = analyzer.predict_value(busca=np.append(busca,np.array([1.0])), no_knn=no_knn)
    print(test_predict_rfr, test_predict_knn)

    analyzer.train_random_forest(X_train_Maximum, y_train_Maximum)
    analyzer.train_knn(X_train_Maximum, y_train_Maximum)
    mse_rfr, mse_knn, no_knn = analyzer.evaluate_model(X_test_Maximum, y_test_Maximum)

    print(f"Mea Absolute Error (Random Forest): {mse_rfr}, Mean Absolute Error (KNN): {mse_knn}, Preço Máximo")
    test_predict_rfr, test_predict_knn = analyzer.predict_value(busca=np.append(busca,np.array([1.0])), no_knn=no_knn)
    print(test_predict_rfr, test_predict_knn)

# Function to perform analysis after input
# Function to perform analysis after input
def run_analysis(card_type, mana_cost, card_text, power, toughness, rarity, loyalty):
    # Print for debugging to ensure values are captured
    print(f"Card Type: {card_type}, Mana Cost: {mana_cost}, Text: {card_text}, Power: {power}, Toughness: {toughness}, Rarity: {rarity}, Loyalty: {loyalty}")
    
    run(Card_Type=card_type, Texto_a_procurar=card_text, Power=power, Toughness=toughness, CMC=mana_cost, Rarity=rarity, Loyalty=loyalty)

# Main GUI setup
def main():
    with dpg.window(label="Card Analyzer", width=400, height=400):

        # Card Type selector
        dpg.add_combo(label="Select Card Type", items=card_types, default_value="Creature", tag="card_type_selector_unique")
        
        # Mana cost input
        mana_cost = dpg.add_input_int(label="Mana Cost", tag="mana_cost_input_unique")

        # Card text input
        card_text = dpg.add_input_text(label="Card Text", tag="card_text_input_unique")

        # Power and Toughness (visible only if Creature or Artifact Creature)
        with dpg.group(tag="creature_inputs_unique", show=True):
            power = dpg.add_input_int(label="Power", tag="power_input_unique")
            toughness = dpg.add_input_int(label="Toughness", tag="toughness_input_unique")

        # Loyalty (visible only if Planeswalker)
        loyalty = dpg.add_input_int(label="Loyalty", tag="loyalty_input_unique", show=False)

        # Rarity input
        rarity = dpg.add_combo(label="Rarity", items=list(rarity_mapping.keys()), tag="rarity_input_unique")
        
        # Show/Hide inputs based on card type selection
        def card_type_change_callback(sender, app_data):
            card_type = dpg.get_value("card_type_selector_unique")
            if card_type in ["Creature", "Artifact Creature"]:
                dpg.configure_item("creature_inputs_unique", show=True)
                dpg.configure_item("loyalty_input_unique", show=False)
            elif card_type == "Planeswalker":
                dpg.configure_item("creature_inputs_unique", show=False)
                dpg.configure_item("loyalty_input_unique", show=True)
            elif card_type == "Land":
                dpg.configure_item("creature_inputs_unique", show=False)
                dpg.configure_item("loyalty_input_unique", show=False)
                dpg.configure_item("mana_cost_input_unique", show=False)
            else:
                dpg.configure_item("creature_inputs_unique", show=False)
                dpg.configure_item("loyalty_input_unique", show=False)
                dpg.configure_item("mana_cost_input_unique", show=True)
            

        # Link card type change event
        dpg.set_item_callback("card_type_selector_unique", card_type_change_callback)

        # Button to run the analysis
        def on_button_click():
            # Forcefully retrieve values from the input fields
            card_type = dpg.get_value("card_type_selector_unique")
            try:
                mana_cost = dpg.get_value("mana_cost_input_unique")
            except:
                mana_cost = 0
            try:
                card_text = dpg.get_value("card_text_input_unique")
            except:
                card_text = ""
            try:
                power = dpg.get_value("power_input_unique")
            except:
                power = 0
            try:
                toughness = dpg.get_value("toughness_input_unique")
            except:
                toughness = 0
            try:
                rarity = dpg.get_value("rarity_input_unique")
                numeric_rarity = rarity_mapping.get(rarity.lower(), -1)  # Retorna -1 se a raridade não for encontrada
            except:
                numeric_rarity = -1
            if numeric_rarity == -1:
                dpg.set_value("output_text", f"Raridade inválida: '{rarity}'. Por favor, selecione uma raridade válida.")
                return  
            try:
                loyalty = dpg.get_value("loyalty_input_unique")
            except:
                loyalty = 0

        
            # Force pass values to run_analysis
            run_analysis(card_type, mana_cost, card_text, power, toughness, numeric_rarity, loyalty)
        
        dpg.add_button(label="Run Analysis", callback=on_button_click)

# Initialize DearPyGui
dpg.create_context()
main()
dpg.create_viewport(title='Card Analysis Tool', width=600, height=600)
dpg.setup_dearpygui()
dpg.show_viewport()
dpg.start_dearpygui()
dpg.destroy_context()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\drodm\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\drodm\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Card Type: Sorcery, Mana Cost: 1, Text: Draw a card, Power: 0, Toughness: 0, Rarity: 2, Loyalty: 0
Index(['Name', 'Total Mana Cost', 'Card Type', 'Oracle Text', 'Price1',
       'Price2', 'Price3', 'rarity', 'Rarity Category'],
      dtype='object')



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '[0.04   0.     0.     ... 0.     0.     0.0625]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Number of clusters: 48
Silhouette Score: 0.5829042287785701
Adjusted Rand Index: 0.06959432537041228
Carta mais semelhante:  Glimpse of Nature Texto da carta:  Whenever you cast a creature spell this turn, draw a card.



X does not have valid feature names, but KMeans was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



Média de Preço Mínimo para a carta:  (Mean(statistic=11.44090909090909, minmax=(2.618579684466626, 20.26323849735155)), Variance(statistic=2415.154754438503, minmax=(1859.6396578045335, 3107.7210757893617)), Std_dev(statistic=48.99989786097558, minmax=(43.123539486045594, 55.746937815357725)))
Média de Preço Médio para a carta:  (Mean(statistic=16.36931818181818, minmax=(7.121153045313623, 25.617483318322734)), Variance(statistic=2653.9306936363632, minmax=(2043.4942141412762, 3414.9680616286228)), Std_dev(statistic=51.365022344239534, minmax=(45.20502421347959, 58.437728066965626)))
Média de Preço Máximo para a carta:  (Mean(statistic=29.244545454545456, minmax=(17.76871085175679, 40.72038005733412)), Variance(statistic=4086.4574939037434, minmax=(3146.5223508473237, 5258.284197227168)), Std_dev(statistic=63.73767034763703, minmax=(56.09387088485982, 72.51402758933727)))
Mea Absolute Error (Random Forest): 0.04121255848925527, Mean Absolute Error (KNN): 0.028526405903164465, Menor Pre


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



Mea Absolute Error (Random Forest): 0.049625449209128765, Mean Absolute Error (KNN): 0.04758052277923311, Preço Médio
[[64.53520195]] [[37.25488883]]
Mea Absolute Error (Random Forest): 0.05781803946934219, Mean Absolute Error (KNN): 0.07760763541628081, Preço Máximo
[[128.31191174]] [[78.39]]



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



Card Type: Instant, Mana Cost: 1, Text: Draw a card, Power: 0, Toughness: 0, Rarity: 2, Loyalty: 0
Index(['Name', 'Total Mana Cost', 'Card Type', 'Oracle Text', 'Price1',
       'Price2', 'Price3', 'rarity', 'Rarity Category'],
      dtype='object')



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '[0.         0.08333333 0.0625     ... 0.11111111 0.05263158 0.        ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Number of clusters: 47
Silhouette Score: 0.7415287151827004
Adjusted Rand Index: 0.08121826622263219
Carta mais semelhante:  Brand Texto da carta:  Gain control of all permanents you own. (This effect lasts indefinitely.)
Cycling {2} ({2}, Discard this card: Draw a card.)



X does not have valid feature names, but KMeans was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



Média de Preço Mínimo para a carta:  (Mean(statistic=28.208571428571428, minmax=(11.697556226731027, 44.719586630411825)), Variance(statistic=3372.148825892857, minmax=(2201.2211439263297, 5037.413385956515)), Std_dev(statistic=57.618373260198695, minmax=(46.917173230346364, 70.97473766035712)))
Média de Preço Médio para a carta:  (Mean(statistic=36.212857142857146, minmax=(18.324465763186772, 54.10124852252751)), Variance(statistic=3958.236647321428, minmax=(2583.798832912136, 5912.928314105829)), Std_dev(statistic=62.42499319021165, minmax=(50.83108136673993, 76.89556758426215)))
Média de Preço Máximo para a carta:  (Mean(statistic=51.78942857142857, minmax=(30.616144059580556, 72.96271308327658)), Variance(statistic=5545.435337142857, minmax=(3619.866781283909, 8283.931593939466)), Std_dev(statistic=73.88826157776529, minmax=(60.16532873078904, 91.01610623367418)))
Mea Absolute Error (Random Forest): 0.09872909861354913, Mean Absolute Error (KNN): 0.10143779954157117, Menor Preço
[[


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



Mea Absolute Error (Random Forest): 0.08992662956373995, Mean Absolute Error (KNN): 0.07032073081282922, Preço Máximo
[[89.03954]] [[99.282]]
Card Type: Artifact, Mana Cost: 1, Text: Draw a card, Power: 0, Toughness: 0, Rarity: 0, Loyalty: 0
Index(['Name', 'Total Mana Cost', 'Card Type', 'Oracle Text', 'Price1',
       'Price2', 'Price3', 'rarity', 'Rarity Category'],
      dtype='object')



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '[0.2        0.07692308 0.         ... 0.         0.         0.        ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Number of clusters: 50
Silhouette Score: 0.7579273121083682
Adjusted Rand Index: 0.07114829908819592
Carta mais semelhante:  Nihil Spellbomb Texto da carta:  {T}, Sacrifice Nihil Spellbomb: Exile target player's graveyard.
When Nihil Spellbomb is put into a graveyard from the battlefield, you may pay {B}. If you do, draw a card.



X does not have valid feature names, but KMeans was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



Média de Preço Mínimo para a carta:  (Mean(statistic=2.0295918367346935, minmax=(-0.12596083699368066, 4.185144510463067)), Variance(statistic=80.36829982253771, minmax=(56.31046741701004, 112.73456608645986)), Std_dev(statistic=8.916250886453945, minmax=(7.5040300783652265, 10.617653511320654)))
Média de Preço Médio para a carta:  (Mean(statistic=3.5734693877551025, minmax=(-0.06756872481021059, 7.214507500320414)), Variance(statistic=229.3073371783496, minmax=(160.6653788517965, 321.65497110582675)), Std_dev(statistic=15.060828572850726, minmax=(12.675384761489354, 17.93474201391887)))
Média de Preço Máximo para a carta:  (Mean(statistic=6.302448979591838, minmax=(0.6526809760778995, 11.952216983105775)), Variance(statistic=552.1139631765749, minmax=(386.84152088050377, 774.4636654803567)), Std_dev(statistic=23.369760147154782, minmax=(19.668287187259185, 27.82918729464367)))
Mea Absolute Error (Random Forest): 0.05868703261098933, Mean Absolute Error (KNN): 0.051098497495826366, Men


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



Mea Absolute Error (Random Forest): 0.054701905959058904, Mean Absolute Error (KNN): 0.048977916748094596, Preço Médio
[[5.83930502]] [[8.642564]]
Mea Absolute Error (Random Forest): 0.05268015380211174, Mean Absolute Error (KNN): 0.045845000000000004, Preço Máximo
[[5.62834917]] [[7.468]]



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().

