# Questão 2: Implementação do Naive Bayes em Python:

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# 1: Read the CSV. Column names are supplied explicitly and the first row of
# the file is skipped.
colunas = ['Aparencia', 'Temperatura', 'Umidade', 'Ventando', 'Jogar']
data = pd.read_csv("/content/jogar.csv", names=colunas, skiprows=1)


# 2: Preprocessing
# Convert each categorical feature column to integer codes. One fitted encoder
# is kept per column so that new records can be encoded with the same mapping.
labelEnc = {}
colunasCategoricas = ['Aparencia', 'Temperatura', 'Umidade', 'Ventando']

for col in colunasCategoricas:
    labelEnc[col] = LabelEncoder()
    data[col] = labelEnc[col].fit_transform(data[col])

# Feature matrix and target. Note the target column is NOT label-encoded: the
# classifier is trained on (and predicts) the values exactly as read from the CSV.
X = data[['Aparencia', 'Temperatura', 'Umidade', 'Ventando']]
y = data['Jogar']

# 3: Train/test split (20% held out, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4: Create and train the classifier
# NOTE(review): GaussianNB treats the integer codes as continuous values;
# CategoricalNB may be a better match for categorical features - confirm.
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# 5: Build the requested test record
teste = pd.DataFrame({'Aparencia': ['chuva'],
                      'Temperatura': ['fria'],
                      'Umidade': ['normal'],
                      'Ventando': ['sim']})

# Encode the test record with the encoders fitted on the training data
for col in colunasCategoricas:
    teste[col] = labelEnc[col].transform(teste[col])

# Predict the class of the test record
prediction = nb_classifier.predict(teste)

# Accuracy on the held-out split
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisao: {accuracy:.2f}')

# Print the prediction. Because the target was never encoded, the predicted
# value has whatever type the 'Jogar' column has. Comparing only against the
# integer 1 would always yield "nao" for string labels, so accept both the
# numeric label 1 and the textual label "sim" as a positive prediction.
previsao = prediction[0]
jogar = previsao == 1 or str(previsao).strip().lower() == 'sim'
print(f'Previsao para o registro: {"sim" if jogar else "nao"}')


# Questão 3


## Implementação do RandomForest em Python

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# 1: Read the CSV. Column names are supplied explicitly and the first row of
# the file is skipped.
csv_path = "/content/jogar.csv"
column_names = ['Aparencia', 'Temperatura', 'Umidade', 'Ventando', 'Jogar']
data = pd.read_csv(csv_path, names=column_names, sep=',', skiprows=1)

# 2: Preprocessing
# Convert each categorical feature column to integer codes, keeping the fitted
# encoder of every column so new records can be encoded the same way.
label_encoders = {}
categorical_cols = ['Aparencia', 'Temperatura', 'Umidade', 'Ventando']

for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Feature matrix and target. The target column is NOT label-encoded, so the
# model predicts the values exactly as they appear in the CSV.
X = data[['Aparencia', 'Temperatura', 'Umidade', 'Ventando']]
y = data['Jogar']

# 3: Train/test split (20% held out, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4: Create and train the classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)

# 5: Evaluate on the held-out split
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisao: {accuracy:.2f}')

# 6: Build the requested test record
test_record = pd.DataFrame({'Aparencia': ['chuva'],
                            'Temperatura': ['fria'],
                            'Umidade': ['normal'],
                            'Ventando': ['sim']})

# Encode the test record with the encoders fitted above
for col in categorical_cols:
    test_record[col] = label_encoders[col].transform(test_record[col])

# Predict the class of the test record
prediction = rf_classifier.predict(test_record)

# Print the prediction. Because the target was never encoded, the predicted
# value has whatever type the 'Jogar' column has. Comparing only against the
# integer 1 would always yield "nao" for string labels, so accept both the
# numeric label 1 and the textual label "sim" as a positive prediction.
predicted_label = prediction[0]
will_play = predicted_label == 1 or str(predicted_label).strip().lower() == 'sim'
print(f'Previsao para o registro: {"sim" if will_play else "nao"}')


Accuracy: 0.67
Prediction for the test record: nao


## RandomSearch

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

# Hyperparameter search space.
# 'n_estimators' and 'min_samples_split' are distributions sampled by the
# search itself (governed by its random_state). The 'max_depth' candidates are
# a fixed list: None plus ten integers drawn from [1, 20). The draw is seeded
# so the candidate list - and therefore the whole search - is reproducible
# across runs; .tolist() yields plain Python ints.
param_dist = {
    'n_estimators': randint(10, 200),
    'max_depth': [None] + randint(1, 20).rvs(10, random_state=42).tolist(),
    'min_samples_split': randint(2, 11)
}

rf_classifier = RandomForestClassifier(random_state=42)

# Randomized search: 20 sampled configurations, 5-fold cross-validation,
# using all available cores.
# Relies on X_train / y_train produced by an earlier cell.
random_search = RandomizedSearchCV(
    estimator=rf_classifier,
    param_distributions=param_dist,
    n_iter=20,
    cv=5,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

# Report the best configuration found
print("Melhores hiperparametros:")
print(random_search.best_params_)

# Evaluate the best estimator (refit by the search) on the held-out split
best_rf_classifier = random_search.best_estimator_
y_pred = best_rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisao com melhores hiperparametros: {accuracy:.2f}')




Melhores hiperparametros:
{'max_depth': 3, 'min_samples_split': 5, 'n_estimators': 73}
Accuracy com melhores hiperparametros: 0.33


## GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive grid of candidate values: 3 x 4 x 3 combinations.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)

rf_classifier = RandomForestClassifier(random_state=42)

# Grid search with 5-fold cross-validation on all available cores.
# Relies on X_train / y_train produced by an earlier cell.
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning combination
print("Melhores hiperparametros:", grid_search.best_params_, sep="\n")

# Score the best estimator from the search on the held-out split
best_rf_classifier = grid_search.best_estimator_
y_pred = best_rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy com Melhores hiperparametros: {accuracy:.2f}')




Melhores hiperparametros:
{'max_depth': None, 'min_samples_split': 5, 'n_estimators': 50}
Accuracy com Melhores hiperparametros: 0.33


# Questão 4

## Bagging

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the training and test CSVs (the test frame is loaded but not used below).
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')

# Target is the 'Survived' column.
y_train = train_data['Survived']

# Features: drop the target and the free-text columns, map the two remaining
# string columns to integer codes, then replace every missing value with 0.
X_train = (
    train_data
    .drop(columns=['Survived'])
    .drop(columns=['Name', 'Ticket', 'Cabin'])
    .assign(
        Sex=lambda frame: frame['Sex'].map({'male': 0, 'female': 1}),
        Embarked=lambda frame: frame['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}),
    )
    .fillna(0)
)

# Hold out 20% of the rows for validation (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Bagging ensemble of 100 estimators built from a decision tree, fitted on
# the training split.
base_classifier = DecisionTreeClassifier()
bagging_classifier = BaggingClassifier(
    base_classifier,
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict on the validation split and report the accuracy.
y_pred = bagging_classifier.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f'Precisao: {accuracy:.2f}')


Precisão: 0.82


## Boosting

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier  # Importar o AdaBoost
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the training and test CSVs (the test frame is loaded but not used below).
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')

# Split off the target and discard the free-text columns.
y_train = train_data['Survived']
X_train = train_data.drop(columns=['Survived']).drop(columns=['Name', 'Ticket', 'Cabin'])

# Integer codes for the two remaining string columns.
encodings = {
    'Sex': {'male': 0, 'female': 1},
    'Embarked': {'S': 0, 'C': 1, 'Q': 2},
}
for column, mapping in encodings.items():
    X_train[column] = X_train[column].map(mapping)

# Replace every missing value with 0.
X_train = X_train.fillna(0)

# Hold out 20% of the rows for validation (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    test_size=0.2,
    random_state=42,
)

# AdaBoost over depth-1 decision trees (weak learners), 100 estimators.
base_classifier = DecisionTreeClassifier(max_depth=1)
adaboost_classifier = AdaBoostClassifier(
    base_classifier,
    n_estimators=100,
    random_state=42,
)

# Fit on the training split, then predict the validation split.
adaboost_classifier.fit(X_train, y_train)
y_pred = adaboost_classifier.predict(X_val)

# Report the accuracy on the validation split.
accuracy = accuracy_score(y_val, y_pred)
print(f'Precisao: {accuracy:.2f}')


Precisão: 0.78


## Random Forest

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the training and test CSVs (the test frame is loaded but not used below).
# NOTE(review): these paths are relative, while the other cells in this notebook
# read from '/content/...'; this cell depends on the working directory - confirm.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Work on a copy so train_data stays untouched; pop() removes the target
# column from the feature frame and returns it.
X_train = train_data.copy()
y_train = X_train.pop('Survived')

# Preprocessing: discard free-text columns, encode the remaining string
# columns as integers, and replace every missing value with 0.
X_train = X_train.drop(columns=['Name', 'Ticket', 'Cabin'])
X_train['Sex'] = X_train['Sex'].map(dict(male=0, female=1))
X_train['Embarked'] = X_train['Embarked'].map(dict(S=0, C=1, Q=2))
X_train = X_train.fillna(0)

# Hold out 20% of the rows for validation (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Random forest with 100 trees, fitted on the training split.
random_forest_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest_classifier.fit(X_train, y_train)

# Predict the validation split and report the accuracy.
y_pred = random_forest_classifier.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f'Precisao: {accuracy:.2f}')


Precisão: 0.82
