# Aula de Regressão Logística
## Importação das bibliotecas e inicialização do dataframe

In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the cat-breeds dataset; the file uses ';' as its field separator.
df = pd.read_csv(filepath_or_buffer="cat_breeds.csv", sep=";")

# Bare last expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,Breed,Age_in_years,Age_in_months,Gender,Neutered_or_spayed,Body_length,Weight,Fur_colour_dominant,Fur_pattern,Eye_colour,Allowed_outdoor,Preferred_food,Owner_play_time_minutes,Sleep_time_hours,Country,Latitude,Longitude
0,Angora,0.25,3,female,False,19,2.0,white,solid,blue,False,wet,46,16,France,43.296482,5.369780
1,Angora,0.33,4,male,False,19,2.5,white,solid,blue,False,wet,48,16,France,43.611660,3.877710
2,Angora,0.50,6,male,False,20,2.8,black,solid,green,False,wet,41,11,France,44.837789,-0.579180
3,Angora,0.50,6,female,False,21,3.0,white,solid,blue,False,wet,24,8,France,43.611660,3.877710
4,Angora,0.50,6,male,False,21,3.0,red/cream,tabby,green,False,wet,51,10,France,48.864716,2.349014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1066,Maine coon,0.17,2,female,False,15,1.2,white,solid,blue,False,wet,35,20,UK,51.507351,-0.127758
1067,Maine coon,0.17,2,female,False,17,1.0,black,bicolor,blue,False,wet,36,19,UK,51.507351,-0.127758
1068,Maine coon,0.17,2,male,False,14,0.7,red/cream,tabby,blue,False,wet,20,20,UK,51.507351,-0.127758
1069,Maine coon,0.17,2,male,False,16,1.1,red/cream,tabby,green,False,wet,34,19,UK,52.486244,-1.890401


In [5]:
# Count missing values per column (`isna` is the same check as `isnull`).
df.isna().sum()

Breed                      0
Age_in_years               0
Age_in_months              0
Gender                     0
Neutered_or_spayed         0
Body_length                0
Weight                     0
Fur_colour_dominant        0
Fur_pattern                0
Eye_colour                 0
Allowed_outdoor            0
Preferred_food             0
Owner_play_time_minutes    0
Sleep_time_hours           0
Country                    0
Latitude                   0
Longitude                  0
dtype: int64

## Limpando as colunas do dataframe

In [6]:
# Column the models will predict.
target = 'Breed'

# Columns used as model inputs, one per line for easier diffing.
features = [
    'Body_length',
    'Weight',
    'Fur_colour_dominant',
    'Fur_pattern',
    'Eye_colour',
    'Country',
    'Latitude',
    'Longitude',
]

# Independent copies of the target column and of the selected input columns.
clean_target = df[target].copy()
clean_base = df.loc[:, features]

## Utilizando LabelEncoder para transformar colunas não numéricas

In [7]:
from sklearn.preprocessing import LabelEncoder

def _is_text_column(column):
    """Return True when `column` stores text.

    Covers both the legacy ``object`` dtype and the dedicated pandas string
    dtypes. Comparing ``dtype == 'object'`` alone misses the latter, which
    would leave raw strings in the frame handed to the scaler.
    """
    return pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)


# One fitted encoder per text feature, keyed by column name, so the integer
# codes can be mapped back to the original labels later.
features_les = {}
base_encoded = clean_base.copy()
for feat in features:
    if _is_text_column(base_encoded[feat]):
        # NOTE(review): integer codes imply an ordering between categories;
        # for nominal features a one-hot encoding may suit linear models better.
        features_les[feat] = LabelEncoder()
        base_encoded[feat] = features_les[feat].fit_transform(base_encoded[feat])

# Encode the target the same way; `target_le` is only fitted when the target is text.
target_le = LabelEncoder()
target_encoded = clean_target.copy()
if _is_text_column(clean_target):
    target_encoded = target_le.fit_transform(target_encoded)
# Always expose the target as a named Series, whether or not it was encoded.
target_encoded = pd.Series(target_encoded, name=target)

## Usando StandardScaler para deixar os dados na mesma escala (média e desvio padrão)

In [8]:
from sklearn.preprocessing import StandardScaler

# Learn per-column mean and standard deviation, then standardise the features.
# NOTE(review): the scaler is fitted on every row before the train/test split
# below, so the held-out rows contribute to the scaling statistics.
scaler = StandardScaler().fit(base_encoded)
base_scaled = scaler.transform(base_encoded)

## Selecionando dados para teste e treino com Train Test Split

In [9]:
from sklearn.model_selection import train_test_split

# Work on copies so the scaled features and encoded target stay untouched.
x, y = base_scaled.copy(), target_encoded.copy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=11,
)

# Utilizando diferentes algoritmos de classificação
## Regressão Logística

In [10]:
from sklearn.linear_model import LogisticRegression

# Build and train the classifier in one expression (`fit` returns the estimator).
logistic_r = LogisticRegression(random_state=1, max_iter=100).fit(x_train, y_train)

# `model` always points at the most recently trained classifier.
model = logistic_r

## Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Re-split using the encoded but unscaled features. This rebinds the shared
# x_train / x_test / y_train / y_test names, so cells run after this one see
# the unscaled partitions. Same test_size and random_state as the earlier split.
x_train, x_test, y_train, y_test = train_test_split(
    base_encoded,
    target_encoded,
    test_size=0.2,
    random_state=11,
)

# Build and train the classifier in one expression (`fit` returns the estimator).
g_naivebayes = GaussianNB().fit(x_train, y_train)

# `model` always points at the most recently trained classifier.
model = g_naivebayes

## Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Shallow tree (depth 3) so the plotted diagram stays readable.
tree = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=42)
# Fit on the training partition only: fitting on the full (x, y) would let the
# tree see the held-out rows and would not match whichever x_test is current.
tree.fit(x_train, y_train)

# `model` always points at the most recently trained classifier.
model = tree

# plot_tree expects class names in ascending order of the encoded class ids.
# `clean_target.unique()` is in order of first appearance, which can mislabel
# the leaves, so map the classes the tree actually saw back to their labels.
if hasattr(target_le, 'classes_'):
    class_names = [str(label) for label in target_le.inverse_transform(tree.classes_)]
else:
    # Target was never label-encoded; the tree's own classes are the labels.
    class_names = [str(label) for label in tree.classes_]

plt.figure(figsize=(12, 8))
plot_tree(tree, feature_names=features, class_names=class_names, filled=True)
plt.show()