In [None]:
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
import pickle

# Train a decision tree that predicts 'Stage' from the remaining columns of
# Animalset.csv, report its accuracy on a held-out split, and pickle the
# fitted preprocessing + model pipeline to 'predic.pkl'.

# Fixed seed so the split, the fitted tree and the reported accuracy are
# reproducible from run to run.
RANDOM_STATE = 42

# Load the dataset
data = pd.read_csv('Animalset.csv', encoding='unicode_escape')

# Separate features (x) and target variable (y)
x = data.drop(columns=['Stage'])
y = data['Stage']

# Categorical feature columns. They are handed to the pipeline as raw values:
# the OneHotEncoder inside the pipeline learns the categories itself, so the
# pickled pipeline carries its own encoding and can be applied directly to
# unencoded rows (no separate, unsaved label-encoding step is required).
categorical_columns = ['Name', 'Species']

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# Create a decision tree classifier
model = tree.DecisionTreeClassifier(random_state=RANDOM_STATE)

# Define the transformation and model pipeline.
# The encoder is fitted only when the pipeline is fitted, i.e. on the training
# split; categories that appear only at prediction time are encoded as all
# zeros because of handle_unknown='ignore'. All other columns pass through
# unchanged.
a = make_column_transformer(
    (OneHotEncoder(handle_unknown='ignore'), categorical_columns),
    remainder='passthrough',
)
pipe = make_pipeline(a, model)

# Fit the model to the training data
pipe.fit(x_train, y_train)

# Evaluate the model on the test data
accuracy = pipe.score(x_test, y_test)
print(f"Model Accuracy: {accuracy}")

# Pickle the trained pipeline (encoder + classifier) as a single object
with open('predic.pkl', 'wb') as file:
    pickle.dump(pipe, file)
