### Loading the Data

In [1]:
import pandas as pd

# Load the dinosaur dataset
file_path = 'data.csv'
data = pd.read_csv(file_path)

In [2]:
data

Unnamed: 0,name,diet,period,lived_in,type,length,taxonomy,named_by,species,link
0,aardonyx,herbivorous,Early Jurassic 199-189 million years ago,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...
1,abelisaurus,carnivorous,Late Cretaceous 74-70 million years ago,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...
2,achelousaurus,herbivorous,Late Cretaceous 83-70 million years ago,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...
3,achillobator,carnivorous,Late Cretaceous 99-84 million years ago,Mongolia,large theropod,5.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Perle Norell and Clark (1999),giganteus,https://www.nhm.ac.uk/discover/dino-directory/...
4,acrocanthosaurus,carnivorous,Early Cretaceous 115-105 million years ago,USA,large theropod,12.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Stovall and Langston (1950),atokensis,https://www.nhm.ac.uk/discover/dino-directory/...
...,...,...,...,...,...,...,...,...,...,...
304,yuanmousaurus,herbivorous,Mid Jurassic 180-159 million years ago,China,sauropod,17.0m,Dinosauria Saurischia Sauropodomorpha Sauropod...,Lü Li Ji Wang Zhang and Dong (2006),jiangyiensis,https://www.nhm.ac.uk/discover/dino-directory/...
305,yunnanosaurus,omnivorous,Early Jurassic 205-190 million years ago,China,sauropod,7.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Young (1942),huangi,https://www.nhm.ac.uk/discover/dino-directory/...
306,zalmoxes,herbivorous,Late Cretaceous 69 million years ago,Romania,euornithopod,3.0m,Dinosauria Ornithischia Genasauria Cerapoda Or...,Nopcsa (1902),robustus,https://www.nhm.ac.uk/discover/dino-directory/...
307,zephyrosaurus,herbivorous,Early Cretaceous 120-110 million years ago,USA,euornithopod,1.8m,Dinosauria Ornithischia Genasauria Cerapoda Or...,Sues (1980),schaffi,https://www.nhm.ac.uk/discover/dino-directory/...


### Data Preprocessing

In [3]:
# Check for missing values
print(data.isnull().sum())

name         0
diet         0
period       0
lived_in     1
type         0
length      18
taxonomy     0
named_by     0
species      5
link         0
dtype: int64


In [4]:
# Drop rows with missing values (this is just an example)
data.dropna(inplace=True)

In [5]:
# Convert 'length ' column to numeric (removing 'm' and converting to float)
data['length'] = data['length'].str.replace('m', '').astype(float)

In [6]:
from sklearn.preprocessing import LabelEncoder

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Encode the column
data['type_encoded'] = label_encoder.fit_transform(data['type'])
data['species_encoded'] = label_encoder.fit_transform(data['species'])
data['diet_encoded'] = label_encoder.fit_transform(data['diet'])

In [7]:
data

Unnamed: 0,name,diet,period,lived_in,type,length,taxonomy,named_by,species,link,type_encoded,species_encoded,diet_encoded
0,aardonyx,herbivorous,Early Jurassic 199-189 million years ago,South Africa,sauropod,8.0,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...,4,57,1
1,abelisaurus,carnivorous,Late Cretaceous 74-70 million years ago,Argentina,large theropod,9.0,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...,3,62,0
2,achelousaurus,herbivorous,Late Cretaceous 83-70 million years ago,USA,ceratopsian,6.0,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...,1,107,1
3,achillobator,carnivorous,Late Cretaceous 99-84 million years ago,Mongolia,large theropod,5.0,Dinosauria Saurischia Theropoda Neotheropoda T...,Perle Norell and Clark (1999),giganteus,https://www.nhm.ac.uk/discover/dino-directory/...,3,89,0
4,acrocanthosaurus,carnivorous,Early Cretaceous 115-105 million years ago,USA,large theropod,12.0,Dinosauria Saurischia Theropoda Neotheropoda T...,Stovall and Langston (1950),atokensis,https://www.nhm.ac.uk/discover/dino-directory/...,3,23,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
303,yinlong,herbivorous,Mid Jurassic 159-154 million years ago,China,ceratopsian,1.2,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Xu Forster Clark and Mo (2006),downsi,https://www.nhm.ac.uk/discover/dino-directory/...,1,72,1
304,yuanmousaurus,herbivorous,Mid Jurassic 180-159 million years ago,China,sauropod,17.0,Dinosauria Saurischia Sauropodomorpha Sauropod...,Lü Li Ji Wang Zhang and Dong (2006),jiangyiensis,https://www.nhm.ac.uk/discover/dino-directory/...,4,120,1
305,yunnanosaurus,omnivorous,Early Jurassic 205-190 million years ago,China,sauropod,7.0,Dinosauria Saurischia Sauropodomorpha Prosauro...,Young (1942),huangi,https://www.nhm.ac.uk/discover/dino-directory/...,4,109,3
306,zalmoxes,herbivorous,Late Cretaceous 69 million years ago,Romania,euornithopod,3.0,Dinosauria Ornithischia Genasauria Cerapoda Or...,Nopcsa (1902),robustus,https://www.nhm.ac.uk/discover/dino-directory/...,2,202,1


In [8]:
from sklearn.model_selection import train_test_split

# Assuming 'data' is your DataFrame
# Features and Target Variable
X = data.drop(columns=['name', 'type', 'type_encoded', 'link', 'period',	 'lived_in', 'taxonomy',	'named_by', 'diet', 'species'])
y = data[['type_encoded']]  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Decision Tree Classifier
dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(X_train, y_train)
dt_predictions = dt_classifier.predict(X_test)

# Evaluate Decision Tree Classifier
dt_accuracy = accuracy_score(y_test, dt_predictions)
print(f"Decision Tree Classifier Accuracy: {dt_accuracy:.4f}")


Decision Tree Classifier Accuracy: 0.5789


In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100)
rf_classifier.fit(X_train, y_train)
rf_predictions = rf_classifier.predict(X_test)

# Evaluate Random Forest Classifier
rf_accuracy = accuracy_score(y_test, rf_predictions)
print(f"Random Forest Classifier Accuracy: {rf_accuracy:.4f}")


  rf_classifier.fit(X_train, y_train)


Random Forest Classifier Accuracy: 0.5965


In [11]:
train_data = X_train.copy()
train_data['type_encoded'] = y_train

In [12]:
train_data = train_data.drop(columns=['length'])

In [13]:
train_data

Unnamed: 0,species_encoded,diet_encoded,type_encoded
290,91,1,2
25,115,1,4
91,142,1,0
159,24,1,2
149,248,1,4
...,...,...,...
204,166,1,2
74,208,0,5
114,54,0,3
291,200,0,3


In [14]:
!pip install pgmpy



In [15]:
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
import pandas as pd


model = BayesianNetwork([ ('species_encoded', 'type_encoded'), ('diet_encoded', 'type_encoded'), ])

# Fit the model using Maximum Likelihood Estimation
model.fit(train_data, estimator=MaximumLikelihoodEstimator)


In [16]:
test_data = X_test.copy()
test_data['type_encoded'] = y_test

In [17]:
test_data = test_data.drop(columns=['length'])

In [18]:
test_data.shape

(57, 3)

In [19]:
test_data = test_data[(test_data.species_encoded<200) & (test_data.type_encoded <4) ]

In [20]:
test_data

Unnamed: 0,species_encoded,diet_encoded,type_encoded
9,165,1,1
267,19,1,0
249,126,1,2
185,39,3,3
247,99,1,0
153,11,1,2
33,87,0,3
117,71,1,0
234,194,1,2
56,33,1,0


In [21]:
# Assuming 'model' is your trained Bayesian Network model
# Assuming 'test_data' is your test dataset

# Predict the target variable using the trained model
predictions = model.predict(test_data.drop(columns=['type_encoded']))

# Calculate the number of correctly predicted samples
correct_predictions = (predictions == test_data['type_encoded']).sum()


  0%|          | 0/25 [00:00<?, ?it/s]

  correct_predictions = (predictions == test_data['type_encoded']).sum()


In [22]:
from sklearn.metrics import accuracy_score

y_pred_df = pd.DataFrame(predictions, columns=['type_encoded'])

# Calculating accuracy
accuracy = accuracy_score(test_data['type_encoded'], y_pred_df['type_encoded'])

print(f"Accuracy of the Bayesian Network model: {accuracy * 100:.2f}%")

Accuracy of the Bayesian Network model: 20.00%
