# Jurassic Park: A Dino Data Adventure

Welcome to a playful exploration of the most famous (and extinct) cast in cinematic history. Using an exhaustive dataset of dinosaurs featured in the Jurassic Park universe, this project dives into the traits, sizes, diets, and timelines of creatures that once roamed the Earth—or at least, Hollywood.

In [17]:
import pandas as pd 
import seaborn as sns

In [29]:
# Load the raw dinosaur table from the project's raw-data folder.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# row index that was saved into the CSV — confirm whether it should be kept.
dinosaurs = pd.read_csv(filepath_or_buffer='../data/raw/data.csv')

# Preview the first three rows.
dinosaurs.head(n=3)

Unnamed: 0,name,diet,period,lived_in,type,length,taxonomy,named_by,species,link
0,aardonyx,herbivorous,Early Jurassic 199-189 million years ago,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...
1,abelisaurus,carnivorous,Late Cretaceous 74-70 million years ago,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...
2,achelousaurus,herbivorous,Late Cretaceous 83-70 million years ago,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...


In [30]:
# Break the free-text 'period' value into three new columns: the period name,
# and the start / end of its range in millions of years ago.
# The pattern accepts either an en dash or a hyphen between the two numbers.
# str.extract returns the captured groups as text, and rows that do not match
# the pattern get missing values in all three columns.
dinosaurs[
    [
        'period name',
        'period start (million years ago)',
        'period end (million years ago)',
    ]
] = dinosaurs['period'].str.extract(
    pat=r'^(.*?)\s+(\d+)[–-](\d+)\s+million years ago$',
    expand=True,
)

# Preview the first three rows with the new columns appended.
dinosaurs.head(n=3)

Unnamed: 0,name,diet,period,lived_in,type,length,taxonomy,named_by,species,link,period name,period start (million years ago),period end (million years ago)
0,aardonyx,herbivorous,Early Jurassic 199-189 million years ago,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...,Early Jurassic,199,189
1,abelisaurus,carnivorous,Late Cretaceous 74-70 million years ago,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...,Late Cretaceous,74,70
2,achelousaurus,herbivorous,Late Cretaceous 83-70 million years ago,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...,Late Cretaceous,83,70


In [31]:
# The original 'period' text is now redundant with the three columns derived
# from it, so remove it.
dinosaurs = dinosaurs.drop(columns=['period'])

# Preview the first three rows.
dinosaurs.head(n=3)

Unnamed: 0,name,diet,lived_in,type,length,taxonomy,named_by,species,link,period name,period start (million years ago),period end (million years ago)
0,aardonyx,herbivorous,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...,Early Jurassic,199,189
1,abelisaurus,carnivorous,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...,Late Cretaceous,74,70
2,achelousaurus,herbivorous,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...,Late Cretaceous,83,70


In [32]:
# Turn each whitespace-separated taxonomy string into a list of clade names.
taxonomy_splt = dinosaurs.loc[:, 'taxonomy'].str.split(pat=None)

In [33]:
# Length of the longest clade list; this sets how many clade columns are built.
max_clades = taxonomy_splt.map(len).max()

In [34]:
# Expand the per-row clade lists into a table with one column per level,
# named Clade_1 … Clade_<max_clades>. Rows with fewer clades than the
# maximum are padded with missing values on the right.
taxonomy_df = pd.DataFrame(
    data=list(taxonomy_splt),
    columns=[f'Clade_{level}' for level in range(1, max_clades + 1)],
)

In [35]:
# Attach the clade columns to the right of the main table (aligned on index).
dinosaurs = pd.concat(objs=[dinosaurs, taxonomy_df], axis='columns')

In [36]:
# Keep only the first seven taxonomy levels and discard everything deeper.
# The surplus columns are read from taxonomy_df rather than spelled out as a
# fixed 'Clade_8' … 'Clade_17' list, so this cell still works when the deepest
# taxonomy in the data is not exactly 17 levels: a fixed list raises KeyError
# with fewer levels and leaves stray columns behind with more.
dinosaurs = dinosaurs.drop(columns=taxonomy_df.columns[7:])

In [37]:
# Give the seven retained clade columns rank-style names.
# The labels are assigned purely by position within the taxonomy string.
dinosaurs = dinosaurs.rename(
    {
        'Clade_1': 'Kingdom', 'Clade_2': 'Phylum', 'Clade_3': 'Class',
        'Clade_4': 'Order', 'Clade_5': 'Suborder', 'Clade_6': 'Infraorder',
        'Clade_7': 'Family',
    },
    axis='columns',
)

In [38]:
# Preview the first three rows of the cleaned table.
dinosaurs.head(n=3)

Unnamed: 0,name,diet,lived_in,type,length,taxonomy,named_by,species,link,period name,period start (million years ago),period end (million years ago),Kingdom,Phylum,Class,Order,Suborder,Infraorder,Family
0,aardonyx,herbivorous,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...,Early Jurassic,199,189,Dinosauria,Saurischia,Sauropodomorpha,Prosauropoda,Anchisauria,,
1,abelisaurus,carnivorous,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...,Late Cretaceous,74,70,Dinosauria,Saurischia,Theropoda,Neotheropoda,Ceratosauria,Neoceratosauria,Abelisauridae
2,achelousaurus,herbivorous,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...,Late Cretaceous,83,70,Dinosauria,Ornithischia,Genasauria,Cerapoda,Marginocephalia,Ceratopsia,Neoceratopsia


In [39]:
# Write the cleaned table to the current working directory, leaving out the
# DataFrame index.
# NOTE(review): the raw file is read from '../data/raw/', but this output is
# written next to the notebook — confirm that location is intended.
dinosaurs.to_csv(path_or_buf='dinosaursclean.csv', index=False)