# Data Analysis of Dinosaur Features

By Elsie Wang

Purpose: Import and clean data, perform data analysis

Date: 04/06/24

In [113]:
# Import statements
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import regex as re
import matplotlib.pyplot as plt
import seaborn as sns

In [95]:
# Load the raw dinosaur dataset and preview its first five rows
jurassic = pd.read_csv(filepath_or_buffer='../data/jurassic_park.csv')
jurassic.head(n=5)

Unnamed: 0,name,diet,period,lived_in,type,length,taxonomy,named_by,species,link
0,aardonyx,herbivorous,Early Jurassic 199-189 million years ago,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...
1,abelisaurus,carnivorous,Late Cretaceous 74-70 million years ago,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...
2,achelousaurus,herbivorous,Late Cretaceous 83-70 million years ago,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...
3,achillobator,carnivorous,Late Cretaceous 99-84 million years ago,Mongolia,large theropod,5.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Perle Norell and Clark (1999),giganteus,https://www.nhm.ac.uk/discover/dino-directory/...
4,acrocanthosaurus,carnivorous,Early Cretaceous 115-105 million years ago,USA,large theropod,12.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Stovall and Langston (1950),atokensis,https://www.nhm.ac.uk/discover/dino-directory/...


## Data Cleaning

In [96]:
def extract_duration(row):
    """Return the length of the numeric range found in a period description.

    Searches ``row`` for the first ``<digits>-<digits>`` range (for example
    ``"199-189"`` in ``"Early Jurassic 199-189 million years ago"``) and
    returns the absolute difference between the two numbers.

    Parameters
    ----------
    row : str
        Period text. Non-string values (such as the NaN pandas uses for a
        missing cell) are treated as containing no range.

    Returns
    -------
    numpy integer or None
        Absolute difference between the two numbers, or ``None`` when
        ``row`` is not a string or contains no range.
    """
    # A missing cell arrives as float NaN; re.search would raise TypeError on it.
    if not isinstance(row, str):
        return None

    match = re.search(r'(\d+)-(\d+)', row)
    if match is None:
        return None

    start_year, end_year = (int(group) for group in match.groups())
    return np.abs(end_year - start_year)

In [97]:
# Extract the period name: the second word of the description, e.g.
# "Jurassic" in "Early Jurassic 199-189 million years ago".
# expand=False returns a Series rather than a one-column DataFrame.
pattern = r'^\w+\s+(\w+)'
jurassic['time_period'] = jurassic['period'].str.extract(pattern, expand=False)

# Length of the numeric range in each period description
jurassic['time_span'] = jurassic['period'].apply(extract_duration)

# Drop every row that has a missing value in any column. The explicit copy
# makes jurassic_cleaned an independent frame, so adding columns to it later
# is not an assignment on an object derived from `jurassic`.
jurassic_cleaned = jurassic.dropna().copy()
jurassic_cleaned

Unnamed: 0,name,diet,period,lived_in,type,length,taxonomy,named_by,species,link,time_period,time_span
0,aardonyx,herbivorous,Early Jurassic 199-189 million years ago,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...,Jurassic,10.0
1,abelisaurus,carnivorous,Late Cretaceous 74-70 million years ago,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,4.0
2,achelousaurus,herbivorous,Late Cretaceous 83-70 million years ago,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,13.0
3,achillobator,carnivorous,Late Cretaceous 99-84 million years ago,Mongolia,large theropod,5.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Perle Norell and Clark (1999),giganteus,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,15.0
4,acrocanthosaurus,carnivorous,Early Cretaceous 115-105 million years ago,USA,large theropod,12.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Stovall and Langston (1950),atokensis,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...
301,yimenosaurus,herbivorous,Early Jurassic 195-190 million years ago,China,sauropod,9.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Bai Yang and Wang (1990),youngi,https://www.nhm.ac.uk/discover/dino-directory/...,Jurassic,5.0
303,yinlong,herbivorous,Mid Jurassic 159-154 million years ago,China,ceratopsian,1.2m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Xu Forster Clark and Mo (2006),downsi,https://www.nhm.ac.uk/discover/dino-directory/...,Jurassic,5.0
304,yuanmousaurus,herbivorous,Mid Jurassic 180-159 million years ago,China,sauropod,17.0m,Dinosauria Saurischia Sauropodomorpha Sauropod...,Lü Li Ji Wang Zhang and Dong (2006),jiangyiensis,https://www.nhm.ac.uk/discover/dino-directory/...,Jurassic,21.0
305,yunnanosaurus,omnivorous,Early Jurassic 205-190 million years ago,China,sauropod,7.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Young (1942),huangi,https://www.nhm.ac.uk/discover/dino-directory/...,Jurassic,15.0


In [102]:
# Lookup table from each country in `lived_in` to the continent it is grouped under
countries_continents = {
    'Argentina': 'South America',
    'Australia': 'Australia',
    'Brazil': 'South America',
    'Canada': 'North America',
    'China': 'Asia',
    'Egypt': 'Africa',
    'France': 'Europe',
    'Germany': 'Europe',
    'India': 'Asia',
    'Japan': 'Asia',
    'Kazakhstan': 'Asia',
    'Lesotho': 'Africa',
    'Madagascar': 'Africa',
    'Malawi': 'Africa',
    'Mongolia': 'Asia',
    'Morocco': 'Africa',
    'Niger': 'Africa',
    'North Africa': 'Africa',
    'Romania': 'Europe',
    'Russia': 'Asia',
    'South Africa': 'Africa',
    'Spain': 'Europe',
    'Tanzania': 'Africa',
    'Tunisia': 'Africa',
    'USA': 'North America',
    'United Kingdom': 'Europe',
    'Uruguay': 'South America',
    'Uzbekistan': 'Asia',
    'Zimbabwe': 'Africa',
}

# Series.replace leaves any value that is not a key of the mapping unchanged,
# so an unlisted country keeps its own name in the `continent` column.
# assign() builds a new frame instead of setting a column in place on the
# dropna()-derived frame, and re-running the cell gives the same result.
jurassic_cleaned = jurassic_cleaned.assign(
    continent=jurassic_cleaned['lived_in'].replace(countries_continents)
)
jurassic_cleaned.head()

Unnamed: 0,name,diet,period,lived_in,type,length,taxonomy,named_by,species,link,time_period,time_span,continent
0,aardonyx,herbivorous,Early Jurassic 199-189 million years ago,South Africa,sauropod,8.0m,Dinosauria Saurischia Sauropodomorpha Prosauro...,Yates Bonnan Neveling Chinsamy and Blackbeard ...,celestae,https://www.nhm.ac.uk/discover/dino-directory/...,Jurassic,10.0,Africa
1,abelisaurus,carnivorous,Late Cretaceous 74-70 million years ago,Argentina,large theropod,9.0m,Dinosauria Saurischia Theropoda Neotheropoda C...,Bonaparte and Novas (1985),comahuensis,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,4.0,South America
2,achelousaurus,herbivorous,Late Cretaceous 83-70 million years ago,USA,ceratopsian,6.0m,Dinosauria Ornithischia Genasauria Cerapoda Ma...,Sampson (1995),horneri,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,13.0,North America
3,achillobator,carnivorous,Late Cretaceous 99-84 million years ago,Mongolia,large theropod,5.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Perle Norell and Clark (1999),giganteus,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,15.0,Asia
4,acrocanthosaurus,carnivorous,Early Cretaceous 115-105 million years ago,USA,large theropod,12.0m,Dinosauria Saurischia Theropoda Neotheropoda T...,Stovall and Langston (1950),atokensis,https://www.nhm.ac.uk/discover/dino-directory/...,Cretaceous,10.0,North America


## Exploratory Data Analysis

In [115]:
def generate_plots(period, continent):
    """Select the rows of ``jurassic_cleaned`` for one time period and continent.

    The plotting step has not been written yet: the selection is computed and
    then discarded, and the function returns ``None``.
    """
    # Build one boolean mask per criterion, then combine them
    in_period = jurassic_cleaned['time_period'] == period
    on_continent = jurassic_cleaned['continent'] == continent
    subset = jurassic_cleaned[in_period & on_continent]

    # TODO: line graph comparing time span with the feature variables

    return None

In [116]:
generate_plots(period='Cretaceous', continent='Africa')

NameError: name 'timespan' is not defined