Data Wrangling Notebook for Sykes Bone Data
<br />
Neeka Sewnath
<br />
nsewnath@ufl.edu

In [33]:
import pandas as pd
import numpy as np
import re
import uuid 

Silencing warnings that are unnecessary

In [34]:
try:
    import warnings
    warnings.filterwarnings('ignore')
except:
    pass

Import Sykes Bone Data

In [35]:
df = pd.read_csv("./../Original_Data/sykes_bone_data.csv")

Create unique eventID for each row 

In [36]:
df = df.assign(eventID = '')
df['eventID'] = [uuid.uuid4().hex for _ in range(len(df.index))]

In [37]:
df

Unnamed: 0,site_name,reference,period,bone_id,species,element,1,10b,10l,Bd,Bp,BFp,eventID
0,"AB78, London",Unpublished MoLA collections,Roman,1,Dama dama (European),scapula,\N,\N,\N,\N,\N,\N,2bc554b8bba84434942986690680de92
1,Abri Dufaure,"Weinstock, J. 2000. <em>Late Pleistocene reind...",Palaeolithic,1,Rangifer tarandus,atlas,\N,\N,\N,\N,\N,\N,5b9a74c5879c420f959ca6dc2200253e
2,Abri Dufaure,"Weinstock, J. 2000. <em>Late Pleistocene reind...",Palaeolithic,2,Rangifer tarandus,axis,\N,\N,\N,\N,\N,\N,594427a17a6b4fab95f3859c796e048c
3,Abri Dufaure,"Weinstock, J. 2000. <em>Late Pleistocene reind...",Palaeolithic,3,Rangifer tarandus,calcaneus,\N,\N,\N,\N,\N,\N,fc851d1049d0412584e97322a2b813e2
4,Abri Dufaure,"Weinstock, J. 2000. <em>Late Pleistocene reind...",Palaeolithic,4,Rangifer tarandus,calcaneus,\N,\N,\N,\N,\N,\N,f0dcc6b982084353a4a193e837c7d7d1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Alonia,Croft P. 1996. Animal remains. In D. Frankel &...,Bronze Age,974,Dama dama (Persian),axis,\N,\N,\N,\N,\N,\N,53776dcc2fef447bb655f1da3ca35ab7
996,Alonia,Croft P. 1996. Animal remains. In D. Frankel &...,Bronze Age,975,Dama dama (Persian),axis,\N,\N,\N,\N,\N,\N,e4b590dd8d9b4bcbac898dda10417523
997,Alonia,Croft P. 1996. Animal remains. In D. Frankel &...,Bronze Age,976,Dama dama (Persian),axis,\N,\N,\N,\N,\N,\N,15cf9ae0b1de4035b70be2fd04edf76b
998,Alonia,Croft P. 1996. Animal remains. In D. Frankel &...,Bronze Age,977,Dama dama (Persian),axis,\N,\N,\N,\N,\N,\N,a8a4a7bcc79e41e2a9983065f8e892d9


Remove rows without a measurement value

In [40]:
# Bone measurement names vector
bone_measure_cols_names = ['1','10b','10l','Bd','Bp','BFp']

# Replace \N by space first in excel
sample_df = df[bone_measure_cols_names]

#if all columns have a \N value in row, delete row

for index, row in df.iterrows():
    print(row)


site_name                        AB78, London
reference        Unpublished MoLA collections
period                                  Roman
bone_id                                     1
species                  Dama dama (European)
element                               scapula
1                                          \N
10b                                        \N
10l                                        \N
Bd                                         \N
Bp                                         \N
BFp                                        \N
eventID      2bc554b8bba84434942986690680de92
Name: 0, dtype: object
site_name                                         Abri Dufaure
reference    Weinstock, J. 2000. <em>Late Pleistocene reind...
period                                            Palaeolithic
bone_id                                                      1
species                                      Rangifer tarandus
element                                                  atlas
1

Name: 449, dtype: object
site_name                                               Alonia
reference    Croft P. 1996. Animal remains. In D. Frankel &...
period                                              Bronze Age
bone_id                                                    424
species                                    Dama dama (Persian)
element                                                 radius
1                                                           \N
10b                                                         \N
10l                                                         \N
Bd                                                          \N
Bp                                                        47.4
BFp                                                       43.4
eventID                       d39234e8c8924f168ffc9c3b91caedea
Name: 450, dtype: object
site_name                                               Alonia
reference    Croft P. 1996. Animal remains. In D. Frankel &...
perio

Name: 850, dtype: object
site_name                                               Alonia
reference    Croft P. 1996. Animal remains. In D. Frankel &...
period                                              Bronze Age
bone_id                                                    830
species                                    Dama dama (Persian)
element                                                  tibia
1                                                           \N
10b                                                         \N
10l                                                         \N
Bd                                                          \N
Bp                                                          \N
BFp                                                         \N
eventID                       3df6008cf79343428aaf6c9b5b3e5f7c
Name: 851, dtype: object
site_name                                               Alonia
reference    Croft P. 1996. Animal remains. In D. Frankel &...
perio

In [41]:
sample_df

Unnamed: 0,1,10b,10l,Bd,Bp,BFp
0,\N,\N,\N,\N,\N,\N
1,\N,\N,\N,\N,\N,\N
2,\N,\N,\N,\N,\N,\N
3,\N,\N,\N,\N,\N,\N
4,\N,\N,\N,\N,\N,\N
...,...,...,...,...,...,...
995,\N,\N,\N,\N,\N,\N
996,\N,\N,\N,\N,\N,\N
997,\N,\N,\N,\N,\N,\N
998,\N,\N,\N,\N,\N,\N


Parse through element rows 1,10b, 10l, Bd, and BFp and separate into new rows