The dataset that we’re going to be working with in this lesson is taken from the Trans-Atlantic Slave Trade Database, part of the Slave Voyages project. The larger database includes information on roughly 35,000 slave-trading voyages made between 1514 and 1866.

In [57]:
import pandas as pd

In [58]:
# Cap the number of rows pandas will display at 100.
pd.set_option("display.max_rows", 100)

In [59]:
# Load the voyages CSV (comma-separated, UTF-8 encoded) into a DataFrame.
df = pd.read_csv("data.csv", sep=",", encoding="utf-8")

In [60]:
# Display the loaded frame; the notebook renders a truncated preview of its rows and columns.
df

Unnamed: 0,Year of arrival at port of disembarkation,Voyage ID,Vessel name,Vessel owner,Flag of vessel,Rig or type of vessel,Voyage itinerary imputed port where began (ptdepimp) place,Voyage itinerary imputed principal place of slave purchase (mjbyptimp),Voyage itinerary imputed principal port of slave disembarkation (mjslptimp) place,VOYAGEID2,...,Total disembarked,Captives arrived at 1st port,Percent men,Percent women,Percent children,Duration of captives' crossing (in days),"Voyage duration, homeport to disembarkation (in days)",Captain's name,particular outcome label,Sources
0,1714.0,16109,Freeke Gally,"Freeke, Phillip*",Great Britain,,Bristol,,Kingston,,...,283.0,283.0,,,,,282.0,"Neale, Alexander",Voyage completed as intended,"['JRL, English MS, 1390, 1, f. 3v<><p><em>John..."
1,1713.0,16110,Greyhound Gally,"Hollister, Lawrence*",Great Britain,,Bristol,,"Jamaica, place unspecified",,...,179.0,,,,,,,"Selkirk, Alexander<br/> Forrest, Henry",Sold slaves in Americas - subsequent fate unknown,"['JRL, English, MS, 1390, 1, f. 3v<><p><em>Joh..."
2,1714.0,16111,Jacob,"Crow, Francis*",Great Britain,Sloop,Bristol,,Kingston,,...,130.0,130.0,,,,,236.0,"Nicholls, Philip",Voyage completed as intended,"['JRL, English MS 1390, 1, f. 3r<><p><em>John ..."
3,1714.0,16112,Jason Gally,"Becher, John*",Great Britain,,Bristol,,Port Royal,,...,278.0,278.0,,,,,305.0,"Plummer, John",Voyage completed as intended,"['JRL, English MS 1390, 1, f. 3r<><p><em>John ..."
4,1713.0,16113,Lawford Gally,"Norman, John*",Great Britain,,Bristol,"Africa, port unspecified",Newcastle (Nevis),,...,190.0,,,,,,,"Stretton, Joseph",Voyage completed as intended,"['Richardson,I<><p>Richardson, David, <em>Bris..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36146,1850.0,900234,Duas Clementinas,,France,Brig,Rio de Janeiro,Benguela,Rio de Janeiro,,...,444.0,,,,,,,,"Arrived in Africa, subsequent fate unknown","['AHNA, Códice E-20-5 (Benguela)<><i>Arquivo H..."
36147,1851.0,900235,,,,,Havana,Benguela,,,...,367.0,,,,,,,,"Left home port, no further record","['TobiasMonteiro,63/3/4,84<><i>Biblioteca Naci..."
36148,1851.0,900236,Rio Tâmega,,Portugal / Brazil,Patacho,"Southeast Brazil, port unspecified",Benguela,Rio de Janeiro,,...,105.0,,,,,,,,"Arrived in Africa, subsequent fate unknown","['AHNA, Códice E-20-5 (Benguela)<><i>Arquivo H..."
36149,1851.0,900237,Rosa,,,Brig,Rio de Janeiro,Benguela,Rio de Janeiro,,...,458.0,,,,,,,,"Arrived in Africa, subsequent fate unknown","['AHNA, Códice E-20-5 (Benguela)<><i>Arquivo H..."


In [61]:
# Swap the database's verbose column headers for short snake_case labels.
# Two keys deliberately carry stray whitespace (a trailing space after
# "(mjbyptimp)" and a leading space before "particular outcome label").
# Do not trim them: rename() only touches labels that match exactly.
df = df.rename(
    mapper={
        "Year of arrival at port of disembarkation": "year_arrival",
        "Flag of vessel": "national_affiliation",
        "Voyage ID": "id",
        "Vessel name": "vessel_name",
        "Vessel owner": "vessel_owner",
        "Rig or type of vessel": "vessel_type",
        "Voyage itinerary imputed port where began (ptdepimp) place": "start_place",
        "Voyage itinerary imputed principal place of slave purchase (mjbyptimp) ": "purchase_place",
        "Voyage itinerary imputed principal port of slave disembarkation (mjslptimp) place": "end_place",
        "VOYAGEID2": "alternative_id",
        "Total embarked": "total_embarked",
        "Total disembarked": "total_disembarked",
        "Captives arrived at 1st port": "captives_arrived",
        "Percent men": "percent_men",
        "Percent women": "percent_women",
        "Percent children": "percent_children",
        "Duration of captives' crossing (in days)": "crossing_duration",
        "Voyage duration, homeport to disembarkation (in days)": "voyage_duration",
        "Captain's name": "captain_name",
        " particular outcome label": "outcome",
        "Sources": "sources",
    },
    axis="columns",
)

## Examine

In [62]:
# (number of rows, number of columns) in the frame.
df.shape

(36151, 21)

In [63]:
# Data type pandas inferred for each column when reading the CSV.
df.dtypes

year_arrival            float64
id                        int64
vessel_name              object
vessel_owner             object
national_affiliation     object
vessel_type              object
start_place              object
purchase_place           object
end_place                object
alternative_id           object
total_embarked          float64
total_disembarked       float64
captives_arrived        float64
percent_men             float64
percent_women           float64
percent_children        float64
crossing_duration       float64
voyage_duration         float64
captain_name             object
outcome                  object
sources                  object
dtype: object

In [64]:
# List the column labels currently in use.
df.columns

Index(['year_arrival', 'id', 'vessel_name', 'vessel_owner',
       'national_affiliation', 'vessel_type', 'start_place', 'purchase_place',
       'end_place', 'alternative_id', 'total_embarked', 'total_disembarked',
       'captives_arrived', 'percent_men', 'percent_women', 'percent_children',
       'crossing_duration', 'voyage_duration', 'captain_name', 'outcome',
       'sources'],
      dtype='object')

In [65]:
# include='all' summarizes the text (object) columns as well as the numeric ones.
df.describe(include='all')

Unnamed: 0,year_arrival,id,vessel_name,vessel_owner,national_affiliation,vessel_type,start_place,purchase_place,end_place,alternative_id,...,total_disembarked,captives_arrived,percent_men,percent_women,percent_children,crossing_duration,voyage_duration,captain_name,outcome,sources
count,36150.0,36151.0,34537,21778,33910,23787,31643,33941,31960,50,...,34231.0,18408.0,3477.0,3478.0,4215.0,7339.0,13541.0,32123,36150,36137
unique,,,9496,9680,11,73,241,188,280,50,...,,,,,,,,18977,166,24105
top,,,Mary,Royal African Company,Great Britain,Ship,Liverpool,"Africa, port unspecified","Bahia, place unspecified",[107172],...,,,,,,,,"Smith, John",Voyage completed as intended,"['mettas,I<>Mettas, Jean, <i>Répertoire des E..."
freq,,,252,642,12036,4854,4973,7216,4226,1,...,,,,,,,,38,16925,1212
mean,1764.436293,42836.879533,,,,,,,,,...,268.667436,276.116688,0.491053,0.263807,0.215463,60.462733,290.432317,,,
std,59.235218,72670.177399,,,,,,,,,...,137.799831,158.751905,0.147106,0.120455,0.167172,33.067241,130.751558,,,
min,1514.0,1.0,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,2.0,39.0,,,
25%,1732.0,16149.5,,,,,,,,,...,174.0,158.0,0.39701,0.183063,0.074715,38.0,210.0,,,
50%,1773.0,32514.0,,,,,,,,,...,260.0,254.0,0.48936,0.255455,0.19565,53.0,269.0,,,
75%,1806.0,50318.5,,,,,,,,,...,350.0,370.25,0.58469,0.338915,0.31859,74.0,345.0,,,


In [66]:
# Keep only the voyages that have a recorded percent_women value.
df.loc[df["percent_women"].notna()]

Unnamed: 0,year_arrival,id,vessel_name,vessel_owner,national_affiliation,vessel_type,start_place,purchase_place,end_place,alternative_id,...,total_disembarked,captives_arrived,percent_men,percent_women,percent_children,crossing_duration,voyage_duration,captain_name,outcome,sources
46,1717.0,16159,Charfield,"Jacob, Samuel*",Great Britain,,Bristol,Gambia,Rappahannock,,...,156.0,156.0,0.473333,0.220000,0.306667,,,"Roberts, William",Voyage completed as intended,"['VHSTayloe,v. Mss1 T2118 b, Account book 1708..."
192,1723.0,16315,Greyhound,"Hobhouse, Isaac*<br/> Challoner, William",Great Britain,,Bristol,"Bight of Biafra and Gulf of Guinea Islands, po...",York River,,...,209.0,,0.557520,0.377580,0.064900,51.0,184.0,"Hallden, Edward",Voyage completed as intended,"['Richardson,I<><p>Richardson, David, <em>Bris..."
482,1731.0,16614,Burroughs,"Tonge, Henry*",Great Britain,,Bristol,Cape Coast Castle,"Jamaica, place unspecified",,...,,,0.809524,0.142857,0.047619,,322.0,"Wallington, John",Voyage completed as intended,"['JRL, English MS, 1390, 1, f. 9r<><p><em>John..."
489,1730.0,16621,Freke,"Freke, William*",Great Britain,,Bristol,Bonny,"Barbados, place unspecified",,...,345.0,345.0,0.428570,0.227960,0.343470,,314.0,"Bartlett, John",Voyage completed as intended,"['Richardson,II<><p>Richardson, David, <em>Bri..."
1013,1715.0,32452,Saint Jean d'Afrique,"Feray<br/> Cossart, Jean",France,,Le Havre,"West Central Africa and St. Helena, port unspe...",Cap Français,,...,495.0,495.0,0.672730,0.129290,0.197980,,331.0,"Chauvel, Louis (a) Schovel",Voyage completed as intended,"['mettas,II<>Mettas, Jean, <i>Répertoire des ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35860,1771.0,91595,Andromache,"Davenport, William<br/> Lace, Ambrose<br/> Was...",Great Britain,Brig,Liverpool,Calabar,"Dominica, place unspecified",,...,186.0,186.0,0.532260,0.252690,0.215050,,,"Sharpe, James",Voyage completed as intended,"['LST,1744-1786<>Richardson, David, Katherine ..."
35882,1772.0,91617,Barbados Packet,"Dillon, Gerard<br/> Mackmillan, Robert<br/> Sa...",Great Britain,Brigantine,Liverpool,"Windward Coast, place unspecified",Zion Hill,,...,96.0,,0.458330,0.270830,0.270830,,401.0,"Barwis, David<br/> Hudson, Harrison",Voyage completed as intended,"['LST,1744-1786<>Richardson, David, Katherine ..."
35886,1771.0,91621,Fox,"Hasell, Chris<br/> Dobson, John<br/> Davenport...",Great Britain,Brigantine,Liverpool,Calabar,"St. Kitts, port unspecified",,...,154.0,154.0,0.331080,0.189190,0.479730,,206.0,"Beard, John",Voyage completed as intended,"['LST,1744-1786<>Richardson, David, Katherine ..."
35907,1771.0,91643,True Blue,"Hasell, Chris<br/> Welch, John<br/> Goad, John...",Great Britain,Ship,Liverpool,Benin,Kingston,,...,206.0,,0.421570,0.382350,0.196080,,,"Griffith, Richard<br/> Goad, William",Voyage completed as intended,"['LST,1744-1786<>Richardson, David, Katherine ..."


In [68]:
# How many voyages are missing percent_women (True) versus recording it (False)?
(
    df["percent_women"]
    .isna()
    .value_counts()
)

percent_women
True     32673
False     3478
Name: count, dtype: int64

In [69]:
# Same tally as a proportion of all voyages rather than a raw count.
(
    df["percent_women"]
    .isna()
    .value_counts(normalize=True)
)

percent_women
True     0.903792
False    0.096208
Name: proportion, dtype: float64

In [70]:
# Share of non-missing values in each column: non-null count over total rows.
df.count() / df.shape[0]

year_arrival            0.999972
id                      1.000000
vessel_name             0.955354
vessel_owner            0.602418
national_affiliation    0.938010
vessel_type             0.657990
start_place             0.875301
purchase_place          0.938868
end_place               0.884070
alternative_id          0.001383
total_embarked          0.954745
total_disembarked       0.946889
captives_arrived        0.509198
percent_men             0.096180
percent_women           0.096208
percent_children        0.116594
crossing_duration       0.203010
voyage_duration         0.374568
captain_name            0.888578
outcome                 0.999972
sources                 0.999613
dtype: float64