In [4]:
import pandas as pd
import numpy as np
from pathlib import Path

# ROOT_FOLDER = "NYPL-menus"
ROOT_FOLDER = "/content/drive/My Drive/513_Project"

MENU = 1
MENU_PAGE = 3
MENU_ITEM = 2
DISH = 0

# Data Profiling Workflow Instructions

## Accessing Datasets

Each dataset can be accessed from the dataset collection using the following keys:

```python
dataset["MENU"]          # Menu data
dataset["MENU_PAGE"]     # Menu page data
dataset["MENU_ITEM"]     # Menu item data
dataset["DISH"]          # Dish data
```

## Profiling the Cleaned Dataset

Update the ROOT_FOLDER variable to point to your cleaned data directory:

```python
ROOT_FOLDER = "NYPL-menus-cleaned"  # Path to cleaned data
```

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
dataset = []

for filename in sorted(Path(ROOT_FOLDER).iterdir()):
    print(filename)
    if filename.name.endswith(".csv"):
        dataset.append(pd.read_csv(filename, na_values=[""]))


/content/drive/My Drive/513_Project/Dish.csv
/content/drive/My Drive/513_Project/Menu.csv
/content/drive/My Drive/513_Project/MenuItem.csv
/content/drive/My Drive/513_Project/MenuPage.csv


In [16]:
base_path = '/content/drive/My Drive/513_Project/'
# Your original dictionary-based load
dataset_named = {
    "DISH": pd.read_csv(base_path + "Dish.csv"),
    "MENU": pd.read_csv(base_path + "Menu.csv"),
    "MENU_ITEM": pd.read_csv(base_path + "MenuItem.csv"),
    "MENU_PAGE": pd.read_csv(base_path + "MenuPage.csv")
}

# Now build a list that aligns with their constant indexes
# DISH = 0, MENU = 1, MENU_ITEM = 2, MENU_PAGE = 3
dataset = [None] * 4
dataset[0] = dataset_named["DISH"]
dataset[1] = dataset_named["MENU"]
dataset[2] = dataset_named["MENU_ITEM"]
dataset[3] = dataset_named["MENU_PAGE"]

In [17]:
# IC 2: Date Outliner in Menu

# Extract first 4 digits of call_number (if not null)
dataset[MENU]["call_prefix"] = dataset[MENU]["call_number"].str[:4]

# Extract first 4 digits of date (if not null)
dataset[MENU]["date_prefix"] = dataset[MENU]["date"].str[:4]

# Check constraint: call_prefix == date_year when both exist
ic2_violations = dataset[MENU][
    dataset[MENU]["call_number"].notna() &
    dataset[MENU]["date"].notna() &
    dataset[MENU]["call_prefix"].str.isnumeric() &
    (dataset[MENU]["date"].str.match(r"[^1]+") |
    dataset[MENU]["date"].str.match(r"[1][^89]+")) &
    (dataset[MENU]["call_prefix"] != dataset[MENU]["date_prefix"])
]

print(f"Before Cleaning Applied: {len(ic2_violations)}")
dataset[MENU].loc[ic2_violations.index][["id", "call_number", "date", "call_prefix", "date_prefix"]]

Before Cleaning Applied: 5


Unnamed: 0,id,call_number,date,call_prefix,date_prefix
525,13112,1900-2328,0190-03-06,1900,190
6195,22951,1901-213,1091-01-27,1901,1091
14659,32265,1918-0387_wotm,2928-03-26,1918,2928
16917,34727,1912-0667_wotm,0001-01-01,1912,1
16918,34728,1912-0668_wotm,0001-01-01,1912,1


In [18]:
# IC 3: Date Blank with call number year in Menu

# Extract first 4 digits of call_number (if not null)
dataset[MENU]["call_prefix"] = dataset[MENU]["call_number"].str[:4]

# Extract first 4 digits of date (if not null)
dataset[MENU]["date_prefix"] = dataset[MENU]["date"].str[:4]

# Check constraint: date should not be blank when call_prefix is numeric
ic3_violations = dataset[MENU][
    dataset[MENU]["call_number"].notna() &
    dataset[MENU]["date"].isna() &
    dataset[MENU]["call_prefix"].str.isnumeric()
]

print(f"Violations found: {len(ic3_violations)}")
ic3_violations[["id", "call_number", "date", "call_prefix", "date_prefix"]].head(20)

Violations found: 47


Unnamed: 0,id,call_number,date,call_prefix,date_prefix
465,13042,1900-2517,,1900,
4598,20978,1906-783,,1906,
5025,21467,1886-036,,1886,
5400,21969,1887-028,,1887,
8633,25998,1900-189,,1900,
8732,26119,1899-606,,1899,
10093,27576,1910-881,,1910,
10426,27912,1973-0020_wotm,,1973,
10526,28012,1977-0004_wotm,,1977,
10560,28062,1978-0021_wotm,,1978,


In [19]:
# IC 4: Date Blank with no date info in call number in Menu

# Extract first 4 digits of call_number (if not null)
dataset[MENU]["call_prefix"] = dataset[MENU]["call_number"].str[:4]

# Extract first 4 digits of date (if not null)
dataset[MENU]["date_prefix"] = dataset[MENU]["date"].str[:4]

# Check constraint: date should not be blank when call_prefix is numeric
ic4_violations = dataset[MENU][
    dataset[MENU]["call_number"].notna() &
    dataset[MENU]["date"].isna() & (
    (dataset[MENU]["call_prefix"] == 'Zand') |
    (dataset[MENU]["call_prefix"] == 'Soet') |
    (dataset[MENU]["call_prefix"] == 'soet') |
    (dataset[MENU]["call_prefix"] == 'Bara') |
    (dataset[MENU]["call_prefix"] == '_wot'))
]

print(f"Violations found: {len(ic4_violations)}")
dataset[MENU].loc[ic4_violations.index]

Violations found: 536


Unnamed: 0,id,name,sponsor,event,venue,place,physical_description,occasion,notes,call_number,...,date,location,location_type,currency,currency_symbol,status,page_count,dish_count,call_prefix,date_prefix
8913,26347,,Pan American,,AIRLINE,,Folder; 8.5 x 6 inches,,Dieter Zander Collection; cover is an illustra...,Zander 11,...,,Pan American,,,,complete,3,30,Zand,
8915,26349,,American Export Lines,Captain's Dinner,STEAMSHIP,S.S. Independence,Booklet; 8.5 x 11.5 inches,,"Dieter Zander Collection; captain, officers, a...",Zander 13,...,,American Export Lines,,,,complete,4,28,Zand,
8918,26352,,American Airlines,,AIRLINE,,Folded set of postcards; 7 x 4.5 in. folded; 7...,,Dieter Zander Collection; Americana themed; sc...,Zander 16,...,,American Airlines,,,,complete,3,34,Zand,
8921,26355,,Pan American,,AIRLINE,,Folder; 11 x 9 inches,,Dieter Zander Collection; cover is a watercolo...,Zander 19 undated,...,,Pan American,,,,complete,3,68,Zand,
8922,26356,,Pan American,,AIRLINE,,Tri-fold; 9.75 x 13.25 in. folded; 9.75 x 26 i...,,"Dieter Zander Collection, cover is a watercolo...",Zander 21 undated,...,,Pan American,,,,complete,3,47,Zand,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10224,27708,,Dinner held by Pennsylvania Railroad (Railroad...,,,,10 x 7 in. fold. ; ill.,,,Soete 235A-B,...,,Pennsylvania Railroad,,Dollars,$,complete,4,101,Soet,
10225,27709,,Dinner held by Pennsylvania Railroad (Railroad...,,,,10 x 7 in. fold. ; ill.,,,Soete 237A-C,...,,Pennsylvania Railroad,,Dollars,$,complete,5,91,Soet,
10226,27710,,Dinner held by Pennsylvania Railroad,,,,,,,Soete 209A,...,,Dinner Held By Pennsylvania Railroad,,Dollars,$,complete,2,101,Soet,
10228,27712,,Exec Committe California Midwinter Int Exposition,,,,,,,soete 56,...,,Exec Committe California Midwinter Int Exposition,,,,complete,3,0,soet,


In [20]:
# IC 5: Date range outsite of 1890-1970 in Menu
# Create a temporary year column, keeping NaNs from crashing it
dataset[MENU]["year"] = pd.to_numeric(dataset[MENU]["date"].astype(str).str[:4], errors="coerce")

# Now apply the IC5 logic safely
ic5_violations = dataset[MENU][
    dataset[MENU]["year"].notna() &
    ((dataset[MENU]["year"] < 1890) | (dataset[MENU]["year"] > 1970))
]["id"]

print(f"Violations found: {len(ic5_violations)}")
print(f"Menu dataset size: {len(dataset[MENU])}")
print(f"MenuPage dataset size: {len(dataset[MENU_PAGE])}")
print(f"MenuItem dataset size: {len(dataset[MENU_ITEM])}")

# View violating rows
dataset[MENU].loc[ic5_violations.index]

Violations found: 1006
Menu dataset size: 17545
MenuPage dataset size: 66937
MenuItem dataset size: 1332726


Unnamed: 0,id,name,sponsor,event,venue,place,physical_description,occasion,notes,call_number,...,location,location_type,currency,currency_symbol,status,page_count,dish_count,call_prefix,date_prefix,year
39,12503,,POLICE DEPARTMENT OF THE CITY OF NEW YORK,SEVENTH ANNUAL DINNER,GOVT;,DELMONICO'S,FOL; 4.75 x 7.25;,,SEAL ON COVER; FRENCH; INCLUDES WINES SERVED W...,1888-0010,...,Police Department Of The City Of New York,,,,complete,3,30,1888,1888,1888.0
48,12515,,THE ALBANY,LUNCH,?,"DENVER, COLO;",CARD;3.5 X 5;,,,1888-0611,...,The Albany,,,,complete,2,30,1888,1888,1888.0
49,12516,,REVERE HOUSE,COMPLIMENTARY BANQUET GIVEN BY THE CITY GOVERN...,RESTAURANT,"BOSTON, MA",BROADSIDE; ILLUS; 4.25 X 11.75,,MENU PRINTED IN BLACK ON CREAM SILK RIBBON WIT...,1865-0001,...,Parker House,,Dollars,$,complete,4,422,1865,1865,1865.0
147,12635,,,,,,,,,,...,The Albany,,,,complete,2,30,,1888,1888.0
148,12636,,,,,,,,,,...,Revere House,,Dollars,$,complete,4,403,,1865,1865.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11880,29401,,Le Riveria,,,,21x15cm folded; 21x30cm open,,,1990-0012_wotm,...,Le Riveria,,,,complete,3,7,1990,1990,1990.0
14659,32265,Healy's Forty-second Street restaurant,Healy's Forty-second Street restaurant,,,,33x18cm,,dinner,1918-0387_wotm,...,Healy's Forty-second Street restaurant,,Dollars,$,complete,2,333,1918,2928,2928.0
16425,34079,"New Map of Brooklyn and vicinity, for the Broo...","New Map of Brooklyn and vicinity, for the Broo...",,,,1 map ; 56 x 69 cm.,,,wotm,...,"New Map of Brooklyn and vicinity, for the Broo...",,Dollars,$,complete,19,110,wotm,1880,1880.0
16917,34727,Hofbraü Restaurant,Hofbraü Restaurant,,,,,,Dinner of the Explorers Club,1912-0667_wotm,...,Hofbraü Restaurant,,,,complete,1,11,1912,0001,1.0


In [22]:
# IC 6: Date blank in Dish with Dependency on Menu

# Extract first 4 digits of date (if not null)
dataset[MENU]["date_prefix"] = pd.to_numeric(dataset[MENU]["date"].str[:4], errors='coerce').astype('Int64')

dish_appearances = (
    dataset[MENU_ITEM][['id', 'dish_id', 'menu_page_id']]
    .merge(dataset[MENU_PAGE][['id', 'menu_id']],
            left_on='menu_page_id', right_on='id', suffixes=('', '_page'))
    .merge(dataset[MENU], left_on='menu_id', right_on='id')
    .groupby('dish_id')['date_prefix']
    .agg(['min', 'max'])
    .reset_index()
    .rename(columns={'min': 'calc_first', 'max': 'calc_last'})
)

dishes = dataset[DISH].merge(
    dish_appearances, left_on='id', right_on='dish_id', how='left')

ic6_violations_6_first = dishes[
    (dishes['calc_first'].notna() &(
    (dishes['first_appeared'].isna()) |
    (dishes['first_appeared'] == 0) |
    (dishes['first_appeared'] == 1) |
    (dishes['calc_first'].notna() & (dishes['first_appeared'] > dishes['calc_first']))))
]

ic6_violations_6_last = dishes[
    (dishes['calc_last'].notna()) & (
    (dishes['last_appeared'].isna()) |
    (dishes['last_appeared'] == 0) |
    (dishes['last_appeared'] == 2928) |
    (dishes['calc_last'].notna() & (dishes['last_appeared'] < dishes['calc_last'])))
]

print(f"Violations found: {len(ic6_violations_6_first) + len(ic6_violations_6_last)}")
dishes.loc[ic6_violations_6_first.index ][
    ["id", "name", "first_appeared", "last_appeared", "calc_first", "calc_last"]
]

Violations found: 38118


Unnamed: 0,id,name,first_appeared,last_appeared,calc_first,calc_last
5,7,Radishes,1854,2928,1091,2928
13,15,Celery,1,2928,1,2928
15,17,Caviar,1880,1987,1091,1987
24,26,Clams,1881,1970,1859,1970
25,27,Oysters,1862,1963,1859,1963
...,...,...,...,...,...,...
423392,515673,Boiled: Corned beef & cabbage,0,0,1882,1882
423393,515674,Boiled: Knuckle of Veal & Bacon,0,0,1882,1882
423394,515675,Roast: Turkey & Cranberry Sauce,0,0,1882,1882
423395,515676,"Claret: Chateau Larose, Cruse et Fils Freres",0,0,1883,1883


In [23]:
# IC 7: Date Zero in Dish with no dependencies on Menu
ic7_violations_1 = dataset[DISH][
    (dataset[DISH]["first_appeared"] == 0) |
    (dataset[DISH]["last_appeared"] == 0)
]

ic7_violations_2 = dataset[MENU_ITEM][
    dataset[MENU_ITEM]["dish_id"].isin(dataset[DISH].loc[ic7_violations_1.index, "id"])
]

ic7_violations_3 = dataset[MENU_PAGE][
    dataset[MENU_PAGE]["id"].isin(ic7_violations_2["menu_page_id"])
]

ic7_violations_4 = dataset[MENU][
    dataset[MENU]["id"].isin(ic7_violations_3["menu_id"])
]

ic7_violations_first = dataset[DISH][(dataset[DISH]['first_appeared'] == 0) | (dataset[DISH]['first_appeared'] == 1)]

ic7_violations_last = dataset[DISH][dataset[DISH]['last_appeared'] == 0]

print(f"Violations found: {len(ic7_violations_1)}")
dataset[DISH].loc[ic7_violations_1.index]
# dataset[MENU_ITEM].loc[ic7_violations_2.index]
# dataset[MENU_PAGE].loc[ic7_violations_3.index]
# dataset[MENU].loc[ic7_violations_4.index]


Violations found: 55293


Unnamed: 0,id,name,description,menus_appeared,times_appeared,first_appeared,last_appeared,lowest_price,highest_price
290,340,Raw on the Half Shell,,1,1,0,0,0.0,0.0
1690,2005,cheese or ham,,1,1,0,0,0.0,0.0
1723,2056,40,,4,4,0,0,0.0,0.0
4643,5651,"coffee, tea",,1,1,0,0,0.4,0.4
8794,10840,Cardinal,,1,1,0,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
423392,515673,Boiled: Corned beef & cabbage,,1,1,0,0,0.0,0.0
423393,515674,Boiled: Knuckle of Veal & Bacon,,1,1,0,0,0.0,0.0
423394,515675,Roast: Turkey & Cranberry Sauce,,1,1,0,0,0.0,0.0
423395,515676,"Claret: Chateau Larose, Cruse et Fils Freres",,1,1,0,0,0.0,0.0


In [24]:
# IC 8: Temporal consistency in Dish
ic8_violations = dataset[DISH][dataset[DISH]["first_appeared"] > dataset[DISH]["last_appeared"]]

print(f"Violations found: {len(ic8_violations)}")
ic8_violations.head(10)

Violations found: 6


Unnamed: 0,id,name,description,menus_appeared,times_appeared,first_appeared,last_appeared,lowest_price,highest_price
131193,164029,Clear beef broth,,0,1,1900,0,0.25,0.25
163257,204888,Hot roast beef with gravy,,0,1,1900,0,0.25,0.25
197050,250693,SURI LEBERLI - Shredded Calf's Liver Flambe in...,,0,1,1945,0,,
197053,250699,"SWISS MINCED VEAL, ROESTI",,0,1,1945,0,,
237740,301736,Cafe Glacee,,0,2,1940,0,0.4,0.4
244534,309629,Garlic Butter,,0,1,1947,0,0.4,0.4


In [25]:
# Date range percentile in Dish

# Filter out invalid entries (NaN or temporal inconsistencies)
valid_dishes = dataset[DISH][
    dataset[DISH]['first_appeared'].notna() &
    dataset[DISH]['last_appeared'].notna()
]

# Calculate percentiles
start_year = int(np.percentile(valid_dishes['first_appeared'], 5))  # 5th percentile start
end_year = int(np.percentile(valid_dishes['last_appeared'], 98))    # 95th percentile end

# Count dishes within this range
in_range = valid_dishes[
    (valid_dishes['first_appeared'] >= start_year) &
    (valid_dishes['last_appeared'] <= end_year)
]
coverage = len(in_range) / len(valid_dishes)

print(f"90% percentile range: {start_year}-{end_year}")
print(f"Covers {coverage:.1%} of dishes ({len(in_range)}/{len(valid_dishes)})")

90% percentile range: 0-1987
Covers 98.0% of dishes (415134/423397)


In [26]:
# IC 9: Date range outsite of 1880-2000 in Dish
ic9_violations = dataset[DISH][
    dataset[DISH]["first_appeared"].notna() &
    dataset[DISH]["last_appeared"].notna() & (
    (dataset[DISH]["first_appeared"] > 2000) |
    (dataset[DISH]["last_appeared"] < 1880))
]
print(f"Violations found: {len(ic9_violations)}")
dataset[DISH].loc[ic9_violations.index][["id", "name", "first_appeared", "last_appeared"]].head(10)

Violations found: 59858


Unnamed: 0,id,name,first_appeared,last_appeared
290,340,Raw on the Half Shell,0,0
1690,2005,cheese or ham,0,0
1723,2056,40,0,0
2173,2605,SAUTERNES.,1865,1865
3252,4055,"Soups without meats,",1865,1865
3269,4078,Bropiled Blue Fish,1865,1865
3273,4082,"Broiled Cusk, Cream Sauce",1865,1865
3373,4203,Stewed Eels,1858,1865
3401,4239,"Baked Cod, Port Sauce",1858,1865
3424,4265,"Chicken and Pork, White Sauce",1865,1865


In [27]:
# IC 10: Blank lowest_price and highest_price in Dish
ic10_violations_1 = dataset[DISH][
    dataset[DISH]["lowest_price"].isna() &
    dataset[DISH]["highest_price"].isna()
]
print(f"Violations found: {len(ic10_violations_1)}")
dataset[DISH].loc[ic10_violations_1.index][["id", "name", "lowest_price", "highest_price"]].head(10)

Violations found: 29100


Unnamed: 0,id,name,lowest_price,highest_price
32,34,Russian Caviare on Toast,,
35,39,Potage a la Victoria,,
55,60,Hafergrutze,,
58,63,Apfelsinen,,
60,65,Milchreis,,
79,87,Hot or cold ribs of beef,,
127,135,Consomme aux Quenelle's,,
128,136,Milk rice,,
161,170,Baked Stuffed Mullet & Sauce Pomard,,
293,346,Grilled Mutton Chops,,


In [28]:
# IC 10: Blank lowest_price and highest_price in Dish with Dependency on Menu Item
menu_item_price = (
    dataset[MENU_ITEM][['id', 'dish_id', 'price']]
    .groupby('dish_id')['price']
    .agg(['min', 'max'])
    .reset_index()
    .rename(columns={'min': 'calc_lowest', 'max': 'calc_highest'})
)

dataset[DISH] = dataset[DISH].merge(
    menu_item_price, left_on='id', right_on='dish_id', how='left')

ic10_violations_2 = dataset[DISH][
    (dataset[DISH]["lowest_price"].isna() | dataset[DISH]["lowest_price"].isna()) &
    (dataset[DISH]["calc_lowest"].notna() | dataset[DISH]["calc_highest"].notna())
]

print(f"Violations found: {len(ic10_violations_2)}")
dataset[DISH].loc[ic10_violations_2.index]

Violations found: 117


Unnamed: 0,id,name,description,menus_appeared,times_appeared,first_appeared,last_appeared,lowest_price,highest_price,dish_id,calc_lowest,calc_highest
17256,21809,Aepfel,,78,78,1899,1910,,,21809.0,1.00,1.00
20268,25612,Green Gage Pie,,17,17,1896,1916,,,25612.0,0.25,0.25
21635,27359,Salami Sausage,,14,15,1900,1970,,,27359.0,0.45,0.45
25874,32795,Bass Ale on Draught,,2,2,1900,1900,,,32795.0,0.10,0.10
28177,35844,French Fried Potatoes.,,2,2,1900,1900,,,35844.0,0.10,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...
387341,475769,Fresh Cream,,2,2,1964,1964,,,475769.0,2.00,2.00
388090,476680,Green Goddess,,2,2,0,0,,,476680.0,0.75,0.75
401675,491556,Crème à la Reine,,2,2,1891,1891,,,491556.0,5.25,5.25
402879,492912,Sauté potatoes,,2,2,1913,1913,,,492912.0,0.25,0.25


In [29]:
# IC 10: Blank lowest_price and highest_price in Dish with No price info in Menu Item
ic10_violations_3 = dataset[DISH][
    dataset[DISH]["lowest_price"].isna() &
    dataset[DISH]["highest_price"].isna() &
    dataset[DISH]["calc_lowest"].isna() &
    dataset[DISH]["calc_highest"].isna()
]

print(f"Violations found: {len(ic10_violations_3)}")
dataset[DISH].loc[ic10_violations_3.index]

Violations found: 28983


Unnamed: 0,id,name,description,menus_appeared,times_appeared,first_appeared,last_appeared,lowest_price,highest_price,dish_id,calc_lowest,calc_highest
32,34,Russian Caviare on Toast,,3,3,1900,1900,,,34.0,,
35,39,Potage a la Victoria,,5,5,1899,1901,,,39.0,,
55,60,Hafergrutze,,205,218,1899,1910,,,60.0,,
58,63,Apfelsinen,,181,184,1899,1935,,,63.0,,
60,65,Milchreis,,135,135,1899,1910,,,65.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
421213,513487,Beurre noir,,1,2,1899,1899,,,513487.0,,
421214,513488,North-sea-Turbot,,1,1,1899,1899,,,513488.0,,
421215,513489,Caulyflower au gratin,,1,1,1899,1899,,,513489.0,,
421216,513490,Veal-tongue,,1,1,1899,1899,,,513490.0,,


In [31]:
# IC 11: Missing or Null Place Values in Menu
ic11_violations = dataset[MENU][
    dataset[MENU]["place"].isna() |
    (dataset[MENU]["place"].str.strip() == "")
]

print(f"IC 11 Violations (missing/blank place): {len(ic11_violations)}")
dataset[MENU].loc[ic11_violations.index][["id", "place"]].head()

IC 11 Violations (missing/blank place): 9422


Unnamed: 0,id,place
15,12478,
58,12526,
102,12583,
103,12584,
104,12585,


In [34]:
# IC 12: Place values that are overly specific or not cleanly grouped
valid_groups = [
    "United States", "Italy", "France", "Canada", "England", "Japan", "Germany",
    "China", "Austria", "Bahamas", "Hungary", "Cuba", "Switzerland",
    "Shipboard", "Trainboard", "Unknown", "Other"
]

ic12_violations = dataset[MENU][
    ~dataset[MENU]["place"].isin(valid_groups)
]

print(f"IC 12 Violations (uncategorized place values): {len(ic12_violations)}")
dataset[MENU].loc[ic12_violations.index][["id", "place"]].head()

IC 12 Violations (uncategorized place values): 17533


Unnamed: 0,id,place
0,12463,"HOT SPRINGS, AR"
1,12464,"MILWAUKEE, [WI];"
2,12465,DAMPFER KAISER WILHELM DER GROSSE;
3,12466,DAMPFER KAISER WILHELM DER GROSSE;
4,12467,DAMPFER KAISER WILHELM DER GROSSE;
