#### Imports

In [6]:
import pandas as pd
from random import randint
pd.set_option('display.max_rows', 5)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 10000)

#### Download the latest [Pleiades places csv file](https://atlantides.org/downloads/pleiades/dumps/pleiades-places-latest.csv.gz) and read it.

In [7]:
places = pd.read_csv("pleiades-places.csv")

#### Print the first two rows

In [8]:
places.iloc[0:2]

Unnamed: 0,authors,bbox,connectsWith,created,creators,currentVersion,description,extent,featureTypes,geoContext,hasConnectionsWith,id,locationPrecision,maxDate,minDate,modified,path,reprLat,reprLatLong,reprLong,tags,timePeriods,timePeriodsKeys,timePeriodsRange,title,uid
0,"Becker, J., T. Elliott","13.4119837, 42.082885, 13.4119837, 42.082885",413005,2016-11-04T16:36:09Z,"jbecker, thomase",1.0,The post-Roman settlement at Alba Fucens becam...,"{""type"": ""Point"", ""coordinates"": [13.4119837, ...",settlement,,,48210385,precise,1453.0,640.0,2016-11-08T21:58:28Z,/places/48210385,42.082885,"42.082885,13.4119837",13.411984,,M,mediaeval-byzantine,"640.0,1453.0",Borgo Medievale,ece5760c4c6d42c1a331aad543c4ecc4
1,"Becker, J., T. Elliott","11.6285463, 42.4193742, 11.6285463, 42.4193742",413393,2016-11-04T16:39:09Z,"jbecker, thomase",2.0,A major urban sanctuary at Vulci with a long p...,"{""type"": ""Point"", ""coordinates"": [11.6285463, ...",temple-2,,,48210386,precise,300.0,-750.0,2016-12-05T11:47:10Z,/places/48210386,42.419374,"42.4193742,11.6285463",11.628546,"sanctuary, extant remains, temple",ACHR,"archaic,classical,hellenistic-republican,roman","-750.0,300.0",Tempio Grande at Vulci,4e06898f2de74dbc9f3a3bdba6d74ba2


#### Use the `describe()` method to show basic statistics (mean, median, std, etc.) of numeric columns

In [9]:
places.describe()

Unnamed: 0,currentVersion,id,maxDate,minDate,reprLat,reprLong
count,40132.000000,4.037400e+04,37648.00000,37648.000000,32841.000000,32841.000000
mean,3.861906,5.969153e+12,669.66378,-1260.404802,39.379466,20.256801
...,...,...,...,...,...,...
75%,5.000000,8.767158e+05,640.00000,-30.000000,43.471274,31.218177
max,67.000000,6.521790e+15,2100.00000,2000.000000,62.500000,111.078300


#### Print the name of the columns

In [10]:
for col in places.columns:
    print(col)

authors
bbox
connectsWith
created
creators
currentVersion
description
extent
featureTypes
geoContext
hasConnectionsWith
id
locationPrecision
maxDate
minDate
modified
path
reprLat
reprLatLong
reprLong
tags
timePeriods
timePeriodsKeys
timePeriodsRange
title
uid


#### Find how many _uniques_ values are in the featureTypes column.

In [11]:
print(places['featureTypes'].nunique()-1)  ## had to subtract one for column heading

1235


Result: 1235

#### Find Athens
Hint: It is recorded as "Athenae."

In [12]:
## keyword-searched the .csv file, will be in "title" column
l = []
for i in range(len(places.title)):
    if 'Athenae' == places.title[i]:
        l.append(i)
print(str(l) + " is the row index Athenae appears on")

[22109] is the row index Athenae appears on


# Find all sanctuaries.
Hint: Have a look first in column "featureTypes". There are more than one piece of information in some cells. How does this affect our code? We have to search for cells with strings that contain the string we are looking for.

In [13]:
find_sanctuary = places[places['featureTypes'].str.contains('sanctuary', na=False)]
print (find_sanctuary)

                                                 authors                                            bbox connectsWith               created                                  creators  currentVersion                                        description                                             extent       featureTypes                      geoContext hasConnectionsWith         id locationPrecision  maxDate  minDate              modified               path    reprLat            reprLatLong   reprLong                                               tags timePeriods                                   timePeriodsKeys timePeriodsRange                     title                               uid
19     Spann, P., DARMC, R. Talbert, R. Warner, J. Be...      -1.143263, 38.208848, -1.143263, 38.208848          NaN  2010-09-24T19:02:53Z                                P.O. Spann             5.0      An ancient place, cited: BAtlas 27 D3 Fortuna  {"type": "Point", "coordinates": [-1.143263, 3...        

You may get an error 'ValueError: Cannot mask with non-boolean array containing NA / NaN values'. If yes, then use the argument `na=False` within the `str.contains()` function.

Result: 606 rows × 26 columns

#### Find all temples and sanctuaries
Hint: Have a look first in column "featureTypes". There are more than one piece of information in some cells. How does this affect your code?

In [14]:
find_place = places[places['featureTypes'].str.contains('sanctuary|temple', na=False)]
print (find_place)

                                                 authors                                            bbox connectsWith               created          creators  currentVersion                                        description                                             extent featureTypes                      geoContext hasConnectionsWith         id locationPrecision  maxDate  minDate              modified               path    reprLat                 reprLatLong   reprLong                                               tags timePeriods                                    timePeriodsKeys timePeriodsRange                   title                               uid
1                                 Becker, J., T. Elliott  11.6285463, 42.4193742, 11.6285463, 42.4193742       413393  2016-11-04T16:39:09Z  jbecker, thomase             2.0  A major urban sanctuary at Vulci with a long p...  {"type": "Point", "coordinates": [11.6285463, ...     temple-2                             NaN             

#### Find which is the northernest, southernest, easternest, and westernest sanctuary of the database.

##### northernest

In [15]:
## This will be the largest latitude in column 'reprLat'
largest_lat = places['reprLat'].max()
places.loc[places['reprLat'] == largest_lat]

Unnamed: 0,authors,bbox,connectsWith,created,creators,currentVersion,description,extent,featureTypes,geoContext,hasConnectionsWith,id,locationPrecision,maxDate,minDate,modified,path,reprLat,reprLatLong,reprLong,tags,timePeriods,timePeriodsKeys,timePeriodsRange,title,uid
29964,"Warner, R., A. Bursche, R. Talbert, S. Gillies...","5.0, 60.0, 10.0, 65.0",,2010-09-22T20:46:22Z,"R. Warner, A. Bursche",3.0,The sixth century AD historian Iordanes descri...,"{""type"": ""Polygon"", ""coordinates"": [[[5.0, 60....",region,NOR/SWE,,20595,rough,640.0,300.0,2013-10-27T15:41:29Z,/places/20595,62.5,"62.5,7.5",7.5,,L,late-antique,"300.0,640.0",Scadinavia/Scandza,ea9d282bba80ccfc5c21e40562bc3d6b


##### southernest

In [16]:
## This will be the smallest latitude in column 'reprLong'
smallest_lat = places['reprLat'].min()
places.loc[places['reprLat'] == smallest_lat]

Unnamed: 0,authors,bbox,connectsWith,created,creators,currentVersion,description,extent,featureTypes,geoContext,hasConnectionsWith,id,locationPrecision,maxDate,minDate,modified,path,reprLat,reprLatLong,reprLong,tags,timePeriods,timePeriodsKeys,timePeriodsRange,title,uid
1774,"Hausleiter, A., M. Roaf, St J. Simpson, R. Wen...","31.1406026, -29.5443422, 31.1411004, -29.5432314",,2022-07-26T20:32:13Z,"A. Hausleiter, M. Roaf, St J. Simpson, R. Wenke",4.0,"Kas(h)kar was a significant Sasanian city, ori...","{""type"": ""Polygon"", ""coordinates"": [[[31.14060...","settlement, settlement-modern",opposite Wasit IRQ,,912880,precise,2099.0,-30.0,2022-07-27T02:08:12Z,/places/912880,-29.54377,"-29.5437696898,31.1408074997",31.140807,,RLT,"roman,late-antique,twenty-first-ce","-30.0,2099.0",Kas(h)kar,91cbfced9f7047504fe506c711ac3862


##### easternest

In [17]:
## This will be the largest longitude in column 'reprLong'
largest_long = places['reprLong'].max()
places.loc[places['reprLong'] == largest_long]

Unnamed: 0,authors,bbox,connectsWith,created,creators,currentVersion,description,extent,featureTypes,geoContext,hasConnectionsWith,id,locationPrecision,maxDate,minDate,modified,path,reprLat,reprLatLong,reprLong,tags,timePeriods,timePeriodsKeys,timePeriodsRange,title,uid
24655,"Mckee, G., J. Becker","111.077259, 48.5376447, 111.0793082, 48.5392573",,2017-11-01T18:00:55Z,gmckee,3.0,Xiongnu tomb site near the modern village of B...,"{""type"": ""Polygon"", ""coordinates"": [[[111.0784...",tomb,,,475881408,precise,250.0,-330.0,2020-11-07T03:26:08Z,/places/475881408,48.53836,"48.5383602338,111.07830005",111.0783,UWHS Submitted,HKFS,"hellenistic-central-asia,kangju-yuezhi-kushan-...","-330.0,250.0",Duurlig Nars,b533f3c95f904e12bc791209cd4650a1


##### westernest

In [18]:
## This will be the smallest longitude in column 'reprLong'
smallest_long = places['reprLong'].min()
places.loc[places['reprLong'] == smallest_long]

Unnamed: 0,authors,bbox,connectsWith,created,creators,currentVersion,description,extent,featureTypes,geoContext,hasConnectionsWith,id,locationPrecision,maxDate,minDate,modified,path,reprLat,reprLatLong,reprLong,tags,timePeriods,timePeriodsKeys,timePeriodsRange,title,uid
2541,"Pearce, M., P. Tozzi, DARMC, R. Talbert, S. Gi...","-117.221676, 34.132162, -117.221676, 34.132162",,2010-11-15T17:54:49Z,"M. Pearce, P. Tozzi",8.0,"An ancient place, cited: BAtlas 39 D5 Alba Doc...","{""type"": ""Point"", ""coordinates"": [-117.221676,...","station, archaeological-site",Albisola,,383563,precise,640.0,-30.0,2023-04-04T23:42:38Z,/places/383563,34.132162,"34.132162,-117.221676",-117.221676,"dare:ancient=1, dare:major=0, dare:feature=sta...",RL,"roman,late-antique","-30.0,640.0",Alba Docilia,ca9b667dcc6764585883ac16d764a08c


---

#### By using the methin `isin()`, check which items of list1 are _not_ in list2.
Hint: The following code creates two list of numbers from 1 to 100, removes randomly one number from each of them, and transforms the lists to Series.

In [22]:
list1 = [i for i in range(1,101)]
list1.pop(randint(1, 101))
ser1 = pd.Series(list1)
list2 = [i for i in range(1,101)]
list2.pop(randint(1, 101))
ser2 = pd.Series(list2)

In [27]:
output = ser1.isin(ser2) ## create boolean series
output_dict = output.to_dict() ## turn boolean series into dictionary
output_dict_keys = list(output_dict.keys()) ## index the keys
output_dict_values = list(output_dict.values()) ## index the values
false_index = output_dict_values.index(False) ## find the index of the value that is false
result = output_dict_keys[false_index] ## use the false value index to find the corresponding number
print(result)

57


---