In [1]:
import wikidata as wd
from geopandas import GeoDataFrame
from sqlalchemy import create_engine
from geoalchemy2 import Geometry, WKTElement
import psycopg2

In [2]:
# print(wd.people_count(None,1000))

In [3]:
# Parameters handed to wd.people_query in the cells below.
# With start=None and end=1000 the generated SPARQL only filters on
# ?birthDate < "1000-01-01" (see the printed query).
start, end = None, 1000
fields = [
    "name",
    "desc",
    "birth",
    "birthplace",
    "death",
    "deathplace",
]

In [4]:
# Show the SPARQL text that wd.people_query builds for these parameters so it
# can be inspected before it is executed.
print(wd.people_query(start, end, fields))


select ?person ?name ?desc ?birthTime ?birthPrecision ?birthPlace ?birthCoords ?birthPlaceName ?deathTime ?deathPrecision ?deathPlace ?deathCoords ?deathPlaceName
where {
  ?person wdt:P31 wd:Q5;
         wdt:P569 ?birthDate.
  hint:Prior hint:rangeSafe "true"^^xsd:boolean.
  FILTER((?birthDate <  "1000-01-01"^^xsd:dateTime))
  
    OPTIONAL {
        ?person rdfs:label ?name.
        FILTER (LANG(?name) = "en").
    }

    OPTIONAL {
       ?person schema:description ?desc
       FILTER (LANG(?desc) = "en").
    }

    OPTIONAL {
       ?person p:P569/psv:P569 ?birthNode.
       ?birthNode wikibase:timeValue ?birthTime.
       ?birthNode wikibase:timePrecision ?birthPrecision.
    }

    OPTIONAL {
        ?person wdt:P19  ?birthPlace.
        ?birthPlace wdt:P625 ?birthCoords.
        ?birthPlace rdfs:label ?birthPlaceName
        FILTER (LANG(?birthPlaceName) = "en").
    }

    OPTIONAL {
        ?person p:P570/psv:P570 ?deathTime.
        ?deathTime wikibase:timePrecision ?deathP

In [5]:
# Build the same query again and pass it to wd.pull_from_wikidata.
# NOTE(review): presumably this calls the Wikidata SPARQL endpoint over the
# network — confirm in the wd module.
data = wd.pull_from_wikidata(wd.people_query(start, end, fields))

In [6]:
# Convert the raw query result into a frame; pandas methods (dropna,
# sort_values, drop_duplicates) are called on it in the next cell.
df_full = wd.to_pandas(data)

In [7]:
# Keep only rows that have birth coordinates, then collapse to one row per
# person: rows are ordered by (person, birthTime, deathTime) and the first row
# of each person is kept.
df = (
    df_full
    .dropna(subset=["birthCoords"])
    .sort_values(by=["person", "birthTime", "deathTime"])
    .drop_duplicates("person", keep="first")
)

In [8]:
# Sanity check: after drop_duplicates("person") the number of rows and the
# number of distinct `person` values should be the same.
(df.shape, df.person.unique().shape)

((7400, 13), (7400,))

In [9]:
df.head()

Unnamed: 0,birthCoords,birthPlace,birthPlaceName,birthPrecision,birthTime,deathCoords,deathPlace,deathPlaceName,deathPrecision,deathTime,desc,name,person
4443,POINT (106.33329 20.75007),http://www.wikidata.org/entity/Q3031628,Ninh Giang,7,0801-01-01T00:00:00Z,POINT (105.84117 21.0245),http://www.wikidata.org/entity/Q1858,Hanoi,9.0,http://www.wikidata.org/value/9b8830d4fb778054...,Tang Dynasty jiedushi,Khuc Thua Du,http://www.wikidata.org/entity/Q1001807
14994,POINT (9.185321 45.462907),http://www.wikidata.org/entity/Q729978,Mediolanum,9,0350-01-01T00:00:00Z,,,,,,Ancient Roman writer,Mallius Theodorus,http://www.wikidata.org/entity/Q1001947
14995,POINT (17 1),http://www.wikidata.org/entity/Q15,Africa,7,0400-01-01T00:00:00Z,,,,7.0,http://www.wikidata.org/value/ee8bbd8cbe108b25...,roman grammarian,Charisius,http://www.wikidata.org/entity/Q1001949
15002,POINT (3.283333333 48.197222222),http://www.wikidata.org/entity/Q212420,Sens,6,0900-01-01T00:00:00Z,,,,,,,Bouchard Ratepilate,http://www.wikidata.org/entity/Q1010348
4444,POINT (106.33329 20.75007),http://www.wikidata.org/entity/Q3031628,Ninh Giang,7,0850-01-01T00:00:00Z,POINT (105.84117 21.0245),http://www.wikidata.org/entity/Q1858,Hanoi,9.0,http://www.wikidata.org/value/0da0fd5bbd4b8745...,Vietnamese ruler,Khuc Hao,http://www.wikidata.org/entity/Q1010526


In [10]:
# EPSG:4326 (WGS 84 longitude/latitude), matching the srid used for the
# database columns below. Given as an authority string because the
# {'init': 'epsg:4326'} dict form is deprecated by pyproj 2+ / geopandas 0.7+.
crs = "EPSG:4326"
# birthCoords becomes the active geometry column of the GeoDataFrame.
gdf = GeoDataFrame(df, crs=crs, geometry="birthCoords")

In [11]:
# SQLAlchemy engine for the local PostgreSQL database that the to_sql call
# below writes to.
# NOTE(review): the password is hard-coded in this URL (and repeated in later
# cells); consider reading it from the environment before sharing the notebook.
engine = create_engine('postgresql://geo:geo123@localhost:5432/geobrowser')

In [12]:
def _as_wkt_element(geom, srid=4326):
    """Wrap a geometry as a geoalchemy2 ``WKTElement`` for insertion.

    Parameters:
        geom: an object exposing ``.wkt``, an existing ``WKTElement``,
            or a missing value (``None`` / float NaN).
        srid: spatial reference id stored on the element (default 4326).

    Returns:
        A ``WKTElement``, or ``None`` for missing / falsy values.
    """
    # Already converted: return unchanged so re-running the cell is harmless.
    if isinstance(geom, WKTElement):
        return geom
    # float NaN is truthy, so a plain truthiness test would let it through and
    # then fail on `.wkt`; treat it as missing explicitly.
    if geom is None or (isinstance(geom, float) and geom != geom):
        return None
    # Same rule as before for anything else that is falsy.
    if not geom:
        return None
    return WKTElement(geom.wkt, srid=srid)


gdf['birthCoords'] = gdf['birthCoords'].apply(_as_wkt_element)
gdf['deathCoords'] = gdf['deathCoords'].apply(_as_wkt_element)

In [28]:
# Connection settings for the local database.
# NOTE(review): credentials are hard-coded here; consider moving them to the
# environment before sharing the notebook.
DSN_Params = dict(
  user = "geo",
  password = "geo123",
  host = "127.0.0.1",
  port = "5432",
  database = "geobrowser"
)
# Empty the `people` table before it is re-loaded.
# psycopg2's `with connection:` block only commits (or rolls back) the
# transaction; it does not close the connection, so close it explicitly to
# avoid leaking one connection per run of this cell.
conn = psycopg2.connect(**DSN_Params)
try:
    with conn:
        with conn.cursor() as curs:
            curs.execute("delete from people")
finally:
    conn.close()

In [29]:
# Append the rows to the existing `people` table.
# Write `gdf`, not `df`: the coordinate columns were converted to WKTElement on
# `gdf`, and the Geometry dtype below needs those values. Writing `df` only
# works if the two frames happen to share their underlying data.
gdf.to_sql('people', engine, if_exists='append',
           dtype={
               'birthCoords': Geometry(geometry_type='POINT', srid=4326),
               'deathCoords': Geometry(geometry_type='POINT', srid=4326),
           })

In [26]:
# Look up a single person by entity URI. The `person` column stores URIs with
# the http:// scheme, so the literal must use http:// as well — an https://
# literal can never match and always yields an empty frame.
df[df.person == "http://www.wikidata.org/entity/Q64850505"]

Unnamed: 0,birthCoords,birthPlace,birthPlaceName,birthPrecision,birthTime,deathCoords,deathPlace,deathPlaceName,deathPrecision,deathTime,desc,name,person


# Items

* Just person by birthdate: 32460
* Adding name: 64920 — exactly double the count above. **TODO:** find out why adding the name doubles the rows.

In [31]:
df.keys()

Index(['birthCoords', 'birthPlace', 'birthPlaceName', 'birthPrecision',
       'birthTime', 'deathCoords', 'deathPlace', 'deathPlaceName',
       'deathPrecision', 'deathTime', 'desc', 'name', 'person'],
      dtype='object')

In [9]:
df.person.unique().shape

(32076,)

In [9]:
# NOTE(review): this cell re-creates the same engine as an earlier cell and
# calls to_sql without if_exists or dtype, unlike the earlier to_sql call.
# pandas' default if_exists='fail' raises when the `people` table already
# exists. This looks like a leftover from an earlier iteration — confirm
# whether it is still needed.
engine = create_engine('postgresql://geo:geo123@localhost:5432/geobrowser')
df.to_sql('people', engine)

In [13]:
import ppygis3 as ppygis

In [14]:
ppygis.Point(1.0, 2.0).write_ewkb()

b'0101000000000000000000f03f0000000000000040'

In [15]:
# Raw psycopg2 connection used by the cursor / DDL cells below.
# psycopg2 is already imported in the first cell; the import is repeated here.
# NOTE(review): no host or port is passed, unlike the DSN_Params cell above —
# confirm this reaches the same server. The password is hard-coded.
import psycopg2
connection = psycopg2.connect(database = 'geobrowser', user = 'geo', password = 'geo123')

In [31]:
cursor = connection.cursor()

In [32]:
def create_people_table(table_name = "people"):
    """Return the ``CREATE TABLE`` statement for a people table.

    Parameters:
        table_name: name substituted into the statement as-is (no quoting or
            validation is applied).

    Returns:
        The DDL text as a ``str``. Nothing is executed here.
    """
    base = " " * 8
    tabbed = base + "\t"      # columns indented with a tab in the statement
    spaced = base + " " * 4   # columns indented with four further spaces
    ddl_lines = [
        "",
        base + "CREATE TABLE {} (".format(table_name),
        tabbed + "autoid SERIAL PRIMARY KEY,",
        tabbed + "qid VARCHAR(15),",
        spaced + "name TEXT,",
        spaced + "description TEXT,",
        "",
        tabbed + "birthDate TIMESTAMPTZ,",
        spaced + "birthPrecision INTEGER,",
        spaced + "birthQID VARCHAR(15),",
        spaced + "birthPlaceName TEXT,",
        tabbed + "birthCoords GEOGRAPHY(Point),",
        "",
        tabbed + "deathDate TIMESTAMPTZ,",
        spaced + "deathPrecision INTEGER,",
        spaced + "deathQID VARCHAR(15),",
        spaced + "deathPlaceName TEXT,",
        tabbed + "deathCoords GEOGRAPHY(Point)",
        base + ")",
        "\t",
    ]
    return "\n".join(ddl_lines)

In [33]:
create_people_table("test_people")

'\n        CREATE TABLE test_people (\n        \tautoid SERIAL PRIMARY KEY,\n        \tqid VARCHAR(15),\n            name TEXT,\n            description TEXT,\n\n        \tbirthDate TIMESTAMPTZ,\n            birthPrecision INTEGER,\n            birthQID VARCHAR(15),\n            birthPlaceName TEXT,\n        \tbirthCoords GEOGRAPHY(Point),\n\n        \tdeathDate TIMESTAMPTZ,\n            deathPrecision INTEGER,\n            deathQID VARCHAR(15),\n            deathPlaceName TEXT,\n        \tdeathCoords GEOGRAPHY(Point)\n        )\n\t'

In [35]:
# Roll back first so the connection is not left inside an open or failed
# transaction (presumably from an earlier attempt — TODO confirm), then run
# the DDL. The new table only becomes permanent once connection.commit() is
# called in a later cell.
connection.rollback()
cursor.execute(create_people_table("test_people"))

In [36]:
cursor.close()

In [37]:
connection.commit()

In [39]:
from io import StringIO

In [40]:
buffer = StringIO()

In [42]:
df.head()

Unnamed: 0,birthCoords,birthDate,birthPlace,birthPlaceName,birthPrecision,deathCoords,deathDate,deathPlace,deathPlaceName,deathPrecision,desc,name,person
0,Point(10.7167 48.8667),1501-01-27T00:00:00Z,http://www.wikidata.org/entity/Q502758,Wemding,11,Point(9.0556 48.52),1566-05-10T00:00:00Z,http://www.wikidata.org/entity/Q3806,Tübingen,11,German physician and botanist,Leonhart Fuchs,http://www.wikidata.org/entity/Q60756
1,Point(10.7167 48.8667),1501-01-27T00:00:00Z,http://www.wikidata.org/entity/Q502758,Wemding,11,Point(9.0556 48.52),1566-05-10T00:00:00Z,http://www.wikidata.org/entity/Q3806,Tübingen,11,German physician and botanist,Leonhart Fuchs,http://www.wikidata.org/entity/Q60756
2,Point(10.7167 48.8667),1501-01-27T00:00:00Z,http://www.wikidata.org/entity/Q502758,Wemding,11,Point(9.0556 48.52),1566-05-10T00:00:00Z,http://www.wikidata.org/entity/Q3806,Tübingen,11,German physician and botanist,Leonhart Fuchs,http://www.wikidata.org/entity/Q60756
3,Point(10.7167 48.8667),1501-01-27T00:00:00Z,http://www.wikidata.org/entity/Q502758,Wemding,11,Point(9.0556 48.52),1566-05-10T00:00:00Z,http://www.wikidata.org/entity/Q3806,Tübingen,11,German physician and botanist,Leonhart Fuchs,http://www.wikidata.org/entity/Q60756
4,Point(10.7167 48.8667),1501-01-27T00:00:00Z,http://www.wikidata.org/entity/Q502758,Wemding,11,Point(9.0556 48.52),1566-05-10T00:00:00Z,http://www.wikidata.org/entity/Q3806,Tübingen,11,German physician and botanist,Leonhart Fuchs,http://www.wikidata.org/entity/Q60756


In [None]:
def write_row(df):
    