In [8]:
import pandas as pd
import tempfile
from glob import glob
epsg = 4326

from tqdm import tqdm
from shapely import wkt


In [24]:
from sqlalchemy import create_engine
import tempfile

In [25]:
engine = create_engine('postgresql://postgres:spacewars@localhost:5432/spacewars')

In [3]:
df = pd.read_csv('data/borders/countryborders.csv')
dbname = 'countryborders'

In [6]:
df.rename(columns={'Unnamed: 0':'id'}, inplace=True)

In [48]:
df = pd.read_csv('data/Wikibattles.csv')
dbname = 'battles'

In [49]:
def prepare_dataset(geodf):
    """
    Adds a lat and a lon column to the GeoDF to be used
    with the mapping
    """
    geodf = geodf.dropna()
    list_lat, list_long = [], []
    for point in geodf['geometry']:
        lat = point.centroid.y
        long = point.centroid.x
        list_lat.append(lat)
        list_long.append(long)
    geodf['lat'] = list_lat
    geodf['lon'] = list_long
    del geodf['geometry']
    return geodf


In [50]:
def open_battle_file(filepath):
    """
    Specific function to open DataFrame containing battle related data
    """
    df = pd.read_csv(filepath)
    for col in df.columns:
        if col.startswith('Unnamed'):
            del df[col]
    df['displaydate'] = pd.to_datetime(df['displaydate'], format="%Y-%m-%d")
    df['displaystart'] = pd.to_datetime(df['displaystart'], format="%Y-%m-%d")
    df['displayend'] = pd.to_datetime(df['displayend'], format="%Y-%m-%d")
    df['year'] = pd.DatetimeIndex(df['displaydate']).year
    df['year'] = df['year'].astype(str)

    df['coordinates'] = df['coordinates'].str.replace('\(\(', '(')
    df['coordinates'] = df['coordinates'].str.replace('\)\)', ')')

    ## "entity" in the URL is automatically converted to "wiki" when searching the URL in a browser
    ## like in the wikidata links from NewsEye
    ## I'll just change the URL so they can both match
    df['subject'] = df['subject'].str.replace('entity', 'wiki')
    df['location'] = df['location'].str.replace('entity', 'wiki')
#     df['is_in_radius'] = False

    geometry = []
    for x in df['coordinates']:
        if isinstance(x, str):
            geometry.append(wkt.loads(x))
        else:
            geometry.append(None)
    return df
    # 2263
#     crs = {'init': f'epsg:{epsg}'}  # http://www.spatialreference.org/ref/epsg/2263/
#     geo_df = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
#     del geo_df['coordinates']
#     return geo_df

In [51]:
import geopandas as gpd

In [52]:
df = open_battle_file('data/Wikibattles.csv')
df = prepare_dataset(battles)

  df['coordinates'] = df['coordinates'].str.replace('\(\(', '(')
  df['coordinates'] = df['coordinates'].str.replace('\)\)', ')')


In [53]:
df.columns

Index(['subject', 'label', 'LOClabel', 'location', 'country', 'displaydate',
       'displaystart', 'displayend', 'Duration', 'Notes', 'year',
       'is_in_radius', 'lat', 'lon'],
      dtype='object')

In [54]:
df.reset_index(inplace = True)
df.rename(columns={'index':'id'}, inplace=True)
# df = df.iloc[:, :-1]

In [55]:
df

Unnamed: 0,id,subject,label,LOClabel,location,country,displaydate,displaystart,displayend,Duration,Notes,year,is_in_radius,lat,lon
0,0,http://www.wikidata.org/wiki/Q5447484,Fifth Battle of Ypres,Belgium,http://www.wikidata.org/wiki/Q31,Belgium,1918-10-02,1918-09-28,1918-10-02,4,WestFront,1918.0,False,50.900300,3.021110
1,1,http://www.wikidata.org/wiki/Q1998995,Battle of Thiepval Ridge,France,http://www.wikidata.org/wiki/Q142,France,1916-09-28,1916-09-25,1916-09-28,3,WestFront,1916.0,False,50.054528,2.688389
2,2,http://www.wikidata.org/wiki/Q1088364,Battle of the Lys,Flanders,http://www.wikidata.org/wiki/Q234,Belgium,1918-01-01,1918-04-07,1918-04-29,22,WestFront,1918.0,False,50.705556,2.900000
3,3,http://www.wikidata.org/wiki/Q1144824,Battle of Messines,West Flanders,http://www.wikidata.org/wiki/Q1113,Belgium,1917-01-01,1917-06-07,1917-06-14,7,WestFront,1917.0,False,50.762500,2.895278
4,4,http://www.wikidata.org/wiki/Q989093,First Battle of Champagne,Champagne-Ardenne,http://www.wikidata.org/wiki/Q14103,France,1914-01-01,1914-09-28,1914-10-10,12,WestFront,1914.0,False,49.000000,4.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,241,http://www.wikidata.org/wiki/Q10681750,Battle of Syrjäntaka,Syrjäntaka,http://www.wikidata.org/wiki/Q12377444,Finland,1918-04-29,1918-04-28,1918-04-29,1,Finnish civil war,1918.0,False,61.116257,24.831663
154,253,http://www.wikidata.org/wiki/Q10681750,Battle of Syrjäntaka,Tuulos,http://www.wikidata.org/wiki/Q1013612,Finland,1918-04-29,1918-04-28,1918-04-29,1,Finnish civil war,1918.0,False,61.116257,24.831663
155,254,http://www.wikidata.org/wiki/Q38249092,Battle of Ahvenkoski,Ahvenkoski,http://www.wikidata.org/wiki/Q1811020,Finland,1918-05-05,1918-04-10,1918-05-05,25,Finnish civil war,1918.0,False,60.497777,26.459642
156,255,http://www.wikidata.org/wiki/Q697748,Battle of Cer,Cer,http://www.wikidata.org/wiki/Q859564,Serbia,1914-08-24,1914-08-15,1914-08-24,9,Serbian Campaign of World War I,1914.0,False,44.603150,19.493960


In [56]:
df.to_sql(
    dbname,
    engine,
    index=False
)

In [39]:
dbname = 'entities'
for f in tqdm(glob('data/DATASET/**/*.csv', recursive=True)):
    print(f)
    df = pd.read_csv(f)
    df.rename(columns={'Unnamed: 0':'id'}, inplace=True)
    df.to_sql(
    dbname,
    engine,
    index=False,
    if_exists='append'
    
)
#     break

  0%|          | 0/32 [00:00<?, ?it/s]

data/DATASET/fr/combined_data_l_oeuvre_1920.csv


  3%|▎         | 1/32 [00:04<02:19,  4.50s/it]

data/DATASET/fr/combined_data_le_matin_1917.csv


  6%|▋         | 2/32 [00:20<05:38, 11.27s/it]

data/DATASET/fr/combined_data_l_oeuvre_1918.csv


  9%|▉         | 3/32 [00:24<03:53,  8.06s/it]

data/DATASET/fr/combined_data_le_matin_1915.csv


 12%|█▎        | 4/32 [00:41<05:20, 11.44s/it]

data/DATASET/fr/combined_data_le_matin_1913.csv


 16%|█▌        | 5/32 [01:15<08:46, 19.51s/it]

data/DATASET/fr/combined_data_le_matin_1914.csv


 19%|█▉        | 6/32 [01:47<10:22, 23.94s/it]

data/DATASET/fr/combined_data_le_matin_1918.csv


 22%|██▏       | 7/32 [02:01<08:37, 20.68s/it]

data/DATASET/fr/combined_data_le_matin_1916.csv


 25%|██▌       | 8/32 [02:15<07:21, 18.38s/it]

data/DATASET/fr/combined_data_l_oeuvre_1917.csv


 28%|██▊       | 9/32 [02:18<05:17, 13.81s/it]

data/DATASET/fr/combined_data_l_oeuvre_1919.csv


 31%|███▏      | 10/32 [02:24<04:08, 11.31s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1918.csv


 34%|███▍      | 11/32 [02:33<03:42, 10.60s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1915.csv


 38%|███▊      | 12/32 [02:48<03:55, 11.78s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1917.csv


 41%|████      | 13/32 [02:54<03:15, 10.30s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1917.csv


 44%|████▍     | 14/32 [03:04<03:03, 10.19s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1918.csv


 47%|████▋     | 15/32 [03:12<02:37,  9.27s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1913.csv


 50%|█████     | 16/32 [03:23<02:40, 10.00s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1915.csv


 53%|█████▎    | 17/32 [03:32<02:25,  9.71s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1919.csv


 56%|█████▋    | 18/32 [03:38<01:57,  8.43s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1920.csv


 59%|█████▉    | 19/32 [03:42<01:34,  7.28s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1914.csv


 62%|██████▎   | 20/32 [03:56<01:50,  9.22s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1913.csv


 66%|██████▌   | 21/32 [04:10<01:57, 10.73s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1914.csv


 69%|██████▉   | 22/32 [04:19<01:42, 10.26s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1920.csv


 72%|███████▏  | 23/32 [04:32<01:37, 10.84s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1916.csv


 75%|███████▌  | 24/32 [04:43<01:27, 10.93s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1916.csv


 78%|███████▊  | 25/32 [04:55<01:18, 11.16s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1919.csv


 81%|████████▏ | 26/32 [05:04<01:04, 10.78s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1916.csv


 84%|████████▍ | 27/32 [05:06<00:39,  7.94s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1918.csv


 88%|████████▊ | 28/32 [05:07<00:23,  5.96s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1914.csv


 91%|█████████ | 29/32 [05:09<00:13,  4.66s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1917.csv


 94%|█████████▍| 30/32 [05:10<00:07,  3.62s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1915.csv


 97%|█████████▋| 31/32 [05:11<00:03,  3.00s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1913.csv


100%|██████████| 32/32 [05:13<00:00,  9.81s/it]


In [28]:
q_freq = '''SELECT "geometry", COUNT("geometry") FROM entities
GROUP BY "geometry"
ORDER BY COUNT("geometry") DESC'''
freq = read_sql_tmpfile(q_freq, engine)

In [29]:
freq

Unnamed: 0,geometry,count
0,POINT (2.3514616 48.8566969),141766
1,POINT (16.3725042 48.2083537),132535
2,POINT (1.8883335 46.603354),82769
3,POINT (10.4234469 51.0834196),76117
4,POINT (-0.1276474 51.5073219),60515
...,...,...
21706,POINT (16.3476275 48.2056113),4
21707,POINT (-58.4959922339418 -34.614384),4
21708,POINT (-58.49779192001101 -34.5908475),4
21709,POINT (-58.49814870408164 -34.6369143),4


In [None]:
# out = (df1.merge(df2, left_on='store', right_on='store_code')
          .reindex(columns=['id', 'store', 'address', 'warehouse']))

In [48]:
dbname = 'entities_2'
for f in tqdm(glob('data/DATASET/**/*.csv', recursive=True)):
    print(f)
    df = pd.read_csv(f)
    merge = df.merge(freq, left_on='geometry', right_on='geometry')
#     del merge['freq']
    merge.rename(columns={'count': 'total_freq', 'Unnamed: 0':'id'}, inplace=True)
    merge.to_sql(
    dbname,
    engine,
    index=False,
    if_exists='append'

    )
#     break

  0%|          | 0/32 [00:00<?, ?it/s]

data/DATASET/fr/combined_data_l_oeuvre_1920.csv


  3%|▎         | 1/32 [00:05<02:39,  5.16s/it]

data/DATASET/fr/combined_data_le_matin_1917.csv


  6%|▋         | 2/32 [00:19<05:19, 10.65s/it]

data/DATASET/fr/combined_data_l_oeuvre_1918.csv


  9%|▉         | 3/32 [00:23<03:44,  7.73s/it]

data/DATASET/fr/combined_data_le_matin_1915.csv


 12%|█▎        | 4/32 [00:39<05:05, 10.92s/it]

data/DATASET/fr/combined_data_le_matin_1913.csv


 16%|█▌        | 5/32 [01:10<08:04, 17.96s/it]

data/DATASET/fr/combined_data_le_matin_1914.csv


 19%|█▉        | 6/32 [01:35<08:54, 20.56s/it]

data/DATASET/fr/combined_data_le_matin_1918.csv


 22%|██▏       | 7/32 [01:46<07:12, 17.29s/it]

data/DATASET/fr/combined_data_le_matin_1916.csv


 25%|██▌       | 8/32 [02:00<06:27, 16.17s/it]

data/DATASET/fr/combined_data_l_oeuvre_1917.csv


 28%|██▊       | 9/32 [02:04<04:45, 12.43s/it]

data/DATASET/fr/combined_data_l_oeuvre_1919.csv


 31%|███▏      | 10/32 [02:09<03:46, 10.31s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1918.csv


 34%|███▍      | 11/32 [02:18<03:24,  9.75s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1915.csv


 38%|███▊      | 12/32 [02:28<03:15,  9.77s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1917.csv


 41%|████      | 13/32 [02:33<02:41,  8.48s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1917.csv


 44%|████▍     | 14/32 [02:42<02:31,  8.44s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1918.csv


 47%|████▋     | 15/32 [02:47<02:06,  7.43s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1913.csv


 50%|█████     | 16/32 [02:54<01:57,  7.37s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1915.csv


 53%|█████▎    | 17/32 [03:01<01:48,  7.23s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1919.csv


 56%|█████▋    | 18/32 [03:05<01:30,  6.43s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1920.csv


 59%|█████▉    | 19/32 [03:09<01:12,  5.58s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1914.csv


 62%|██████▎   | 20/32 [03:20<01:28,  7.37s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1913.csv


 66%|██████▌   | 21/32 [03:33<01:39,  9.02s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1914.csv


 69%|██████▉   | 22/32 [03:43<01:30,  9.09s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1920.csv


 72%|███████▏  | 23/32 [03:52<01:23,  9.32s/it]

data/DATASET/de/combined_data_illustrierte_kronen_zeitung_1916.csv


 75%|███████▌  | 24/32 [04:00<01:10,  8.77s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1916.csv


 78%|███████▊  | 25/32 [04:09<01:03,  9.01s/it]

data/DATASET/de/combined_data_arbeiter_zeitung_1919.csv


 81%|████████▏ | 26/32 [04:19<00:54,  9.07s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1916.csv


 84%|████████▍ | 27/32 [04:20<00:33,  6.75s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1918.csv


 88%|████████▊ | 28/32 [04:21<00:20,  5.08s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1914.csv


 91%|█████████ | 29/32 [04:23<00:11,  3.98s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1917.csv


 94%|█████████▍| 30/32 [04:24<00:06,  3.14s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1915.csv


 97%|█████████▋| 31/32 [04:26<00:02,  2.94s/it]

data/DATASET/fi/combined_data_helsingin_sanomat_1913.csv


100%|██████████| 32/32 [04:28<00:00,  8.40s/it]


In [62]:
def read_sql_tmpfile(query, db_engine, arg = None):
    with tempfile.TemporaryFile() as tmpfile:

        conn = db_engine.raw_connection()
        cur = conn.cursor()
        # needed to escape raw SQL query
        if arg:
            query = cur.mogrify(query, arg).decode('utf-8')
            print(query)
            
        copy_sql = "COPY ({query}) TO STDOUT WITH CSV {head}".format(
           query=query, head="HEADER"
        )
        cur.copy_expert(copy_sql, tmpfile)
        tmpfile.seek(0)
        df = pd.read_csv(tmpfile)
        return df

In [19]:
def execute_query(query, con):
    return con.execute(query)

In [13]:
q = 'SELECT * FROM battles WHERE "Duration" = 4'
t = read_sql_tmpfile(q, engine)

In [70]:
lg = ",".join(['fr'])
news = ",".join(["L'Œuvre"])
start_date = '1914-01-01'
end_date = '1915-01-01'
arg = [lg, news, start_date, end_date]

q = '''SELECT * from entities
WHERE "lang" IN (%s)
AND "newspaper" IN (%s)
AND "date" BETWEEN (%s) AND (%s)
'''
# arg = [lg]
# q = '''SELECT * from entities
# WHERE "lang" IN (%s)'''

read_sql_tmpfile(q, engine, arg)

SELECT * from entities
WHERE "lang" IN ('fr')
AND "newspaper" IN ('Le Matin')
AND "date" BETWEEN ('1914-01-01') AND ('1915-01-01')



Unnamed: 0,id,mention_id,mention,start_idx,end_idx,left_context,right_context,article_id,issue_id,article_link,newspaper,date,lang,wikidata_link,address,geometry,lat,lon,freq
0,73788,entity_mention_le_matin_12148-bpt6k5707967_1379,NICE,16,20,8 ou 15 JOURS à,,le_matin_12148-bpt6k5707967_article_1202,le_matin_12148-bpt6k5707967,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-02-13,fr,,"Nice, Alpes-Maritimes, Provence-Alpes-Côte d'A...",POINT (7.2683912 43.7009358),43.700936,7.268391,933
1,73850,entity_mention_le_matin_12148-bpt6k571112c_675,NICE,8,12,"CANNES,",", MONACO, MONTE-CARLO",le_matin_12148-bpt6k571112c_article_764,le_matin_12148-bpt6k571112c,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-12-21,fr,,"Nice, Alpes-Maritimes, Provence-Alpes-Côte d'A...",POINT (7.2683912 43.7009358),43.700936,7.268391,933
2,78674,entity_mention_le_matin_12148-bpt6k5708819_2002,Londres,10,17,Bourse de,,le_matin_12148-bpt6k5708819_article_1602,le_matin_12148-bpt6k5708819,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-05-09,fr,https://www.wikidata.org/wiki/Q84,"London, Greater London, England, United Kingdom",POINT (-0.1276474 51.5073219),51.507322,-0.127647,4398
3,78675,entity_mention_le_matin_12148-bpt6k5708819_2009,Londres,13,20,92 20 Ch. s.,.,le_matin_12148-bpt6k5708819_article_1612,le_matin_12148-bpt6k5708819,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-05-09,fr,https://www.wikidata.org/wiki/Q84,"London, Greater London, England, United Kingdom",POINT (-0.1276474 51.5073219),51.507322,-0.127647,4398
4,78676,entity_mention_le_matin_12148-bpt6k5708819_2012,Londres,13,20,26 69\nCh. s.,\nCh. S. Paris. 6 10,le_matin_12148-bpt6k5708819_article_1256,le_matin_12148-bpt6k5708819,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-05-09,fr,https://www.wikidata.org/wiki/Q84,"London, Greater London, England, United Kingdom",POINT (-0.1276474 51.5073219),51.507322,-0.127647,4398
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255863,297034,entity_mention_le_matin_12148-bpt6k571055k_1203,"inf . ,",418,423,"RARFUMERIE (Mat. prem. pr.). E. Nadal, 25, r.\...",19° Cie. Sans nouvelles depuis le 23 août.\na...,le_matin_12148-bpt6k571055k_article_694,le_matin_12148-bpt6k571055k,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-10-25,fr,,"مطار عين قزام, الطريق العابر للصحراء, In Guezz...",POINT (5.750431435930796 19.56183375),19.561834,5.750431,30
255864,297035,entity_mention_le_matin_12148-bpt6k5710601_914,"inf . ,",547,552,"EROY L. 22, rue des Martyrs, Paris, actuelleme...","9e t. sans nouvelles\nL'dep. 7 sept, où il se...",le_matin_12148-bpt6k5710601_article_700,le_matin_12148-bpt6k5710601,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-10-30,fr,https://www.wikidata.org/wiki/Q1505910,"مطار عين قزام, الطريق العابر للصحراء, In Guezz...",POINT (5.750431435930796 19.56183375),19.561834,5.750431,30
255865,297036,entity_mention_le_matin_12148-bpt6k5710983_1083,"inf . ,",135,140,"IVEULF, capitaine au 9e bat. de chas, blessé à...","ambul.,\nrentrant d'Allemagne dire si interné...",le_matin_12148-bpt6k5710983_article_683,le_matin_12148-bpt6k5710983,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-12-07,fr,https://www.wikidata.org/wiki/Q1505910,"مطار عين قزام, الطريق العابر للصحراء, In Guezz...",POINT (5.750431435930796 19.56183375),19.561834,5.750431,30
255866,297037,entity_mention_le_matin_12148-bpt6k571055k_853,"Côte d ' Azur ,",39,51,A UTO ayant 2 clients désirerait 3° pr,,le_matin_12148-bpt6k571055k_article_829,le_matin_12148-bpt6k571055k,https://platform.newseye.eu/de/catalog/le_mati...,Le Matin,1914-10-25,fr,,"Cote D azur, Wise County, Texas, United States",POINT (-97.8956719 33.2444875),33.244487,-97.895672,62


In [27]:
with engine.connect() as con:

    q = 'SELECT DISTINCT "date" FROM entities;'
    dates = [x[0] for x in execute_query(q, con).all()]
    dates.sort()
    print(dates)
    
    q = 'SELECT DISTINCT "Duration" FROM battles;'
    durations = [x[0] for x in execute_query(q, con).all()]
    durations.sort()
    
    q = 'SELECT DISTINCT "Notes" FROM battles;'
    fronts = [x[0] for x in execute_query(q, con).all()]
    
    return {
        "dates": dates,
        "durations": durations,
        "fronts": fronts
    }
    
    
    # TODO: IL MANQUE MIN MAX FREQ
    
    


['1913-01-01', '1913-01-02', '1913-01-03', '1913-01-04', '1913-01-05', '1913-01-06', '1913-01-07', '1913-01-08', '1913-01-09', '1913-01-10', '1913-01-11', '1913-01-12', '1913-01-13', '1913-01-14', '1913-01-15', '1913-01-16', '1913-01-17', '1913-01-18', '1913-01-19', '1913-01-20', '1913-01-21', '1913-01-22', '1913-01-23', '1913-01-24', '1913-01-25', '1913-01-26', '1913-01-27', '1913-01-28', '1913-01-29', '1913-01-30', '1913-01-31', '1913-02-01', '1913-02-02', '1913-02-03', '1913-02-04', '1913-02-05', '1913-02-06', '1913-02-07', '1913-02-08', '1913-02-09', '1913-02-10', '1913-02-11', '1913-02-12', '1913-02-13', '1913-02-14', '1913-02-15', '1913-02-16', '1913-02-17', '1913-02-18', '1913-02-19', '1913-02-20', '1913-02-21', '1913-02-22', '1913-02-23', '1913-02-24', '1913-02-25', '1913-02-26', '1913-02-27', '1913-02-28', '1913-03-01', '1913-03-02', '1913-03-03', '1913-03-04', '1913-03-05', '1913-03-06', '1913-03-07', '1913-03-08', '1913-03-09', '1913-03-10', '1913-03-11', '1913-03-12', '1913

In [14]:
t

Unnamed: 0,id,subject,label,coordinates,LOClabel,location,country,displaydate,displaystart,displayend,Duration,Notes
0,0,http://www.wikidata.org/entity/Q5447484,Fifth Battle of Ypres,Point(3.02111 50.9003),Belgium,http://www.wikidata.org/entity/Q31,Belgium,1918/10/2,1918/9/28,1918/10/2,4,WestFront
1,17,http://www.wikidata.org/entity/Q233219,Second Battle of Ypres,Point(2.940555555 50.899444444),Ypres,http://www.wikidata.org/entity/Q102728,Belgium,1918/1/1,1918/8/8,1918/8/12,4,WestFront
2,34,http://www.wikidata.org/entity/Q1527262,Battle of the Ailette,Point((49.58112 3.167575)),Ailette,http://www.wikidata.org/entity/Q405073,France,1918/8/30,1918/8/26,1918/8/30,4,WestFront
3,42,http://www.wikidata.org/entity/Q129117,Battle of Kitcheners' Wood,Point(2.922 50.89),,,Belgium,1918/1/1,1918/7/18,1918/7/22,4,WestFront
4,77,http://www.wikidata.org/entity/Q667519,Battle of Gnila Lipa,Point(24.7546 49.1206),Hnyla Lypa,http://www.wikidata.org/entity/Q1037150,Ukraine,1914/8/30,1914/8/26,1914/8/30,4,EastFront
5,78,http://www.wikidata.org/entity/Q667519,Battle of Gnila Lipa,Point(24.68 49.695),Hnyla Lypa,http://www.wikidata.org/entity/Q1037150,Ukraine,1914/8/30,1914/8/26,1914/8/30,4,EastFront
6,89,http://www.wikidata.org/entity/Q934807,Seventh Battle of the Isonzo,Point(13.616 45.933),Soča,http://www.wikidata.org/entity/Q202760,Italy,1916/9/18,1916/9/14,1916/9/18,4,ItalianFront
7,90,http://www.wikidata.org/entity/Q955519,Ninth Battle of the Isonzo,Point(13.63805556 45.90972222),Soča,http://www.wikidata.org/entity/Q202760,Italy,1916/11/4,1916/10/31,1916/11/4,4,ItalianFront
8,118,http://www.wikidata.org/entity/Q4871450,Battle of Kisaki,Point(37.60138889 -7.48611111),"Kisaki, Tanzania",http://www.wikidata.org/entity/Q1000024,Tanzania,,1916/9/7,1916/9/11,4,WW1
9,126,http://www.wikidata.org/entity/Q2890469,Battle of Multien,Point(2.8783 49.0378),Seine-et-Marne,http://www.wikidata.org/entity/Q12753,France,,1914/9/5,1914/9/9,4,WW1


In [57]:
import json

In [58]:
filtered_borders = json.load(open('data/borders/countryborders.geojson'))

In [60]:
bordersdf = pd.read_csv('data/borders/countryborders.csv')


In [61]:
bordersdf

Unnamed: 0.1,Unnamed: 0,cntry_name,area,capname,caplong,caplat,gwcode,gwsdate,gwsyear,gwsmonth,gwsday,gwedate,gweyear,gwemonth,gweday
0,0,Aden,46416.1,Aden,45.03330,12.80000,681,31.03.1937 23:00:00,1937,4,1,02.04.1962 23:00:00,1962,4,3
1,1,Afghanistan,808993.0,Kabul,69.18330,34.51670,700,31.12.1885 23:00:00,1886,1,1,29.12.1888 23:00:00,1888,12,30
2,2,Alaska,1506240.0,Juneau,-134.41200,58.30430,3,31.12.1885 23:00:00,1886,1,1,01.01.1959 23:00:00,1959,1,2
3,3,Albania,28656.5,Vlore,19.49820,40.47740,339,31.12.1912 23:00:00,1913,1,1,05.03.1914 23:00:00,1914,3,6
4,4,Algeria,655481.0,Algiers,3.05056,36.76310,615,31.12.1885 23:00:00,1886,1,1,05.02.1901 23:00:00,1901,2,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247,247,"Yemen, People's Republic of",287228.0,Aden,45.03330,12.80000,680,29.11.1967 23:00:00,1967,11,30,20.05.1990 22:00:00,1990,5,21
248,248,Yugoslavia,157994.0,Belgrade,20.46810,44.81860,345,30.11.1918 23:00:00,1918,12,1,08.09.1919 23:00:00,1919,9,9
249,249,Zambia,751927.0,Lusaka,28.28330,-15.41670,551,17.08.1911 23:00:00,1911,8,18,30.07.1953 23:00:00,1953,7,31
250,250,Zanzibar,2591.2,Zanzibar City,39.19560,-6.16698,511,30.06.1890 23:00:00,1890,7,1,29.06.1895 23:00:00,1895,6,30


In [66]:
len(filtered_borders['features'])

186