In [1]:
import geopandas as gpd
import pandas as pd
import requests
import gzip
import glob
import os
import json

In [2]:
# Folder that holds the downloaded .gz archives and the files extracted from them.
# Windows-style relative path: other cells build glob patterns by plain string
# concatenation (PARENT_PATH + '*.gz'), so the trailing separator is required.
PARENT_PATH = '.\\aups\\'

# Open and check the gzip (.gz) archives

In [3]:
# Every gzip archive found directly inside PARENT_PATH.
tar_list = glob.glob(f'{PARENT_PATH}*.gz')

In [4]:
def untar_file(tar_file, dest_dir=None) :
    '''
    Decompress a gzip archive and write its content as a text file.

    The output file is created in ``dest_dir`` and is named after the
    archive's base name with its trailing ``.gz`` extension removed.

    Parameters
    ----------
    tar_file : str
        Path of the ``.gz`` archive. Forward and backward slashes are both
        accepted as directory separators.
    dest_dir : str, optional
        Directory receiving the decompressed file. Defaults to PARENT_PATH.

    Returns
    -------
    list of str
        The decompressed content, one UTF-8 decoded string per line
        (line endings are kept).
    '''
    if dest_dir is None :
        dest_dir = PARENT_PATH
    print(tar_file)

    # Take the last path component whichever separator the caller used;
    # splitting on the backslash alone only works for Windows-style paths.
    filename = tar_file.replace('\\', '/').rsplit('/', 1)[-1]
    # Strip only the trailing extension: str.replace would also remove a
    # '.gz' appearing in the middle of the name.
    if filename.endswith('.gz') :
        filename = filename[:-len('.gz')]
    filepath = os.path.join(dest_dir, filename)

    with gzip.open(tar_file) as f :
        content = [line.decode('utf8') for line in f]

    # NOTE(review): the file is written with the platform's default text
    # encoding, which is also what load_geopandas uses to read it back.
    # Consider switching both to an explicit encoding='utf8' together.
    with open(filepath, 'w') as w :
        w.writelines(content)
    return content

In [10]:
def load_geopandas(json_file) :
    '''
    Read a JSON file and build a GeoDataFrame from its "features" entry.

    json_file : path of a JSON file whose top-level object has a
                "features" key.
    Returns the GeoDataFrame produced by GeoDataFrame.from_features.
    '''
    with open(json_file) as source :
        feature_collection = json.load(source)
    features = feature_collection["features"]
    # NOTE(review): no CRS is passed here, so the resulting frame presumably
    # carries none - confirm whether one should be assigned.
    return gpd.GeoDataFrame.from_features(features)

In [6]:
# Extract every archive whose decompressed file is not on disk yet.
for tar_file in tar_list :
    # tar_list comes from a '*.gz' glob, so dropping the extension gives the
    # path of the extracted file.
    extracted_path = tar_file[:-len('.gz')]
    # Ask the filesystem directly instead of re-running a glob on every
    # iteration and comparing path strings.
    if not os.path.exists(extracted_path) :
        # The decoded content returned by untar_file is not needed here.
        untar_file(tar_file)

In [7]:
# Layer names of interest ("couches" is French for "layers"); presumably
# they match the suffix of the extracted file names - verify against usage.
couches = ['parcelle', 'tsurf']

In [8]:
# glob returns matches in arbitrary order; sort them so that positions in
# the list are stable across runs and platforms.
json_list = sorted(glob.glob(PARENT_PATH+'*.json'))
json_list

['.\\aups\\pci-83-parcelle.json', '.\\aups\\pci-83-tsurf.json']

In [11]:
# Select each layer by the name embedded in its file name
# ('...-parcelle.json', '...-tsurf.json') rather than by its position in
# json_list, whose order is not guaranteed by glob.
layers = {}
for layer in ('parcelle', 'tsurf') :
    matches = [path for path in json_list if path.endswith(f'-{layer}.json')]
    if not matches :
        raise FileNotFoundError(f'no "*-{layer}.json" file among {json_list}')
    layers[layer] = load_geopandas(matches[0])

parcelle = layers['parcelle']
tsurf = layers['tsurf']

In [18]:
# Keep only the surfaces whose SYM code is '65' (pools, per the original note).
is_pool = tsurf['SYM'].eq('65')
tsurf = tsurf.loc[is_pool]
tsurf

Unnamed: 0,geometry,SYM,DATE_OBS,DATE_MAJ,TEX
3,"POLYGON ((6.84958 43.52740, 6.84957 43.52737, ...",65,2016-03-07,2016-03-18,
4,"POLYGON ((6.85265 43.52664, 6.85268 43.52660, ...",65,2016-03-07,2016-03-18,
5,"POLYGON ((6.85323 43.52720, 6.85320 43.52722, ...",65,2016-03-07,2016-03-18,
6,"POLYGON ((6.85334 43.52655, 6.85333 43.52656, ...",65,2016-03-07,2016-03-18,
7,"POLYGON ((6.85505 43.52688, 6.85503 43.52690, ...",65,2016-03-07,2016-03-18,
...,...,...,...,...,...
130892,"POLYGON ((5.93899 43.13797, 5.93900 43.13794, ...",65,2008-07-29,2014-02-07,
130893,"POLYGON ((5.93901 43.13798, 5.93902 43.13794, ...",65,2008-07-29,2014-02-07,
130894,"POLYGON ((5.94354 43.13874, 5.94355 43.13867, ...",65,2008-07-29,2014-02-07,
130895,"POLYGON ((5.94206 43.13858, 5.94207 43.13855, ...",65,2008-07-29,2014-02-07,


In [16]:
# Spatially join each pool with the parcel(s) it intersects.
# NOTE(review): despite the variable name this is a LEFT join, so pools
# without any matching parcel are kept - confirm which one is intended.
inner_join_df = gpd.sjoin(tsurf, parcelle, how='left', predicate='intersects')
inner_join_df.head()


Unnamed: 0,geometry,SYM,DATE_OBS_left,DATE_MAJ_left,TEX_left,index_right,INDP,SUPF,TEX_right,IDU,DATE_OBS_right,DATE_MAJ_right,COAR
3,"POLYGON ((6.84958 43.52740, 6.84957 43.52737, ...",65,2016-03-07,2016-03-18,,3152.0,1,1542.0,1909,0010000B1909,2016-03-07,2021-11-30,
4,"POLYGON ((6.85265 43.52664, 6.85268 43.52660, ...",65,2016-03-07,2016-03-18,,3319.0,1,1500.0,1985,0010000B1985,2016-03-07,2021-11-30,A
5,"POLYGON ((6.85323 43.52720, 6.85320 43.52722, ...",65,2016-03-07,2016-03-18,,3161.0,1,149.0,200,0010000B0200,2016-03-07,2021-11-30,
5,"POLYGON ((6.85323 43.52720, 6.85320 43.52722, ...",65,2016-03-07,2016-03-18,,2262.0,1,3116.0,204,0010000B0204,2016-03-07,2021-11-30,
6,"POLYGON ((6.85334 43.52655, 6.85333 43.52656, ...",65,2016-03-07,2016-03-18,,3163.0,1,2000.0,203,0010000B0203,2016-03-07,2021-11-30,


In [17]:
# Show the whole join result (large frames are rendered truncated).
inner_join_df

Unnamed: 0,geometry,SYM,DATE_OBS_left,DATE_MAJ_left,TEX_left,index_right,INDP,SUPF,TEX_right,IDU,DATE_OBS_right,DATE_MAJ_right,COAR
3,"POLYGON ((6.84958 43.52740, 6.84957 43.52737, ...",65,2016-03-07,2016-03-18,,3152.0,01,1542.0,1909,0010000B1909,2016-03-07,2021-11-30,
4,"POLYGON ((6.85265 43.52664, 6.85268 43.52660, ...",65,2016-03-07,2016-03-18,,3319.0,01,1500.0,1985,0010000B1985,2016-03-07,2021-11-30,A
5,"POLYGON ((6.85323 43.52720, 6.85320 43.52722, ...",65,2016-03-07,2016-03-18,,3161.0,01,149.0,200,0010000B0200,2016-03-07,2021-11-30,
5,"POLYGON ((6.85323 43.52720, 6.85320 43.52722, ...",65,2016-03-07,2016-03-18,,2262.0,01,3116.0,204,0010000B0204,2016-03-07,2021-11-30,
6,"POLYGON ((6.85334 43.52655, 6.85333 43.52656, ...",65,2016-03-07,2016-03-18,,3163.0,01,2000.0,203,0010000B0203,2016-03-07,2021-11-30,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
130893,"POLYGON ((5.93901 43.13798, 5.93902 43.13794, ...",65,2008-07-29,2014-02-07,,923303.0,01,400.0,175,137000EY0175,2008-02-23,2014-02-07,
130894,"POLYGON ((5.94354 43.13874, 5.94355 43.13867, ...",65,2008-07-29,2014-02-07,,923323.0,01,2240.0,111,137000EY0111,2008-02-23,2014-02-07,
130895,"POLYGON ((5.94206 43.13858, 5.94207 43.13855, ...",65,2008-07-29,2014-02-07,,923316.0,01,2330.0,109,137000EY0109,2008-02-23,2014-02-07,
130900,"POLYGON ((6.39028 43.41169, 6.39023 43.41170, ...",65,2009-10-22,2018-10-22,,909410.0,01,32.0,29,148000BY0029,2007-01-25,2018-10-22,
