In [34]:
import geopandas as gpd
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup 
import gzip
import glob
import os
from pathlib import Path
import json
import logging
from tqdm import tqdm 

# Download and extract the gzip archives

In [5]:
# Folder holding the downloaded archives and the extracted JSON files.
# Built with os.path.join so the separator matches the host OS; the trailing
# empty component keeps the trailing separator that string concatenations such
# as PARENT_PATH+'*.gz' rely on. On Windows this is still '.\\raw_data\\'.
PARENT_PATH = os.path.join('.', 'raw_data', '')

In [43]:
# Remote listing page for the raw cadastre GeoJSON layers; the path selects
# department 83 of the "latest" release.
url = "https://cadastre.data.gouv.fr/data/etalab-cadastre/latest/geojson/departements/83/raw"

In [13]:
# Make sure the raw-data folder exists before anything is written into it.
if not os.path.exists(PARENT_PATH):
    raw_dir = Path(f"./{PARENT_PATH}")
    raw_dir.mkdir(parents=True, exist_ok=True)

In [10]:
def untar_file(tar_file, dest_folder=None) :
    '''
    Decompress a gzip file and write its text content next to the other raw data.

    Parameters
    ----------
    tar_file : str
        Path of the ``.gz`` file to decompress (despite the name, this is a
        plain gzip file, not a tar archive).
    dest_folder : str, optional
        Folder receiving the decompressed file. Defaults to ``PARENT_PATH``.

    Returns
    -------
    list of str
        The decompressed content, one UTF-8 decoded string per line.
    '''
    if dest_folder is None :
        dest_folder = PARENT_PATH
    print(tar_file)

    # Accept both '\\' and '/' separators regardless of the host OS.
    filename = tar_file.replace('\\', '/').rsplit('/', 1)[-1]
    # Strip only the trailing extension, not every '.gz' occurrence in the name.
    if filename.endswith('.gz') :
        filename = filename[:-len('.gz')]
    filepath = os.path.join(dest_folder, filename)

    with gzip.open(tar_file, 'rb') as f :
        content = [line.decode('utf8') for line in f]

    # Write with an explicit encoding: the platform default (e.g. cp1252 on
    # Windows) cannot be trusted to round-trip the UTF-8 text just decoded.
    with open(filepath, 'w', encoding='utf8') as w :
        w.writelines(content)
    return content

In [86]:
def load_geopandas(json_file) :
    '''
    Load a GeoJSON-like file into a geopandas dataframe.

    Parameters
    ----------
    json_file : str
        Path of a JSON file whose top-level object has a "features" list.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built with ``GeoDataFrame.from_features``.
    '''
    # Explicit encoding: the platform default is not UTF-8 everywhere
    # (e.g. Windows), which breaks on accented characters.
    with open(json_file, encoding='utf-8') as f :
        data = json.load(f)
    # The file handle is no longer needed once the JSON is parsed.
    return gpd.GeoDataFrame.from_features(data["features"])

In [76]:
def download_file(url_file, couche, save_folder = False) :
    '''
    Download one file listed on an HTML index page and save it locally.

    Parameters
    ----------
    url_file : str
        URL of the page listing the downloadable files.
    couche : str
        Keyword identifying the file: the first link whose text contains it
        is the one downloaded.
    save_folder : str, optional
        Destination folder. When left empty/False, the second-to-last
        component of ``url_file`` is used as a relative folder name.

    Returns
    -------
    str
        Path of the saved file.

    Raises
    ------
    ValueError
        If no link on the page contains ``couche``.
    requests.HTTPError
        If the listing or the file request returns an error status.
    '''
    if not save_folder :
        save_folder = url_file.split('/')[-2]
    os.makedirs(save_folder, exist_ok=True)

    with requests.Session() as S :
        listing = S.get(url_file, timeout=60)
        listing.raise_for_status()
        soup = BeautifulSoup(listing.content, 'html.parser')

        # Keep the search keyword and the resolved file name in separate
        # variables so that later links are still matched against the keyword.
        filename = next(
            (link.text.strip() for link in soup.find_all('a') if couche in link.text),
            None,
        )
        if filename is None :
            raise ValueError(f"No link containing '{couche}' found at {url_file}")
        print('/' + filename)

        filepath = os.path.join(save_folder, filename)
        file_url = url_file.rstrip('/') + '/' + filename
        # Stream the body while the session is still open, in large chunks
        # (iter_content() defaults to one byte per chunk).
        with S.get(file_url, stream=True, timeout=60) as r :
            r.raise_for_status()
            with open(filepath, 'wb') as f :
                for chunk in r.iter_content(chunk_size=1024 * 1024) :
                    f.write(chunk)
    return filepath
            

In [80]:
# Download the gzipped layers on the first run, then decompress every archive
# whose JSON counterpart is not on disk yet.
tar_list = glob.glob(PARENT_PATH+'*.gz')
if len(tar_list) == 0 :
    for couche in ['tsurf', 'parcelle'] :
        download_file(url, couche, PARENT_PATH)
    # Re-scan the folder: the list computed above predates the download, so
    # without this the freshly downloaded archives would not be extracted.
    tar_list = glob.glob(PARENT_PATH+'*.gz')

existing_json = set(glob.glob(PARENT_PATH+'*.json'))
for tar_file in tar_list :
    if tar_file.replace('.gz', '') not in existing_json :
        # The returned lines are not kept: the file on disk is what is used later.
        untar_file(tar_file)

In [81]:
# Layer keywords handled by this notebook.
couches = ['parcelle', 'tsurf']

In [84]:
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the list reproducible from one run (and one machine) to the next.
json_list = sorted(glob.glob(PARENT_PATH+'*.json'))

In [87]:
def _find_layer_file(paths, keyword) :
    '''Return the first path whose file name contains `keyword`.'''
    for path in paths :
        if keyword in os.path.basename(path) :
            return path
    raise FileNotFoundError(f"No JSON file found for layer '{keyword}' in {paths}")

# Pick each file by its layer keyword rather than by list position: the order
# of glob results is not guaranteed, so positions could silently swap layers.
parcelle = load_geopandas(_find_layer_file(json_list, 'parcelle'))
tsurf = load_geopandas(_find_layer_file(json_list, 'tsurf'))

# Prepare Dataset

In [89]:
# Keep only the rows whose SYM code is '65', i.e. the swimming pools.
pool = tsurf.loc[tsurf['SYM'].eq('65')]

## Spatial join between parcels and swimming pools

In [12]:
# Right spatial join: pools (left) matched to the parcels (right) they lie within.
df = gpd.sjoin(pool, parcelle, how='right', predicate='within')

In [13]:
# A parcel has a pool when the join found a matching left (pool) row.
has_pool = df['index_left'].notnull()

# Combine the pool mask and the area (SUPF) mask in a single selection.
# Filtering `df` twice in a row would discard the first filter and leave
# both frames containing parcels with and without pools.
df_with_pool = df[has_pool & (df['SUPF'] <= 1000)]
df_without_pool = df[~has_pool & (df['SUPF'] <= 10000)]


In [14]:
# Fixed random_state values make the draw reproducible; 1000 parcels per class.
df_with_pool_sample = df_with_pool.sample(1000, random_state=6)
df_without_pool_sample = df_without_pool.sample(1000, random_state=3)

# Google maps API

##### Example of an API call for a specific location

https://maps.googleapis.com/maps/api/staticmap?format=png&size=700x700&zoom=20&maptype=satellite&center=43.52344541523472,6.1504762456711894&key=YOUR_API_KEY
        

In [77]:
class picture_geometry() :
    '''
    One Google Maps satellite picture framed on a parcel polygon.

    The polygon (longitude/latitude coordinates) is sent to the Static Maps
    API as a `path`, and the returned image is saved to disk together with a
    line in a CSV log.

    The API key is read from the GOOGLE_MAPS_API_KEY environment variable so
    that no secret is stored in the notebook.

    Parameters
    ----------
    zipcode : written as-is in the log file.
    havepool : label of the picture; stored as 1 when equal to True, else 0.
    key : stored on the instance as `self.key`; not used by the methods below.
    _id : parcel identifier, used in the file name and in the log.
    polygon : pandas/geopandas Series whose first element is a polygon
        exposing `exterior.coords`.
    h, w : picture height and width in pixels.
    zoom : Google Maps zoom level.
    '''

    # Name of the environment variable holding the Google Maps API key.
    API_KEY_ENV_VAR = 'GOOGLE_MAPS_API_KEY'

    def __init__(self,zipcode, havepool, key, _id, polygon, h, w, zoom) :
        self.zipcode = zipcode
        self.key = key
        self.havepool = 1 if havepool == True else 0
        self._id = _id # parcelle id
        self.polygon = polygon
        self.height = h
        self.width = w
        self.zoom = zoom

        self.LOGFILE = './datalog.csv'
        # Never hard-code the key: it ends up in version control and in every
        # shared copy of the notebook.
        self.API_KEY = os.environ.get(self.API_KEY_ENV_VAR, '')
        self.BASE_URL = 'https://maps.googleapis.com/maps/api/staticmap?'
        self.form = 'png'
        self.maptype = 'satellite'
        self.border = 'color:0xff0000ff|weight:0|'
        self.filepath = f'{self.havepool}_{self._id}_{self.height}x{self.width}.{self.form}'

    def coord_lister(self,geom):
        '''
        Convert a polygon into its exterior ring coordinates.
        Return : list of (x, y) tuples
        '''
        return list(geom.exterior.coords)

    def coordinate_features(self) :
        '''
        Convert the polygon coordinates into the format accepted by the
        Google Maps API. Ex : (6.8499536, 43.5275064) becomes
        43.5275064,6.8499536 (x and y swapped), points being separated by "|".
        The result is also stored in `self.coordinates_str`.
        Return : the merged string with all coordinates.
        '''
        coordinates = self.polygon.apply(self.coord_lister)
        # Only the first geometry of the series is used.
        coordinates = coordinates.values[0]
        coordinates = [f'{cords[1]},{cords[0]}' for cords in coordinates]
        self.coordinates_str = "|".join(coordinates)
        return self.coordinates_str

    def api_url_generator(self):
        '''
        Merge all necessary parameters into a Static Maps API url, e.g.
        https://maps.googleapis.com/maps/api/staticmap?format=png&
        path=color:0xff0000ff|weight:0|43.527667,6.849225|...&size=700x700&
        zoom=20&maptype=satellite&key=YOUR_API_KEY

        Raises RuntimeError when no API key is configured.
        '''
        if not self.API_KEY :
            raise RuntimeError(
                f'Google Maps API key missing: set the {self.API_KEY_ENV_VAR} environment variable'
            )
        # Build the coordinate string on demand so the method does not depend
        # on coordinate_features() having been called first.
        if not hasattr(self, 'coordinates_str') :
            self.coordinate_features()
        # The API expects size as {width}x{height}.
        url = (
            f'{self.BASE_URL}format={self.form}'
            f'&path={self.border}{self.coordinates_str}'
            f'&size={self.width}x{self.height}'
            f'&zoom={self.zoom}&maptype={self.maptype}&key={self.API_KEY}'
        )
        return url

    def api_call(self, url) :
        '''
        Call the API and return the `requests` response holding the picture.
        Raises requests.HTTPError on an error status so that an error body is
        never saved as if it were a picture.
        '''
        # Non-streamed request: the body is fully read before returning, so
        # the response stays usable after the connection is released.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        return r

    def save_picture(self, folder = './') :
        '''
        Download the picture, save it as `folder`/`self.filepath` and append
        a line "id;zipcode;havepool;filepath" to the log file.
        '''
        self.coordinate_features()
        url = self.api_url_generator()
        r = self.api_call(url)

        # exist_ok makes a prior existence check unnecessary.
        Path(folder).mkdir(parents=True, exist_ok=True)

        with open(os.path.join(folder, self.filepath), 'wb') as f:
            f.write(r.content)

        with open(self.LOGFILE, 'a', encoding='utf-8') as f :
            output = f'{self._id};{self.zipcode};{self.havepool};{self.filepath}\n'
            f.write(output)

In [16]:
# Settings shared by every picture of the "with pool" sample.
zipcode = 83
havepool = True
h = '700'
w = '700'
zoom = '20'

# One API call per sampled parcel; the picture is written into ./dataset/.
for i in tqdm(range(len(df_with_pool_sample))):
    AOI = df_with_pool_sample.iloc[[i]]  # one-row frame, so 'geometry' stays a Series
    poly = AOI['geometry']
    _id = AOI['IDU'].values[0]
    a = picture_geometry(
        zipcode=zipcode,
        havepool=havepool,
        key=i,
        _id=_id,
        polygon=poly,
        h=h,
        w=w,
        zoom=zoom,
    )
    a.save_picture(folder = './dataset/')


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:03<00:00,  2.47s/it]
