In [1]:
from mylogger import logger

In [2]:
import json
import pandas
import seaborn
import requests
from tqdm.notebook import tqdm
from itertools import cycle
from matplotlib import pyplot as plt

In [3]:
# Endless iterator over six single-letter codes (presumably matplotlib colour
# shorthands; no visible cell consumes it).
cycol = cycle(("b", "g", "r", "c", "m", "k"))

In [4]:
def get_data(zipcodes, timeout=(5, 60)):
    """Download the raw API response for every zipcode.

    Parameters
    ----------
    zipcodes : iterable
        Postal codes; each one is sent as the ``code_postal`` query parameter
        of ``https://api.cquest.org/dvf``.
    timeout : float or (float, float), optional
        Passed to ``requests``; a ``(connect, read)`` pair in seconds. Without
        it a stalled server would block the notebook forever.

    Returns
    -------
    dict
        Maps each zipcode to the ``requests.Response`` received for it.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    requests.Timeout
        If the API does not answer within ``timeout``.
    """
    logger.info(msg = "Loading data by zipcode", category="REQUEST")
    output = {}
    # A single session lets every request reuse the same connection.
    with requests.Session() as session:
        for zipcode in tqdm(zipcodes) :
            logger.info(msg = f"Loading data for zipcode {zipcode}", category="REQUEST")
            # `params=` lets requests build and escape the query string.
            r = session.get(
                'https://api.cquest.org/dvf',
                params={'code_postal': zipcode},
                timeout=timeout,
            )
            # Fail here, with the HTTP status, rather than later while parsing
            # an error page as JSON.
            r.raise_for_status()
            output[zipcode] = r
    return output

def parse_data(inputs) :
    """Decode every response body and gather all records into one flat list.

    Parameters
    ----------
    inputs : dict
        Maps a zipcode to an object exposing a ``.text`` attribute holding a
        JSON document with a top-level ``"resultats"`` list (as returned by
        ``get_data``).

    Returns
    -------
    list
        The concatenation of every ``"resultats"`` list, in the iteration
        order of ``inputs``.

    Raises
    ------
    json.JSONDecodeError
        If a body is not valid JSON.
    KeyError
        If a decoded body has no ``"resultats"`` key.
    """
    output = []
    logger.info(msg = "Json parsing data by zipcode", category="PARSE")
    for zipcode, input_ in tqdm(inputs.items()) :
        logger.info(msg = f"Json parsing data for zipcode {zipcode}", category="PARSE")
        resultats = json.loads(input_.text)['resultats']
        # extend() appends in place; `output = output + resultats` re-copied
        # the whole accumulated list on every zipcode.
        output.extend(resultats)
    return output

In [5]:
# The sixteen codes "13001" .. "13016", each built from a zero-padded suffix.
zipcodes = [f"130{suffix:02d}" for suffix in range(1, 17)]

In [None]:
# Download one response per zipcode, then flatten them into a single list of
# records.
results = parse_data(get_data(zipcodes))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))

In [None]:
# Report how many records `results` holds across all zipcodes.
print(f"number of rows : {len(results)}")

In [None]:
# Build the working frame from the parsed records and discard the columns
# listed below.
df = pandas.DataFrame(results)
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas 2.0 no longer accepts.
df = df.drop(columns=["articles_1", "articles_2", "articles_3",
                      "articles_4", "articles_5", "numero_disposition",
                      "reference_document", "code_service_ch", "commune",
                      "code_departement", "code_commune", "prefixe_section",
                      "section", "numero_plan", "numero_volume", "lot_1",
                      "surface_lot_1", "lot_2", "surface_lot_2", "lot_3",
                      "surface_lot_3", "lot_4", "surface_lot_4", "lot_5",
                      "surface_lot_51", "nombre_lots", "code_type_local",
                      "type_local", "identifiant_local", "surface_relle_bati",
                      "nature_culture_speciale", "geom"])

In [None]:
# Type the columns, derive day / month / year from the sale date, and order
# the rows chronologically. Each lambda receives the frame as updated by the
# assignments listed before it.
df = (
    df.assign(
        date_mutation=lambda frame: pandas.to_datetime(frame["date_mutation"], format="%Y-%m-%d"),
        code_postal=lambda frame: frame["code_postal"].astype("category"),
        day=lambda frame: frame["date_mutation"].dt.day,
        month=lambda frame: frame["date_mutation"].dt.month,
        year=lambda frame: frame["date_mutation"].dt.year,
        valeur_fonciere=lambda frame: frame["valeur_fonciere"].astype("float"),
    )
    .sort_values(by="date_mutation")
)

In [None]:
# Show the last five rows of the date-sorted frame.
df.tail(n=5)

In [None]:
# Earliest and latest sale dates present in the frame.
print(f"We have data from {df['date_mutation'].min()} to {df['date_mutation'].max()}")

In [None]:
# Histogram of sale dates (100 bins) drawn on an explicit 20x10 inch axes.
fig, ax = plt.subplots(figsize=(20, 10))
df["date_mutation"].hist(ax=ax, bins=100, color="green", alpha=0.4)
ax.grid(False)

In [None]:
# Number of rows per zipcode, shown in the order of `zipcodes`.
fig, ax = plt.subplots(figsize=(20, 10))
df["code_postal"].value_counts().loc[zipcodes].plot.bar(ax=ax, color="orange", alpha=0.4)
ax.grid(False)

In [None]:
# Row counts per (year, zipcode): years along the x axis, one bar per zipcode.
ax = df.groupby(["year", "code_postal"]).size().unstack().plot(kind="bar")
fig = ax.get_figure()
fig.set_size_inches(18.5, 10.5)
ax.grid(False)

In [None]:
# For every zipcode: scatter valeur_fonciere / surface_terrain against the
# sale date for 2019, then print the 2019 row(s) with the highest
# valeur_fonciere.
for zipcode in zipcodes:
    # Filter once and take an explicit copy, so the new column is written to
    # an independent frame rather than to a slice of `df` (the original
    # chained assignment triggered SettingWithCopyWarning).
    df_tmp = df[(df["code_postal"] == zipcode) & (df["year"] == 2019)].copy()
    df_tmp["valeur_a_unite"] = df_tmp["valeur_fonciere"] / df_tmp["surface_terrain"]
    seaborn.relplot(x=df_tmp["date_mutation"], y=df_tmp["valeur_a_unite"])
    # relplot opens its own figure; resize that one.
    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)
    plt.grid(False)
    print(df_tmp[df_tmp["valeur_fonciere"] == df_tmp["valeur_fonciere"].max()])
    del df_tmp

In [None]:
# Candidate rows: nombre_pieces_principales equal to 2 and a known
# surface_terrain.
interesting = df[df["nombre_pieces_principales"] == 2]
interesting = interesting[interesting["surface_terrain"].notna()]
# The other administrative columns were already removed when `df` was built,
# so dropping them again raised KeyError; only date_mutation is left to drop
# (year / month / day remain available). `columns=` also avoids the positional
# axis argument that pandas 2.0 rejects.
interesting = interesting.drop(columns=["date_mutation"])
interesting.tail(5)

In [None]:
# Keep rows whose surface_terrain is at least 50, then show the new shape.
interesting = interesting.loc[interesting["surface_terrain"].ge(50)]
interesting.shape

In [None]:
# Remove the row(s) holding the current maximum valeur_fonciere.
# NOTE: re-running this cell removes the next-highest value each time.
interesting = interesting.loc[
    interesting["valeur_fonciere"].ne(interesting["valeur_fonciere"].max())
]

In [None]:
# Keep rows whose surface_terrain is at most 80.
interesting = interesting.loc[interesting["surface_terrain"].le(80)]

In [None]:
# Keep only the rows dated 2019.
interesting = interesting.loc[interesting["year"].eq(2019)]

In [None]:
# Rebind the sorted result instead of sorting in place: `interesting` is a
# filtered subset of `df`, and in-place mutation of such a subset triggers
# SettingWithCopyWarning.
interesting = interesting.sort_values(by="code_postal")
interesting.tail(25)

In [None]:
# Rebind the sorted result instead of sorting in place: `interesting` is a
# filtered subset of `df`, and in-place mutation of such a subset triggers
# SettingWithCopyWarning. The tail then shows the 25 highest valeur_fonciere.
interesting = interesting.sort_values(by="valeur_fonciere")
interesting.tail(25)