In [1]:
import pandas as pd
import requests
import requests_cache
from tqdm import tqdm_notebook
import io
requests_cache.install_cache("geocode",expire_after=10000000)

In [2]:
tqdm_notebook().pandas()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

In [4]:
df = pd.read_excel("../jdd/ITV SDIS77 2019.xlsx")

In [5]:
#!pip install ../../cisu/dist/cisu_python-0.1-py3-none-any.whl

In [6]:
df = df[df["DEPT_NUM"].notnull() & df["COD_INSEE"].notnull()]
df["CODE_INSEE_FULL"] = df.DEPT_NUM.astype(int).astype(str) + df.COD_INSEE.astype(int).astype(str).str.zfill(3)
df["ADDRESS"] = df["Adresse complète (adresse volontairement randomisée par rapport aux natures d'ITV)"]

In [7]:
def geo_code_address(address, city_code):
    params = (
        ('q', address),
        ('citycode', city_code),
    )

    response = requests.get('https://api-adresse.data.gouv.fr/search/', params=params)
    data = response.json()
    try:
        properties = data["features"][0]["properties"]
        properties["coordinates"] = data["features"][0]["geometry"]["coordinates"]
        return properties
    except:
        return {}

In [8]:
df_to_geocode = df[["ADDRESS", "CODE_INSEE_FULL"]]

In [9]:
df_to_geocode.to_csv("api_gouv_to_geocode.csv", index=False)

In [10]:
def geocode_csv(path):
    files = {
        'data': (path, open(path, 'rb')),

    }
    data= {
        'columns': ('ADDRESS'),
        'city_code':("CODE_INSEE_FULL")
    }

    response = requests.post('https://api-adresse.data.gouv.fr/search/csv/', files=files, data=data)
    return response


In [11]:
response = geocode_csv("api_gouv_to_geocode.csv")

In [12]:
df_geocoded = pd.read_csv(io.StringIO(response.content.decode('utf8')))
df_geocoded.index=df.index

  interactivity=interactivity, compiler=compiler, result=result)


In [13]:
df_final = df.join(df_geocoded.drop(['CODE_INSEE_FULL', 'ADDRESS'], axis=1)).dropna(subset=["latitude"])

# Mapping 

In [14]:
df_mapping = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vRtWEP1srjMDRO__rOx_2vj33-Lw5874G44PxSSmzxK5VHvhXzUfcxBr2s5uXiU7kNwdN3JEBsW2tZR/pub?gid=0&single=true&output=csv")

In [15]:
df_final_mapped = df_final.merge(df_mapping[['Code SDIS', "Nature de faits","Pathologie"]],
                                     left_on='LIBELLE SINISTRE', 
                                     right_on='Code SDIS')

In [16]:
df_final_mapped.groupby(df_final_mapped["DAT_DEB"].dt.month).count().plot(kind="bar")

<matplotlib.axes._subplots.AxesSubplot at 0x1210aab00>

In [17]:
df_final_mapped.to_csv("df_final_mapped.csv", index=False)

In [45]:
df_final_mapped["DAT_DEB"].dt.month.value_counts()

1     9260
6     8847
12    8687
7     8460
2     8236
3     8140
10    8129
9     8102
4     7921
11    7843
5     7405
8     7162
Name: DAT_DEB, dtype: int64

# Build EdxlMessage from infos

In [18]:
records = df_final_mapped.to_dict(orient="records")

In [19]:
from cisu.factories.edxl_factory import EdxlMessageFactory
from cisu.entities.commons.common_alerts import WhatsHappen, HealthMotive, MainVictim, LocationKind, RiskThreat
from cisu.entities.commons.severity import Severity
from cisu.constants.constants import WhatsHappenConstants, LocationKindConstants, RiskThreatConstants, HealthMotiveConstants

In [20]:
health_motive_constant = HealthMotiveConstants()
whats_happen_constants = WhatsHappenConstants()

In [21]:
def build_edxl_from_record(record):
    try:
        return EdxlMessageFactory.build_ack_from_simple_infos(
            created_at=record["DAT_DEB"].to_pydatetime(),
            lat=record["latitude"],
            lon=record["longitude"],
            address=record["ADDRESS"],
            severity=Severity.UNKNOWN,
            whatsHappen=whats_happen_constants.get_by_code(code=record['Nature de faits']),
            victims=MainVictim.ADULT,
            locationKind=None,
            healthMotive=health_motive_constant.get_by_code(code=record["Pathologie"]),
            riskThreat=None
        )
    except:
        pass

In [25]:
import uuid
from tqdm import tqdm 
import json
import sys

In [26]:
sys.path.append("../src/")

In [27]:
INDEX_NAME = "affairs"

In [28]:
from domain.affairs.cisu.entities.commons import DateType
from domain.affairs.cisu.entities.commons.cisu_enum import CisuEnum
from domain.affairs.cisu.entities.commons.common_alerts import AttributeType, Victims
from domain.tasks.entities.event_entity import Severity
from domain.tasks.entities.task_entity import TaskType
from datetime import datetime, date
from domain.affairs.cisu.entities.commons.location_type import LocationShape

class SapeurJsonEncoder(json.JSONEncoder):
    def default(self, obj):
        try:
            if isinstance(obj, datetime):
                return str(obj)
            elif isinstance(obj, date):
                return str(obj)
            elif isinstance(obj, DateType):
                return str(obj)
            elif isinstance(obj, CisuEnum):
                return str(obj)
            elif isinstance(obj, LocationShape):
                return str(obj)
            elif isinstance(obj, Severity):
                return str(obj)
            elif isinstance(obj, TaskType):
                return str(obj)
            elif isinstance(obj, AttributeType):
                return obj.to_dict()
            elif isinstance(obj, Victims):
                return obj.to_dict()
            elif obj is None:
                return None
            return json.JSONEncoder.default(self, obj)
        except TypeError as e:
            return str(obj)

In [29]:
from elasticsearch import Elasticsearch

In [30]:
client = Elasticsearch("localhost:9200", http_auth=("elastic", "changeme"))
client.ping()

True

In [47]:
affair_mapping = {
  "mappings": {
      "properties": {
        "createdAt": {
          "type": "date",
            "format":"yyyy-MM-dd HH:mm:ss"
            
        },
        "eventId": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "eventLocation": {
          "properties": {
            "address": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "coord": {
              "properties": {
                "lat": {
                  "type": "float"
                },
                "lon": {
                  "type": "float"
                }
              }
            },
            "location": {
              "type": "geo_point"
            },
            "name": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "type": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            }
          }
        },
        "primaryAlert": {
          "properties": {
            "alertCode": {
              "properties": {
                "healthMotive": {
                  "properties": {
                    "code": {
                      "type": "text",
                      "fields": {
                        "keyword": {
                          "type": "keyword",
                          "ignore_above": 256
                        }
                      }
                    },
                    "label": {
                      "type": "text",
                      "fields": {
                        "keyword": {
                          "type": "keyword",
                          "ignore_above": 256
                        }
                      }
                    }
                  }
                },
                "version": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "victims": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "whatsHappen": {
                  "properties": {
                    "code": {
                      "type": "text",
                      "fields": {
                        "keyword": {
                          "type": "keyword",
                          "ignore_above": 256
                        }
                      }
                    },
                    "label": {
                      "type": "text",
                      "fields": {
                        "keyword": {
                          "type": "keyword",
                          "ignore_above": 256
                        }
                      }
                    }
                  }
                }
              }
            },
            "alertId": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "alertLocation": {
              "properties": {
                "address": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "coord": {
                  "properties": {
                    "lat": {
                      "type": "float"
                    },
                    "lon": {
                      "type": "float"
                    }
                  }
                },
                "name": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "type": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                }
              }
            },
            "call": {
              "properties": {
                "dialledURI": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "source": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                }
              }
            },
            "callTaker": {
              "properties": {
                "calltakerURI": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "controlRoom": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "organization": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                }
              }
            },
            "caller": {
              "properties": {
                "callbackURI": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "callerInformation": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "callerURI": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                },
                "spokenLanguage": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                }
              }
            },
            "primary": {
              "type": "boolean"
            },
            "receivedAt": {
              "type": "date",
              "format":"yyyy-MM-dd HH:mm:ss"
                
            },
            "reporting": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            }
          }
        },
        "severity": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        }
      }
    },
}

In [48]:
response = client.indices.create(
    index=INDEX_NAME,
    body=affair_mapping,
    ignore=400 # ignore 400 already exists code
)

In [49]:
affairs = [edxl.resource.message.choice.to_dict() 
           for edxl in [build_edxl_from_record(record) for record in tqdm(records)] if edxl]


100%|██████████| 98192/98192 [00:46<00:00, 2123.36it/s]


In [50]:
str(affairs[7834]["createdAt"])

'2019-10-28 02:31:04'

In [51]:
for affair in tqdm(affairs):
    lat = affair["eventLocation"]["coord"]["lat"]
    lon = affair["eventLocation"]["coord"]["lon"]
    affair["eventLocation"]["location"] = { 
    "lat": float(lat),
    "lon": float(lon)
  }
    

100%|██████████| 98192/98192 [00:00<00:00, 208702.42it/s]


In [52]:
from elasticsearch import helpers


In [53]:
actions = [
    {
        "_id" : affair["eventId"],
        "_index": INDEX_NAME,
        "_source": json.loads(json.dumps(affair, cls=SapeurJsonEncoder,))
    }
    for affair in tqdm(affairs)
]

100%|██████████| 98192/98192 [00:17<00:00, 5626.45it/s]


In [54]:
response = helpers.bulk(client, actions)