Merge pull request #913 from atalyaalon/dev
Add new accidents around schools script and schools table
atalyaalon committed Jul 26, 2018
2 parents aabfa32 + b82a9c2 commit 91db07a
Showing 12 changed files with 3,667 additions and 9 deletions.
54 changes: 54 additions & 0 deletions alembic/versions/5ac16eaf11a_schools_table.py
@@ -0,0 +1,54 @@
"""schools_table
Revision ID: 5ac16eaf11a
Revises: 3680a8998648
Create Date: 2018-07-21 18:40:32.562699
"""

# revision identifiers, used by Alembic.
revision = '5ac16eaf11a'
down_revision = '3680a8998648'
branch_labels = None
depends_on = None

from alembic import op
import sqlalchemy as sa
import geoalchemy2


def upgrade():
    ### commands auto generated by Alembic - please adjust! ###
    op.create_table('schools',
    sa.Column('id', sa.BigInteger(), nullable=False),
    sa.Column('fcode_type', sa.Integer(), nullable=True),
    sa.Column('yishuv_symbol', sa.Integer(), nullable=True),
    sa.Column('yishuv_name', sa.Text(), nullable=True),
    sa.Column('school_name', sa.Text(), nullable=True),
    sa.Column('school_latin_name', sa.Text(), nullable=True),
    sa.Column('usg', sa.Integer(), nullable=True),
    sa.Column('usg_code', sa.Integer(), nullable=True),
    sa.Column('e_ord', sa.Float(), nullable=True),
    sa.Column('n_ord', sa.Float(), nullable=True),
    sa.Column('longitude', sa.Float(), nullable=True),
    sa.Column('latitude', sa.Float(), nullable=True),
    sa.Column('geom', geoalchemy2.types.Geometry(geometry_type='POINT', srid=4326), nullable=True),
    sa.Column('data_year', sa.Integer(), nullable=True),
    sa.Column('prdct_ver', sa.DateTime(), nullable=True),
    sa.Column('x', sa.Float(), nullable=True),
    sa.Column('y', sa.Float(), nullable=True),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_schools_geom'), 'schools', ['geom'], unique=False)
    op.create_index(op.f('ix_schools_id'), 'schools', ['id'], unique=False)
    op.create_index(op.f('ix_schools_yishuv_symbol'), 'schools', ['yishuv_symbol'], unique=False)
    ### end Alembic commands ###


def downgrade():
    ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(op.f('ix_schools_yishuv_symbol'), table_name='schools')
    op.drop_index(op.f('ix_schools_id'), table_name='schools')
    op.drop_index(op.f('ix_schools_geom'), table_name='schools')
    op.drop_table('schools')
    ### end Alembic commands ###
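
The revision can be applied with the usual `alembic upgrade head`. A minimal programmatic sketch using Alembic's command API (assumes the project's alembic.ini is in the working directory):

from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")  # assumed configuration path
command.upgrade(alembic_cfg, "5ac16eaf11a")  # or "head"
# command.downgrade(alembic_cfg, "3680a8998648")  # roll back to the previous revision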
129 changes: 129 additions & 0 deletions anyway/accidents_around_schools.py
@@ -0,0 +1,129 @@
import math
import os

import pandas as pd
import sqlalchemy as sa
from sqlalchemy import or_
from flask_sqlalchemy import SQLAlchemy

from utilities import init_flask
from models import AccidentMarker, Involved, School
from constants import CONST

SUBTYPE_ACCIDENT_WITH_PEDESTRIAN = 1
LOCATION_ACCURACY_PRECISE = True
LOCATION_ACCURACY_PRECISE_INT = 1
INJURED_TYPE_PEDESTRIAN = 1
YISHUV_SYMBOL_NOT_EXIST = -1
CONTENT_ENCODING = 'utf-8'
ANYWAY_UI_FORMAT = "https://www.anyway.co.il/?zoom=17&start_date={start_date}&end_date={end_date}&lat={latitude}&lon={longitude}&show_fatal=1&show_severe=1&show_light=1&approx={location_approx}&accurate={location_accurate}&show_markers=1&show_discussions=&show_urban=3&show_intersection=3&show_lane=3&show_day=7&show_holiday=0&show_time=24&start_time=25&end_time=25&weather=0&road=0&separation=0&surface=0&acctype={acc_type}&controlmeasure=0&district=0&case_type=0"
DATE_INPUT_FORMAT = '%d-%m-%Y'
DATE_URL_FORMAT = '%Y-%m-%d'


app = init_flask()
db = SQLAlchemy(app)

def get_bounding_box(latitude, longitude, distance_in_km):
    """Return (lat_min, lon_min, lat_max, lon_max) around a point, distance_in_km in each direction."""
    latitude = math.radians(latitude)
    longitude = math.radians(longitude)

    radius = 6371  # mean Earth radius in km
    # Radius of the parallel at given latitude
    parallel_radius = radius * math.cos(latitude)

    lat_min = latitude - distance_in_km / radius
    lat_max = latitude + distance_in_km / radius
    lon_min = longitude - distance_in_km / parallel_radius
    lon_max = longitude + distance_in_km / parallel_radius
    rad2deg = math.degrees

    return rad2deg(lat_min), rad2deg(lon_min), rad2deg(lat_max), rad2deg(lon_max)

def acc_inv_query(longitude, latitude, distance, start_date, end_date, school):
    lat_min, lon_min, lat_max, lon_max = get_bounding_box(latitude, longitude, distance)
    baseX = lon_min
    baseY = lat_min
    distanceX = lon_max
    distanceY = lat_max
    pol_str = 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'.format(baseX,
                                                                          baseY,
                                                                          distanceX,
                                                                          distanceY)

    query_obj = db.session.query(Involved, AccidentMarker) \
        .join(AccidentMarker, AccidentMarker.provider_and_id == Involved.provider_and_id) \
        .filter(AccidentMarker.geom.intersects(pol_str)) \
        .filter(Involved.injured_type == INJURED_TYPE_PEDESTRIAN) \
        .filter(AccidentMarker.provider_and_id == Involved.provider_and_id) \
        .filter(or_((AccidentMarker.provider_code == CONST.CBS_ACCIDENT_TYPE_1_CODE),
                    (AccidentMarker.provider_code == CONST.CBS_ACCIDENT_TYPE_3_CODE))) \
        .filter(AccidentMarker.created >= start_date) \
        .filter(AccidentMarker.created < end_date) \
        .filter(AccidentMarker.locationAccuracy == LOCATION_ACCURACY_PRECISE_INT) \
        .filter(AccidentMarker.yishuv_symbol != YISHUV_SYMBOL_NOT_EXIST)

    df = pd.read_sql_query(query_obj.with_labels().statement, query_obj.session.bind)

    if LOCATION_ACCURACY_PRECISE:
        location_accurate = 1
        location_approx = 0
    else:
        location_accurate = 1
        location_approx = 1
    ui_url = ANYWAY_UI_FORMAT.format(latitude=school['latitude'],
                                     longitude=school['longitude'],
                                     start_date=start_date.strftime(DATE_URL_FORMAT),
                                     end_date=end_date.strftime(DATE_URL_FORMAT),
                                     acc_type=SUBTYPE_ACCIDENT_WITH_PEDESTRIAN,
                                     location_accurate=location_accurate,
                                     location_approx=location_approx)
    df['anyway_link'] = ui_url
    df['school_id'] = school['id']
    df['school_name'] = school['school_name']
    df['school_yishuv_symbol'] = school['yishuv_symbol']
    df['school_yishuv_name'] = school['yishuv_name']
    df['school_longitude'] = school['longitude']
    df['school_latitude'] = school['latitude']
    return df


def main(start_date, end_date, distance, output_path):
    schools_query = sa.select([School])
    df_schools = pd.read_sql_query(schools_query, db.session.bind)
    df_total = pd.DataFrame()
    for idx, school in df_schools.iterrows():
        df_total = pd.concat([df_total,
                              acc_inv_query(longitude=school['longitude'],
                                            latitude=school['latitude'],
                                            distance=distance,
                                            start_date=start_date,
                                            end_date=end_date,
                                            school=school)],
                             axis=0)
    df_total.to_csv(os.path.join(output_path, 'df_total.csv'), encoding=CONTENT_ENCODING)

    df_total_involved_count = (df_total.groupby(['school_id', 'school_name', 'anyway_link', 'school_longitude', 'school_latitude', 'school_yishuv_symbol', 'school_yishuv_name'])
                               .size()
                               .reset_index(name='injured_count')
                               .sort_values('injured_count', ascending=False))
    df_total_involved_count.to_csv(os.path.join(output_path, 'df_total_involved_count.csv'), encoding=CONTENT_ENCODING, header=True)

    df_total_involved_by_injury = (df_total.groupby(['school_id', 'school_name', 'anyway_link', 'school_longitude', 'school_latitude', 'school_yishuv_symbol', 'school_yishuv_name', 'involved_injury_severity'])
                                   .size()
                                   .reset_index(name='injured_count')
                                   .sort_values('injured_count', ascending=False))
    df_total_involved_by_injury.to_csv(os.path.join(output_path, 'df_total_involved_by_injury.csv'), encoding=CONTENT_ENCODING, header=True)

    df_total_accident_count = (df_total.drop_duplicates(['school_id', 'school_name', 'anyway_link', 'school_longitude', 'school_latitude', 'school_yishuv_symbol', 'school_yishuv_name', 'provider_and_id'])
                               .groupby(['school_id', 'school_name', 'school_yishuv_symbol', 'school_yishuv_name', 'markers_severity'])
                               .size()
                               .reset_index(name='accidents_count')
                               .sort_values('accidents_count', ascending=False))
    df_total_accident_count.to_csv(os.path.join(output_path, 'df_total_accident_count.csv'), encoding=CONTENT_ENCODING, header=True)
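
The polygon passed to AccidentMarker.geom.intersects is a flat bounding box: a distance of d km corresponds to roughly d/R radians of latitude and d/(R*cos(latitude)) radians of longitude, with R the Earth radius. A standalone sketch of the same approximation, kept separate so it does not need the Flask app or database (coordinates are illustrative):

import math

def bounding_box(latitude, longitude, distance_in_km, radius_km=6371.0):
    # Same approximation as get_bounding_box above: offsets in radians, converted back to degrees.
    lat = math.radians(latitude)
    lon = math.radians(longitude)
    d_lat = distance_in_km / radius_km
    d_lon = distance_in_km / (radius_km * math.cos(lat))
    return (math.degrees(lat - d_lat), math.degrees(lon - d_lon),
            math.degrees(lat + d_lat), math.degrees(lon + d_lon))

print(bounding_box(32.08, 34.78, 0.5))  # ~0.5 km box around an illustrative point near Tel Aviv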
21 changes: 21 additions & 0 deletions anyway/models.py
@@ -878,3 +878,24 @@ class VehicleNoLocation(Base):
Index('accident_id_idx_vehicles_no_location', 'accident_id'),
Index('provider_and_id_idx_vehicles_no_location', 'provider_and_id', unique=False),
{})


class School(Base):
    __tablename__ = "schools"
    id = Column(BigInteger(), primary_key=True, index=True)
    fcode_type = Column(Integer(), nullable=True)
    yishuv_symbol = Column(Integer(), nullable=True, index=True)
    yishuv_name = Column(Text(), nullable=True)
    school_name = Column(Text(), nullable=True)
    school_latin_name = Column(Text(), nullable=True)
    usg = Column(Integer(), nullable=True)
    usg_code = Column(Integer(), nullable=True)
    e_ord = Column(Float(), nullable=True)
    n_ord = Column(Float(), nullable=True)
    longitude = Column(Float(), nullable=True)
    latitude = Column(Float(), nullable=True)
    geom = Column(Geometry('POINT', srid=4326), index=True)
    data_year = Column(Integer(), nullable=True)
    prdct_ver = Column(DateTime, default=None)
    x = Column(Float(), nullable=True)
    y = Column(Float(), nullable=True)
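
Once the schools table is populated, the new model can be queried like any other mapped class; a short sketch (the yishuv_symbol value 5000 is illustrative):

from flask_sqlalchemy import SQLAlchemy
from anyway.utilities import init_flask
from anyway.models import School

app = init_flask()
db = SQLAlchemy(app)

# Schools in a single settlement, with their WGS84 coordinates.
for school in db.session.query(School).filter(School.yishuv_symbol == 5000):
    print(school.id, school.school_name, school.latitude, school.longitude)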
9 changes: 1 addition & 8 deletions anyway/parsers/cbs.py
@@ -15,7 +15,7 @@
from ..models import AccidentMarker, Involved, Vehicle, AccidentsNoLocation, InvolvedNoLocation, VehicleNoLocation
from .. import models
from ..constants import CONST
from ..utilities import ItmToWGS84, init_flask, CsvReader, time_delta, decode_hebrew,ImporterUI,truncate_tables
from ..utilities import ItmToWGS84, init_flask, CsvReader, time_delta, decode_hebrew,ImporterUI,truncate_tables,chunks
from functools import partial
import logging

@@ -384,13 +384,6 @@ def get_files(directory):
        elif name in (ACCIDENTS, INVOLVED, VEHICLES):
            yield name, csv

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    try: xrange
    except NameError:
        xrange = range
    for i in xrange(0, len(l), n):
        yield l[i:i + n]


def import_to_datastore(directory, provider_code, batch_size):
73 changes: 73 additions & 0 deletions anyway/parsers/schools.py
@@ -0,0 +1,73 @@
import logging
from datetime import datetime

import pandas as pd
from flask_sqlalchemy import SQLAlchemy

from .. import school_fields
from ..models import School
from ..utilities import init_flask, time_delta, chunks

app = init_flask()
db = SQLAlchemy(app)

def get_data_value(value):
    """
    :returns: value for parameters which are not mandatory in an accident data
    OR -1 if the parameter value does not exist
    """
    return int(value) if value else -1


def get_schools(filepath):
    logging.info("\tReading schools data from '%s'..." % filepath)
    schools = []
    df = pd.read_csv(filepath)
    for idx, row in df.iterrows():
        longitude, latitude = float(row[school_fields.longitude]), float(row[school_fields.latitude])
        point_str = 'SRID=4326;POINT({0} {1})'.format(longitude, latitude)
        school = {
            "id": int(row[school_fields.id]),
            "fcode_type": int(row[school_fields.fcode_type]),
            "yishuv_symbol": int(row[school_fields.yishuv_symbol]),
            "yishuv_name": row[school_fields.yishuv_name],
            "school_name": row[school_fields.school_name],
            "school_latin_name": row[school_fields.school_latin_name],
            "usg": int(row[school_fields.usg]),
            "usg_code": int(row[school_fields.usg_code]),
            "e_ord": float(row[school_fields.e_ord]),
            "n_ord": float(row[school_fields.n_ord]),
            "longitude": longitude,
            "latitude": latitude,
            "geom": point_str,
            "data_year": get_data_value(row[school_fields.data_year]),
            "prdct_ver": None,
            "x": float(row[school_fields.x]),
            "y": float(row[school_fields.y]),
        }
        schools.append(school)

    return schools


def import_to_datastore(filepath, batch_size):
    try:
        assert batch_size > 0
        started = datetime.now()
        schools = get_schools(filepath)
        new_items = 0
        all_existing_schools_ids = set(map(lambda x: x[0],
                                           db.session.query(School.id).all()))
        schools = [school for school in schools if school['id'] not in all_existing_schools_ids]
        logging.info('inserting ' + str(len(schools)) + ' new schools')
        for schools_chunk in chunks(schools, batch_size):
            db.session.bulk_insert_mappings(School, schools_chunk)
            db.session.commit()
        new_items += len(schools)
        logging.info("\t{0} items in {1}".format(new_items, time_delta(started)))
        return new_items
    except ValueError as e:
        logging.error("Failed to import schools from '{0}': {1}".format(filepath, e))
        return 0


def parse(filepath, batch_size):
    started = datetime.now()
    total = import_to_datastore(filepath, batch_size)
    logging.info("Total: {0} schools in {1}".format(total, time_delta(started)))
16 changes: 16 additions & 0 deletions anyway/school_fields.py
@@ -0,0 +1,16 @@
id = "UNIQ_ID"
fcode_type = "FCODE_TYPE"
yishuv_symbol = "SETL_CODE"
yishuv_name = "SETL_NAME"
school_name = "NAME"
school_latin_name = "LATIN_NAME"
usg = "USG_GROUP"
usg_code = "USG_CODE"
e_ord = "E_ORD"
n_ord = "N_ORD"
longitude = "LON"
latitude = "LAT"
data_year = "DATA_YEAR"
prdct_ver = "PRDCT_VER"
x = "X"
y = "Y"
File renamed without changes.
File renamed without changes.
File renamed without changes.
9 changes: 9 additions & 0 deletions anyway/utilities.py
@@ -163,3 +163,12 @@ def is_delete_all(self):
if confirm_delete_all.lower() == 'n':
self._delete_all = False
return self._delete_all


def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    try: xrange
    except NameError:
        xrange = range
    for i in xrange(0, len(l), n):
        yield l[i:i + n]
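
chunks() is now shared by the CBS and schools importers; its behaviour in one line (values illustrative):

from anyway.utilities import chunks

print(list(chunks(list(range(7)), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]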
35 changes: 34 additions & 1 deletion main.py
@@ -80,9 +80,16 @@ def united(light, username, password, lastmail):
@click.argument("filename")
def rsa(filename):
from anyway.parsers.rsa import parse

return parse(filename)

@process.command()
@click.argument("filepath")
@click.option('--batch_size', type=int, default=5000)
def schools(filepath, batch_size):
from anyway.parsers.schools import parse
return parse(filepath=filepath,
batch_size=batch_size)


@cli.command()
@click.argument('identifiers', nargs=-1)
@@ -117,6 +124,32 @@ def load_discussions(identifiers):
db.session.rollback()
logging.warn("Failed: " + identifier + ": " + e)

@cli.group()
def scripts():
    pass

def valid_date(date_string):
    DATE_INPUT_FORMAT = '%d-%m-%Y'
    from datetime import datetime
    try:
        return datetime.strptime(date_string, DATE_INPUT_FORMAT)
    except ValueError:
        msg = "Not a valid date: '{0}'.".format(date_string)
        raise click.BadParameter(msg)


@scripts.command()
@click.option('--start_date', default='01-01-2013', type=valid_date, help='The start date - format DD-MM-YYYY')
@click.option('--end_date', default='31-12-2017', type=valid_date, help='The end date - format DD-MM-YYYY')
@click.option('--distance', default=0.5, type=float, help='Distance in km around each school. Default is 0.5 (500 m)')
@click.option('--output_path', default='output', help='Directory for the output CSV files. Default is "output"')
def accidents_around_schools(start_date, end_date, distance, output_path):
    from anyway.accidents_around_schools import main
    return main(start_date=start_date,
                end_date=end_date,
                distance=distance,
                output_path=output_path)


if __name__ == '__main__':
    cli(sys.argv[1:])  # pylint: disable=too-many-function-args
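
Putting the two new commands together, a sketch of how they might be driven from Python via click's test runner (the CSV path and dates are illustrative, and the existing process group in main.py is assumed):

from click.testing import CliRunner
from main import cli

runner = CliRunner()
# Load the schools table from a CSV (path is illustrative).
runner.invoke(cli, ['process', 'schools', 'static/data/schools/schools.csv', '--batch_size', '5000'])
# Write the accidents-around-schools CSVs for 2013-2017, 500 m around each school.
runner.invoke(cli, ['scripts', 'accidents_around_schools',
                    '--start_date', '01-01-2013', '--end_date', '31-12-2017',
                    '--distance', '0.5', '--output_path', 'output'])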