-
Notifications
You must be signed in to change notification settings - Fork 242
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #913 from atalyaalon/dev
Add new accidents around schools script and schools table
- Loading branch information
Showing
12 changed files
with
3,667 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
"""schools_table | ||
Revision ID: 5ac16eaf11a | ||
Revises: 3680a8998648 | ||
Create Date: 2018-07-21 18:40:32.562699 | ||
""" | ||
|
||
# revision identifiers, used by Alembic.
revision = '5ac16eaf11a'        # this migration's id
down_revision = '3680a8998648'  # parent migration in the chain
branch_labels = None
depends_on = None
|
||
from alembic import op | ||
import sqlalchemy as sa | ||
import geoalchemy2 | ||
|
||
|
||
def upgrade():
    """Create the ``schools`` table and its three secondary indexes."""
    # Column definitions mirror the School model; ``geom`` is a
    # SRID-4326 POINT handled by geoalchemy2.
    schema = [
        sa.Column('id', sa.BigInteger(), nullable=False),
        sa.Column('fcode_type', sa.Integer(), nullable=True),
        sa.Column('yishuv_symbol', sa.Integer(), nullable=True),
        sa.Column('yishuv_name', sa.Text(), nullable=True),
        sa.Column('school_name', sa.Text(), nullable=True),
        sa.Column('school_latin_name', sa.Text(), nullable=True),
        sa.Column('usg', sa.Integer(), nullable=True),
        sa.Column('usg_code', sa.Integer(), nullable=True),
        sa.Column('e_ord', sa.Float(), nullable=True),
        sa.Column('n_ord', sa.Float(), nullable=True),
        sa.Column('longitude', sa.Float(), nullable=True),
        sa.Column('latitude', sa.Float(), nullable=True),
        sa.Column('geom', geoalchemy2.types.Geometry(geometry_type='POINT', srid=4326), nullable=True),
        sa.Column('data_year', sa.Integer(), nullable=True),
        sa.Column('prdct_ver', sa.DateTime(), nullable=True),
        sa.Column('x', sa.Float(), nullable=True),
        sa.Column('y', sa.Float(), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table('schools', *schema)
    for column_name in ('geom', 'id', 'yishuv_symbol'):
        op.create_index(op.f('ix_schools_' + column_name), 'schools',
                        [column_name], unique=False)
|
||
|
||
def downgrade():
    """Drop the ``schools`` table and its indexes (reverse of upgrade)."""
    # Drop indexes in the reverse of their creation order, then the table.
    for column_name in ('yishuv_symbol', 'id', 'geom'):
        op.drop_index(op.f('ix_schools_' + column_name), table_name='schools')
    op.drop_table('schools')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
import sqlalchemy as sa | ||
from sqlalchemy.orm.query import Query, aliased | ||
from sqlalchemy.dialects import postgresql | ||
from sqlalchemy import desc, or_, join, select | ||
from flask_sqlalchemy import SQLAlchemy | ||
import argparse | ||
import io | ||
import math | ||
import requests | ||
from datetime import datetime | ||
from utilities import init_flask | ||
from models import AccidentMarker, Involved, School | ||
from constants import CONST | ||
import pandas as pd | ||
import os | ||
from time import strftime | ||
from datetime import datetime | ||
|
||
# Accident sub-type placed in the ANYWAY UI link's ``acctype`` parameter.
SUBTYPE_ACCIDENT_WITH_PEDESTRIAN = 1
# When True, generated links ask for accurate (not approximate) locations.
LOCATION_ACCURACY_PRECISE = True
# Value matched against AccidentMarker.locationAccuracy in the query filter.
LOCATION_ACCURACY_PRECISE_INT = 1
# Value matched against Involved.injured_type to keep pedestrian rows only.
INJURED_TYPE_PEDESTRIAN = 1
# Sentinel filtered out of AccidentMarker.yishuv_symbol (no settlement code).
YISHUV_SYMBOL_NOT_EXIST = -1
# Encoding used for every CSV written by this script.
CONTENT_ENCODING = 'utf-8'
# Template for a deep link into the ANYWAY map UI, filled per school/date range.
ANYWAY_UI_FORMAT = "https://www.anyway.co.il/?zoom=17&start_date={start_date}&end_date={end_date}&lat={latitude}&lon={longitude}&show_fatal=1&show_severe=1&show_light=1&approx={location_approx}&accurate={location_accurate}&show_markers=1&show_discussions=&show_urban=3&show_intersection=3&show_lane=3&show_day=7&show_holiday=0&show_time=24&start_time=25&end_time=25&weather=0&road=0&separation=0&surface=0&acctype={acc_type}&controlmeasure=0&district=0&case_type=0"
# Date format expected from callers' input strings.
DATE_INPUT_FORMAT = '%d-%m-%Y'
# Date format embedded in generated ANYWAY URLs.
DATE_URL_FORMAT = '%Y-%m-%d'


# Module-level Flask app and SQLAlchemy session used by the queries below.
app = init_flask()
db = SQLAlchemy(app)
|
||
def get_bounding_box(latitude, longitude, distance_in_km):
    """Return (lat_min, lon_min, lat_max, lon_max) in degrees for a box
    extending ``distance_in_km`` in each direction from the given point.

    Uses a spherical-Earth approximation (R = 6371 km); the longitude
    half-width is scaled by 1/cos(latitude) so the box keeps its
    east-west extent in kilometers at that latitude.
    """
    earth_radius_km = 6371
    lat_rad = math.radians(latitude)
    lon_rad = math.radians(longitude)

    # Radius of the circle of constant latitude through the point.
    parallel_radius_km = earth_radius_km * math.cos(lat_rad)

    half_lat = distance_in_km / earth_radius_km
    half_lon = distance_in_km / parallel_radius_km

    return (math.degrees(lat_rad - half_lat),
            math.degrees(lon_rad - half_lon),
            math.degrees(lat_rad + half_lat),
            math.degrees(lon_rad + half_lon))
|
||
def acc_inv_query(longitude, latitude, distance, start_date, end_date, school):
    """Query pedestrian accident rows near one school.

    Joins Involved with AccidentMarker restricted to a bounding box of
    ``distance`` km around (latitude, longitude), pedestrian injuries,
    the [start_date, end_date) range and precisely located markers, and
    returns the rows as a DataFrame annotated with the school's details
    and a deep link into the ANYWAY map UI.
    """
    lat_min, lon_min, lat_max, lon_max = get_bounding_box(latitude, longitude, distance)
    # Axis-aligned bounding rectangle as WKT (lon lat vertex order,
    # closed back at the first vertex). The stray semicolon-terminated
    # aliases (baseX/baseY/distanceX/distanceY) were removed.
    pol_str = 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'.format(
        lon_min, lat_min, lon_max, lat_max)

    # NOTE: the former extra filter on provider_and_id equality was
    # removed — it exactly duplicated the join condition below.
    query_obj = db.session.query(Involved, AccidentMarker) \
        .join(AccidentMarker, AccidentMarker.provider_and_id == Involved.provider_and_id) \
        .filter(AccidentMarker.geom.intersects(pol_str)) \
        .filter(Involved.injured_type == INJURED_TYPE_PEDESTRIAN) \
        .filter(or_((AccidentMarker.provider_code == CONST.CBS_ACCIDENT_TYPE_1_CODE),
                    (AccidentMarker.provider_code == CONST.CBS_ACCIDENT_TYPE_3_CODE))) \
        .filter(AccidentMarker.created >= start_date) \
        .filter(AccidentMarker.created < end_date) \
        .filter(AccidentMarker.locationAccuracy == LOCATION_ACCURACY_PRECISE_INT) \
        .filter(AccidentMarker.yishuv_symbol != YISHUV_SYMBOL_NOT_EXIST)

    df = pd.read_sql_query(query_obj.with_labels().statement, query_obj.session.bind)

    if LOCATION_ACCURACY_PRECISE:
        location_accurate = 1
        location_approx = 0
    else:
        # NOTE(review): both branches set location_accurate=1; this one
        # presumably should be 0 — kept as-is to preserve behavior.
        location_accurate = 1
        location_approx = 1
    ui_url = ANYWAY_UI_FORMAT.format(latitude=school['latitude'],
                                     longitude=school['longitude'],
                                     start_date=start_date.strftime(DATE_URL_FORMAT),
                                     end_date=end_date.strftime(DATE_URL_FORMAT),
                                     acc_type=SUBTYPE_ACCIDENT_WITH_PEDESTRIAN,
                                     location_accurate=location_accurate,
                                     location_approx=location_approx)
    # Annotate every row with the school it was matched against.
    df['anyway_link'] = ui_url
    df['school_id'] = school['id']
    df['school_name'] = school['school_name']
    df['school_yishuv_symbol'] = school['yishuv_symbol']
    df['school_yishuv_name'] = school['yishuv_name']
    df['school_longitude'] = school['longitude']
    df['school_latitude'] = school['latitude']
    return df
|
||
|
||
def main(start_date, end_date, distance, output_path):
    """Aggregate pedestrian accidents around every school.

    For each school in the ``schools`` table, collects accident rows within
    ``distance`` km during [start_date, end_date) and writes four CSVs into
    ``output_path``: the raw rows, injured counts per school, injured counts
    per school and injury severity, and accident counts per school and
    marker severity.
    """
    schools_query = sa.select([School])
    df_schools = pd.read_sql_query(schools_query, db.session.bind)

    # Collect per-school frames and concatenate once; the previous
    # pd.concat inside the loop was quadratic in the number of schools.
    per_school_frames = [acc_inv_query(longitude=school['longitude'],
                                       latitude=school['latitude'],
                                       distance=distance,
                                       start_date=start_date,
                                       end_date=end_date,
                                       school=school)
                         for _, school in df_schools.iterrows()]
    df_total = pd.concat(per_school_frames, axis=0) if per_school_frames else pd.DataFrame()
    df_total.to_csv(os.path.join(output_path, 'df_total.csv'), encoding=CONTENT_ENCODING)

    # School-identifying columns shared by the aggregations below
    # (previously repeated inline three times).
    school_cols = ['school_id', 'school_name', 'anyway_link', 'school_longitude',
                   'school_latitude', 'school_yishuv_symbol', 'school_yishuv_name']

    # Injured people per school (one row per Involved record).
    df_total_involved_count = (df_total.groupby(school_cols)
                               .size()
                               .reset_index(name='injured_count')
                               .sort_values('injured_count', ascending=False))
    df_total_involved_count.to_csv(os.path.join(output_path, 'df_total_involved_count.csv'),
                                   encoding=CONTENT_ENCODING, header=True)

    # Injured people per school, broken down by injury severity.
    df_total_involved_by_injury = (df_total.groupby(school_cols + ['involved_injury_severity'])
                                   .size()
                                   .reset_index(name='injured_count')
                                   .sort_values('injured_count', ascending=False))
    df_total_involved_by_injury.to_csv(os.path.join(output_path, 'df_total_involved_by_injury.csv'),
                                       encoding=CONTENT_ENCODING, header=True)

    # Distinct accidents (deduplicated on provider_and_id) per school and severity.
    df_total_accident_count = (df_total.drop_duplicates(school_cols + ['provider_and_id'])
                               .groupby(['school_id', 'school_name', 'school_yishuv_symbol',
                                         'school_yishuv_name', 'markers_severity'])
                               .size()
                               .reset_index(name='accidents_count')
                               .sort_values('accidents_count', ascending=False))
    df_total_accident_count.to_csv(os.path.join(output_path, 'df_total_accident_count.csv'),
                                   encoding=CONTENT_ENCODING, header=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from .. import school_fields | ||
import geoalchemy2.functions as func | ||
import logging | ||
from datetime import datetime | ||
from ..utilities import init_flask, CsvReader, time_delta, chunks | ||
from flask_sqlalchemy import SQLAlchemy | ||
from ..models import School | ||
import pandas as pd | ||
|
||
# Module-level Flask app and SQLAlchemy session used by the import helpers below.
app = init_flask()
db = SQLAlchemy(app)
|
||
def get_data_value(value):
    """
    :returns: value as an int for parameters which are not mandatory in
        an accident data record, or -1 if the parameter value does not
        exist (None, NaN, or any falsy value, matching previous behavior).
    """
    # Fixed: pandas renders empty CSV cells as NaN, which is truthy, so the
    # old ``int(value) if value else -1`` raised ValueError on missing data.
    # NaN is the only value for which ``x != x`` holds.
    if value is None or value != value:
        return -1
    return int(value) if value else -1
|
||
def get_schools(filepath):
    """Read the schools CSV at ``filepath``.

    Returns a list of dicts keyed by School model column names, ready for
    ``bulk_insert_mappings``.
    """
    logging.info("\tReading schools data from '%s'..." % filepath)
    schools = []
    df = pd.read_csv(filepath)
    # ``row`` renamed from ``school``, which was immediately shadowed by
    # the result dict of the same name.
    for _, row in df.iterrows():
        longitude = float(row[school_fields.longitude])
        latitude = float(row[school_fields.latitude])
        # WKT point with SRID, matching the SRID-4326 geometry column.
        point_str = 'SRID=4326;POINT({0} {1})'.format(longitude, latitude)
        schools.append({
            "id": int(row[school_fields.id]),
            "fcode_type": int(row[school_fields.fcode_type]),
            "yishuv_symbol": int(row[school_fields.yishuv_symbol]),
            "yishuv_name": row[school_fields.yishuv_name],
            "school_name": row[school_fields.school_name],
            # Fixed: this key previously had a trailing space
            # ("school_latin_name "), which does not match any model column.
            "school_latin_name": row[school_fields.school_latin_name],
            "usg": int(row[school_fields.usg]),
            "usg_code": int(row[school_fields.usg_code]),
            "e_ord": float(row[school_fields.e_ord]),
            "n_ord": float(row[school_fields.n_ord]),
            "longitude": longitude,
            "latitude": latitude,
            "geom": point_str,
            "data_year": get_data_value(row[school_fields.data_year]),
            "prdct_ver": None,
            "x": float(row[school_fields.x]),
            "y": float(row[school_fields.y]),
        })

    return schools
|
||
def import_to_datastore(filepath, batch_size):
    """Insert schools from ``filepath`` that are not already in the DB.

    Inserts in chunks of ``batch_size`` mappings, committing after each
    chunk. Returns the number of newly inserted schools, or 0 on failure.
    """
    try:
        assert batch_size > 0
        started = datetime.now()
        schools = get_schools(filepath)
        existing_ids = set(map(lambda x: x[0],
                               db.session.query(School.id).all()))
        schools = [school for school in schools if school['id'] not in existing_ids]
        logging.info('inserting ' + str(len(schools)) + ' new schools')
        for schools_chunk in chunks(schools, batch_size):
            db.session.bulk_insert_mappings(School, schools_chunk)
            db.session.commit()
        # Fixed: the counter was previously incremented by len(schools) once
        # per chunk, over-counting whenever there was more than one chunk.
        new_items = len(schools)
        logging.info("\t{0} items in {1}".format(new_items, time_delta(started)))
        return new_items
    except ValueError as e:
        # Fixed: the old handler referenced undefined names
        # (failed_dirs/directory) and would itself raise NameError.
        logging.exception("Failed to import schools from '%s': %s" % (filepath, e))
        return 0
|
||
def parse(filepath, batch_size):
    """Import schools from ``filepath`` in batches and log the total time."""
    start_time = datetime.now()
    imported = import_to_datastore(filepath, batch_size)
    logging.info("Total: {0} schools in {1}".format(imported, time_delta(start_time)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Column names of the source schools CSV, one module-level constant per
# School model field (used as ``row[school_fields.<field>]`` by the importer).
# NOTE(review): ``id`` shadows the builtin at module scope; harmless here
# since the module is only read via attribute access.
id = "UNIQ_ID"
fcode_type = "FCODE_TYPE"
yishuv_symbol = "SETL_CODE"
yishuv_name = "SETL_NAME"
school_name = "NAME"
school_latin_name = "LATIN_NAME"
usg = "USG_GROUP"
usg_code = "USG_CODE"
e_ord = "E_ORD"
n_ord = "N_ORD"
longitude = "LON"
latitude = "LAT"
data_year = "DATA_YEAR"
prdct_ver = "PRDCT_VER"
x = "X"
y = "Y"
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.