In [24]:
'''
04_create-itinerary.ipynb
Manage itinerary
'''

import pandas as pd
import geopandas as gpd
import numpy as np
import shapely
from shapely.geometry import Point, Polygon, LineString, MultiPoint
import math
import pathlib

from datetime import date, timedelta

DIR_ROOT = os.path.join(pathlib.Path().absolute(), '../..' )
DIR_DATA = DIR_ROOT + '/data/02_processed/'
DIR_PATHS = DIR_ROOT + '/data/03_paths/'
DIR_OUT = DIR_ROOT + '/data/04_itinerary/'
os.makedirs(DIR_OUT) if not os.path.exists(DIR_OUT) else False

activities_df = pd.read_csv(DIR_DATA + 'activities.csv')
localities = gpd.read_file(DIR_DATA + 'localities.geojson').to_dict('records')
paths = gpd.read_file(DIR_PATHS + 'paths.geojson').to_dict('records')

In [25]:
# set periods

bound1 = date(1253, 9, 23)
bound2 = date(1260, 7, 12)
bound3 = date(1269, 10, 27)
bound4 = date(1276, 11, 21)

def get_period(date_to_check):
    if date_to_check < bound1:
        return '1'
    elif date_to_check < bound2:
        return '2'
    elif date_to_check < bound3:
        return '3'
    elif date_to_check < bound4:
        return '4'
    else:
        return '5'

def is_period(date_from, date_to, period):
    if period == '1':
        return date_from < bound1 
    elif period == '2':
        return date_to > bound1 and date_from < bound2
    elif period == '3':
        return date_to > bound2 and date_from < bound3
    elif period == '4':
        return date_to > bound3 and date_from < bound4
    elif period == '5':
        return date_to > bound4

In [26]:
print(activities_df[pd.isna(activities_df['tag_post'])])

Empty DataFrame
Columns: [Unnamed: 0, no, sicherheit, datum_text, tag_post, monat_post, jahr_post, tag_ante, monat_ante, jahr_ante, lokalitat_name, lokalitat_agglomeration, lokalitat_politik, bi1_lokalitatname, bi2_lokalitatname, bi3_lokalitatname, bi4_lokalitatname, bi5_lokalitatname, gg1_lokalitatname, gg2_lokalitatname, gg3_lokalitatname, gg4_lokalitatname, gg5_lokalitatname, aussteller]
Index: []

[0 rows x 24 columns]


In [27]:
# calculate middle dates and sort activities

activities_df['date_post'] = activities_df.apply(
    lambda x: date(int(x['jahr_post']), int(x['monat_post']), int(x['tag_post']))
    ,axis=1
)

activities_df['date_ante'] = activities_df.apply(
    lambda x: date(int(x['jahr_ante']), int(x['monat_ante']), int(x['tag_ante']))
    ,axis=1
)

activities_df['date_delta'] = activities_df['date_ante'] - activities_df['date_post']

activities_df['date_middle'] = activities_df.apply(
    lambda x: x['date_post'] + timedelta(days = x['date_delta'].days / 2)
    ,axis=1
)

activities_df['period'] = activities_df.apply(
    lambda x: get_period(x['date_middle']), axis=1
)


#print(activities_df[activities_df['date_delta'] > timedelta(days=1)])
#print(activities_df['date_middle'])

activities_df = activities_df.sort_values(['date_middle'])
activities_df['invalid'] = False

activities = activities_df.to_dict('records')


In [28]:
# create dataset of stays, hours, influences...

first_day = activities[0]['date_middle']
last_day = activities[len(activities) - 1]['date_middle']
last_stay_date = first_day

all_days_d = last_day - first_day
all_days_d.days

days = []

positions = []
stays = []
influences = []


# finds the activity that happened before the given date
def find_previous_activity(date):
    previous_activities = [a for a in activities if a['date_middle'] <= date]
    min_day_d = 1000
    previous_act = False
    for act in previous_activities:
        d = (date - act['date_middle']).days
        if d < min_day_d:
            min_day_d = d
            previous_act = act
    return previous_act

# finds the activity that happened after the given date
def find_next_activity(date):
    next_activities = [a for a in activities if a['date_middle'] > date]
    min_day_d = 1000
    next_act = False
    for act in next_activities:
        d = (act['date_middle'] - date).days 
        if d < min_day_d:
            min_day_d = d
            next_act = act
    return next_act

# check influence 
def check_locality_by_name(check_name, act_date, act_localityname, act_domestic):
    check_influence = [locality for locality in localities if locality['name'] == check_name]
    check_source_locality = [locality for locality in localities if locality['name'] == act_localityname]

    if len(check_influence) == 1 and len(check_source_locality) == 1:
        locality = check_influence[0]
        source_locality = check_source_locality[0]
        return {
            "certainty": locality['prazision'],
            "name": locality['name'],
            "geometry": Point(locality['y'], locality['x']),
            "locality": act_localityname,
            "geometry_source": Point(source_locality['y'], source_locality['x']),
            "date": act_date,
            "period": get_period(act_date),
            "domestic": act_domestic
        }
    else: 
        return False

for day_no in range(all_days_d.days):
    if day_no < 80099:
        this_date = first_day + timedelta(days=day_no)

        this_activity = False
        this_place = False

        next_a = find_next_activity(this_date)
        prev_a = find_previous_activity(this_date)
        
        if next_a and prev_a:

            days_to_next = (next_a['date_middle'] - this_date).days
            days_to_prev = (prev_a['date_middle'] - this_date).days

            days_diff = (next_a['date_middle'] - prev_a['date_middle']).days

            #print("")

            #print('{}: {}'.format(day_no, date))
            #print("prev {}({}) - next {}({})".format(prev_a['lokalitat_name'], prev_a['date_middle'], next_a['lokalitat_name'], next_a['date_middle']) )

            # default is that we are not traveling
            travel_time = 0
            need_to_travel = False
            this_date_stay_place = False

            hour_positions = False

            domestic = True

            travel_start = prev_a['date_middle'] + timedelta(days= days_diff / 2)

            if prev_a['lokalitat_name'] != next_a['lokalitat_name']:
                travel = next(path for path in paths if path['from'] == prev_a['lokalitat_name'] and path['to'] == next_a['lokalitat_name'])
 
                travel_time = math.ceil(travel['dist']) if travel['dist'] <= days_diff else days_diff

                travel_start = next_a['date_middle'] - timedelta(days=travel_time) - timedelta(days=(days_diff - travel_time) / 2)
                travel_end = travel_start + timedelta(days=travel_time)

                need_to_travel = this_date > travel_start and this_date <= travel_end
                #print("travel {} days ({} - {}), now traveling {}".format(travel_time, travel_start, travel_end, need_to_travel) )

                # checking domestic
                if prev_a['lokalitat_politik'] == 1:
                    if next_a['lokalitat_politik'] == 1:
                        domestic = True
                    else:
                        domestic = this_date < travel_start
                else:
                    if next_a['lokalitat_politik'] == 1:
                        domestic = this_date > travel_end
                    else:
                        domestic = False

                if need_to_travel == False:
                    this_date_stay_place = next_a['lokalitat_name'] if this_date > travel_end else prev_a['lokalitat_name']

                
                if need_to_travel == True:
                    day_of_travel = (this_date - travel_start).days - 1

                    start_fraction = 1 / travel_time * day_of_travel

                    path_part = shapely.ops.substring(travel['geometry'], start_fraction, start_fraction + 1 / travel_time , normalized=True)
                    hour_positions = [shapely.ops.substring(path_part, h/23, h/23, normalized = True) for h in range(24)]
                    this_date_stay_place = False 



            else: 
                domestic = True if next_a['lokalitat_politik'] == '1' else False

                days_to_prev = this_date - prev_a['date_middle']
                days_to_next = next_a['date_middle'] - this_date
                this_date_stay_place = prev_a['lokalitat_name'] if days_to_prev < days_to_next else next_a['lokalitat_name']

            if hour_positions == False:
                locality_data = next(locality for locality in localities if locality['name'] == this_date_stay_place)
                hour_positions = [locality_data['geometry']] * 24

            if this_date == travel_start and prev_a['lokalitat_name'] != next_a['lokalitat_name']:
                locality_data = next(locality for locality in localities if locality['name'] == this_date_stay_place)
                from_date = last_stay_date
                to_date = this_date
                stay_activities = [a for a in activities if a['date_middle'] >= from_date and a['date_middle'] <= to_date ]

                # create new stay item
                stays.append(
                    {
                        "locality": prev_a['lokalitat_name'],
                        "from_date": from_date,
                        "to_date": to_date,
                        "days": (to_date - from_date).days,
                        "domestic": prev_a['lokalitat_politik'] == 1,
                        "geometry": locality_data['geometry'],
                        "no_activities": len(stay_activities),
                        "period1": is_period(from_date, to_date, '1'),
                        "period2": is_period(from_date, to_date, '2'),
                        "period3": is_period(from_date, to_date, '3'),
                        "period4": is_period(from_date, to_date, '4'),
                        "period5": is_period(from_date, to_date, '5')
                    }
                )

                # influences
                for act in stay_activities:
                    for column_name in [
                        'bi1_lokalitatname',
                        'bi2_lokalitatname',
                        'bi3_lokalitatname',
                        'bi4_lokalitatname',
                        'bi5_lokalitatname',
                        'gg1_lokalitatname',
                        'gg2_lokalitatname',
                        'gg3_lokalitatname',
                        'gg4_lokalitatname',
                        'gg5_lokalitatname',
                    ]:
                        locality_obj = check_locality_by_name(
                            act[column_name], 
                            this_date, 
                            act['lokalitat_name'],
                            act['lokalitat_politik'] == 1
                        )

                        if locality_obj:
                            influences.append(locality_obj)


                # set the initial date for the next stay
                last_stay_date = travel_end
            



            # exactly this day is stated in the itinerary
            activity_with_this_date = [activity for activity in activities if activity['date_middle'] == this_date]
            this_date_activity = len(activity_with_this_date) > 0
 
            # create positions
            [positions.append({
                'date': this_date,
                "day": this_date.day,
                "month": this_date.month,
                "year": this_date.year,
                'hour': pi,
                'geometry': hour_positions[pi],
                'stay_location': this_date_stay_place, # false if traveling
                'domestic': domestic, 
                'period': get_period(this_date)
            }) for pi in range(len(hour_positions))]

            #print("activity {}, stay {}".format(this_date_activity, this_date_stay_place))

            days.append({
                "activity": this_date_activity, # whether there was an activity in this day
                "locality": this_date_stay_place, # name of the location where PO2 was active, False if there was only traveling
                "traveling": need_to_travel, # True if PO2 was moving to a different location
                "domestic": domestic,
                "date": this_date,
                "day": this_date.day,
                "month": this_date.month,
                "year": this_date.year,
                "period": get_period(this_date)
            })

days_df = gpd.GeoDataFrame(days)
positions_df = gpd.GeoDataFrame(positions)
stays_df = gpd.GeoDataFrame(stays)
influences_df = gpd.GeoDataFrame(influences)

days_df.to_csv(DIR_OUT + 'days.csv')
positions_df.to_csv(DIR_OUT + 'positions.csv')
stays_df.to_csv(DIR_OUT + 'stays.csv')
activities_df.to_csv(DIR_OUT + 'activities.csv')
influences_df.to_csv(DIR_OUT + 'influences.csv')

Wien Netolice
{'from': 'Wien', 'to': 'Netolice', 'dist': 8.779650457657544, 'geometry': <shapely.geometry.linestring.LineString object at 0x7f8bb13dd040>}
Wien Netolice
{'from': 'Wien', 'to': 'Netolice', 'dist': 8.779650457657544, 'geometry': <shapely.geometry.linestring.LineString object at 0x7f8bb13dd040>}
Wien Netolice
{'from': 'Wien', 'to': 'Netolice', 'dist': 8.779650457657544, 'geometry': <shapely.geometry.linestring.LineString object at 0x7f8bb13dd040>}
Wien Netolice
{'from': 'Wien', 'to': 'Netolice', 'dist': 8.779650457657544, 'geometry': <shapely.geometry.linestring.LineString object at 0x7f8bb13dd040>}
Wien Netolice
{'from': 'Wien', 'to': 'Netolice', 'dist': 8.779650457657544, 'geometry': <shapely.geometry.linestring.LineString object at 0x7f8bb13dd040>}
Wien Netolice
{'from': 'Wien', 'to': 'Netolice', 'dist': 8.779650457657544, 'geometry': <shapely.geometry.linestring.LineString object at 0x7f8bb13dd040>}
Wien Netolice
{'from': 'Wien', 'to': 'Netolice', 'dist': 8.77965045765

FileNotFoundError: [Errno 2] No such file or directory: '/home/casus-fwu036/projects/itinerary-analysis_premysl-otakar/src/processing/../../data/04_itinerary/days.csv'