In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

In [None]:
# Input CSV locations; both are relative paths, so they resolve against the
# notebook's current working directory.
user_data_path = 'Data/user_data.csv'  # loaded into `user_data` by the next cell
# NOTE(review): unlike the user data, this file is not under 'Data/' -- confirm the location.
covid_cases_data_path = 'covid-cases-coordinates.csv'  # loaded into `covid_data` by the next cell

In [None]:
# Read both CSVs into DataFrames. The functions defined below read the columns
# 'visit_start_time', 'visit_end_time', 'name' (and 'address' in
# get_public_place_visits) from `user_data`.
user_data = pd.read_csv(user_data_path)
# NOTE(review): `covid_data` is not referenced by any other cell visible in this notebook.
covid_data = pd.read_csv(covid_cases_data_path)

In [None]:
def get_future_risk_score(user_data, past_number_of_weeks):
    """Run the whole scoring pipeline for one user's visit history.

    Steps, in order:
      1. Stamp every visit with ``number_of_days_passed`` -- whole days between
         the visit start and "now" (``datetime.now()``).  ``visit_start_time``
         is divided by 1000 before conversion, i.e. it is treated as epoch
         milliseconds.
      2. Add the weekday / duration columns.
      3. Detect the user's likely home and work places.
      4. Compute the score over the last ``past_number_of_weeks`` weeks and
         collect the visits to other places in that window.

    Note: the ``number_of_days_passed`` column is written onto the frame that
    was passed in, so the caller's DataFrame is modified.

    Returns:
        tuple: ``(final_score, visited_public_places)``.
    """
    now = datetime.now()

    def _whole_days_since(visit_start_ms):
        # Naive local-time difference; ``.days`` keeps only the whole-day part.
        return (now - datetime.fromtimestamp(visit_start_ms / 1000)).days

    user_data['number_of_days_passed'] = user_data['visit_start_time'].apply(_whole_days_since)

    enriched = add_required_time_information(user_data)
    home_and_work = get_possible_work_and_home_places(enriched)

    risk_score = get_final_percentage_of_time_spent_in_places(
        enriched, home_and_work, past_number_of_weeks
    )
    public_visits = get_public_places_visited_per_weeks(
        enriched, past_number_of_weeks, home_and_work
    )
    return risk_score, public_visits

In [None]:
def get_public_places_visited_per_weeks(user_data, past_number_of_weeks, home_addresses):
    """Return recent visits to places that are not in ``home_addresses``.

    The window is ``past_number_of_weeks * 7`` days, selected (and ordered)
    by ``get_last_days_data``.  Rows whose ``name`` appears in
    ``home_addresses`` are dropped from the result.
    """
    window_in_days = past_number_of_weeks * 7
    recent_visits = get_last_days_data(user_data, window_in_days)
    at_known_place = recent_visits['name'].isin(home_addresses)
    return recent_visits[~at_known_place]

In [None]:
def get_final_percentage_of_time_spent_in_places(user_data, home_addresses, past_number_of_weeks,
                                                 day_length_hours=12):
    """Average share of a day spent away from home/work over the recent window.

    For every weekday that occurs in the last ``past_number_of_weeks * 7`` days
    the hours spent at places *not* listed in ``home_addresses`` are summed,
    divided by ``day_length_hours`` and by ``past_number_of_weeks``; the mean
    of those per-weekday values is returned.  Weekdays with no rows in the
    window do not take part in the mean.

    Despite the function name the result is a plain ratio (it is not
    multiplied by 100).

    Args:
        user_data: DataFrame with ``name``, ``week_day``,
            ``time_spent_in_location`` and ``number_of_days_passed`` columns.
        home_addresses: collection of place names to exclude.
        past_number_of_weeks: size of the look-back window in weeks.
        day_length_hours: hours that count as one full day when normalising.
            Defaults to 12, the value that used to be hard-coded.
            NOTE(review): presumably the waking/active part of a day -- confirm.

    Returns:
        A scalar float; NaN when the window contains no rows.
    """
    recent = get_last_days_data(user_data, past_number_of_weeks * 7)

    # Zero out the time spent at home/work instead of dropping those rows, so a
    # weekday spent entirely at home still contributes a 0 to the mean.
    away = ~recent['name'].isin(home_addresses)
    hours_outside = recent['time_spent_in_location'].where(away, 0.0)

    share_per_weekday = hours_outside.groupby(recent['week_day']).sum() / day_length_hours
    mean_share_per_weekday = share_per_weekday / past_number_of_weeks
    return mean_share_per_weekday.mean()

In [None]:
def get_last_days_data(user_data, number_of_days):
    """Select the rows at most ``number_of_days`` days old, most recent first.

    A row qualifies when its ``number_of_days_passed`` value is less than or
    equal to ``number_of_days``.  The result is ordered by ascending
    ``number_of_days_passed``.
    """
    is_recent = user_data['number_of_days_passed'].le(number_of_days)
    recent_rows = user_data.loc[is_recent]
    return recent_rows.sort_values(by='number_of_days_passed')

In [None]:
def add_required_time_information(user_data):
    """Add ``week_day`` and ``time_spent_in_location`` columns to ``user_data``.

    ``visit_start_time`` and ``visit_end_time`` are treated as epoch
    milliseconds (both are divided by 1000 before use).

    Columns written:
        week_day: weekday of the visit start (Monday == 0), taken from
            ``datetime.fromtimestamp`` and therefore expressed in the local
            time zone of the machine running the notebook.
        time_spent_in_location: visit length in hours, computed from the
            whole-second difference between end and start.

    The duration is computed directly from the epoch values rather than via
    ``timedelta.seconds``: that attribute ignores the ``days`` part of the
    delta, so visits of 24 hours or more were truncated (30 h became 6 h) and
    an end before the start wrapped around to a positive value.  Direct
    arithmetic is also unaffected by daylight-saving changes.

    The frame is modified in place and the same object is returned.
    """
    user_data['week_day'] = user_data['visit_start_time'].apply(
        lambda start_ms: datetime.fromtimestamp(start_ms / 1000).weekday()
    )

    start_seconds = user_data['visit_start_time'] // 1000
    end_seconds = user_data['visit_end_time'] // 1000
    user_data['time_spent_in_location'] = (end_seconds - start_seconds) / 3600
    return user_data

In [None]:
def get_possible_work_and_home_places(user_data, min_visit_hours=4, min_time_share=0.1):
    """Guess which place names are the user's home or workplace.

    A place qualifies when both conditions hold:
      * at least one single visit to it lasted more than ``min_visit_hours``
        hours, and
      * the time spent there is more than ``min_time_share`` of the total
        ``time_spent_in_location`` across all rows.

    Rows with a missing ``name`` still count towards the total time but can
    never be returned as a place; previously such a row with a long visit
    raised ``KeyError`` because the per-name totals do not contain it.

    Args:
        user_data: DataFrame with ``name`` and ``time_spent_in_location``
            (hours) columns.
        min_visit_hours: single-visit duration threshold; defaults to 4, the
            value that used to be hard-coded.
        min_time_share: share-of-total-time threshold; defaults to 0.1, the
            value that used to be hard-coded.

    Returns:
        list: qualifying place names, in order of first long visit.  The list
        is also printed.
    """
    hours = user_data['time_spent_in_location']

    # Candidate places: any named place with at least one long visit.
    long_time_locations = user_data.loc[hours > min_visit_hours, 'name'].dropna().unique()

    # Share of the overall tracked time spent at each named place.
    share_of_time_per_location = hours.groupby(user_data['name']).sum() / hours.sum()

    work_home_addresses = [
        location
        for location in long_time_locations
        if share_of_time_per_location[location] > min_time_share
    ]
    print(work_home_addresses)
    return work_home_addresses

In [None]:
def get_public_place_visits(user_data):
    """Keep the rows whose ``name`` does not occur inside their ``address``.

    The check is evaluated row by row with Python's ``in`` operator, so for
    string values it is a case-sensitive substring test.
    """
    name_missing_from_address = pd.Series(
        [place not in address
         for place, address in zip(user_data['name'], user_data['address'])],
        index=user_data.index,
        dtype=bool,
    )
    return user_data[name_missing_from_address]

In [None]:
# Score the last week of visits. `score` is the value computed by
# get_final_percentage_of_time_spent_in_places; `df_public` holds the recent
# visits to places not detected as home/work.
# Note: the call also adds derived columns to `user_data` itself, and the
# result depends on datetime.now(), so it changes between runs.
score, df_public = get_future_risk_score(user_data, past_number_of_weeks=1)

In [None]:
# Display the frame after scoring: it now carries the columns written in place
# by the pipeline ('number_of_days_passed', 'week_day', 'time_spent_in_location').
user_data