In [1]:
import csv
import copy
import pandas as pd
from tqdm import tqdm
from datetime import datetime

from coordinate_converter import utm_to_ssb_grid_id

In [13]:
# Input: incident records that are loaded into a DataFrame with pd.read_csv
# further down in this notebook.
INCIDENTS_FILE = "data/incidents_all_processed.csv"

# Output: the CSV that save_counts_to_csv writes (overwritten on every run).
DISTRIBUTION_FILE = "data/incidents_distribution_station.csv"
# Header row and field order used by the csv.DictWriter in save_counts_to_csv.
CSV_COLUMNS = ['Base Station','Year','Month','Day','Week','Weekday','Hour','Incidents']

In [3]:
def create_empty_counts(years=range(2015, 2019), stations_file="data/base_stations.csv"):
    """Build a zero-initialised incident counter for every base station.

    The result is a nested dict
    ``station_id -> year -> month -> day -> hour -> 0`` where months run
    1-12, days run 1-31 and hours run 0-23.  Every month gets 31 day slots,
    including days that do not exist (e.g. 30 February); consumers have to
    skip those themselves.

    Args:
        years: Iterable of integer years to create slots for.  Defaults to
            2015-2018 inclusive, matching the previously hard-coded range.
        stations_file: Path of the base-station CSV.  The first column of
            each row is converted with ``int()`` and used as the station id.

    Returns:
        dict: One independent nested counter per station id.  Stations do
        not share any inner dict, so incrementing one never affects another.
    """
    base_stations = pd.read_csv(stations_file)

    # Materialise once so that a generator argument can be reused for every station.
    years = list(years)

    def _empty_calendar():
        # A fresh structure per call keeps the stations independent without
        # deep-copying a shared template.
        return {
            year: {
                month: {
                    day: {hour: 0 for hour in range(24)}
                    for day in range(1, 32)
                }
                for month in range(1, 13)
            }
            for year in years
        }

    return {int(station[0]): _empty_calendar() for station in base_stations.values}

In [4]:
def count_incidents(df, counts):
    """Tally incidents per base station and hour, updating ``counts`` in place.

    Rows of ``df`` are read positionally: column 0 is a timestamp string in
    ``'%Y-%m-%d %H:%M:%S'`` format, columns 1 and 2 are the two coordinates
    passed (as ints) to ``utm_to_ssb_grid_id`` (presumably UTM easting and
    northing -- confirm against ``coordinate_converter``).  The resulting
    grid id is looked up in ``data/grid_zones.csv`` (column ``SSBID1000M``)
    to find its ``base_station``, and that station's counter for the
    incident's year/month/day/hour is incremented.

    Incidents that cannot be counted are reported with ``print`` and skipped:
    either the grid id is absent from ``grid_zones.csv``, or ``counts`` has
    no slot for the resolved station / timestamp.

    Args:
        df: DataFrame of incidents with the column order described above.
        counts: Nested dict ``station -> year -> month -> day -> hour -> int``.

    Returns:
        None.  ``counts`` is mutated.

    Raises:
        KeyError: If ``grid_zones.csv`` lacks the ``SSBID1000M`` or
            ``base_station`` column.
        ValueError: If a timestamp does not match the expected format.
    """
    grid_zones = pd.read_csv("data/grid_zones.csv")

    # Build the grid -> station lookup once instead of scanning the whole frame
    # for every incident.  keep="first" reproduces the "first matching row wins"
    # rule of the previous `.loc[...].iloc[0]` lookup.
    first_zone_per_grid = grid_zones.drop_duplicates(subset="SSBID1000M", keep="first")
    station_by_grid = dict(
        zip(
            first_zone_per_grid["SSBID1000M"].tolist(),
            first_zone_per_grid["base_station"].tolist(),
        )
    )

    for incident in tqdm(df.values, desc="Count incidents per hour"):
        dt = datetime.strptime(incident[0], '%Y-%m-%d %H:%M:%S')
        incident_grid = utm_to_ssb_grid_id(int(incident[1]), int(incident[2]))

        if incident_grid not in station_by_grid:
            print(f"grid {incident_grid} was not in grid_zones.csv")
            continue

        incident_station = station_by_grid[incident_grid]
        try:
            counts[incident_station][dt.year][dt.month][dt.day][dt.hour] += 1
        except KeyError:
            # The grid was found; what is missing is a counter slot (station not
            # in `counts`, or a timestamp outside the prepared range).  Report
            # that accurately instead of blaming grid_zones.csv.
            print(
                f"no counter for station {incident_station} at {dt} "
                f"(grid {incident_grid})"
            )

In [10]:
def save_counts_to_csv(counts):
    """Write the nested incident counters to ``DISTRIBUTION_FILE`` as CSV.

    One row is written per station / year / month / day / hour slot, with
    the columns listed in ``CSV_COLUMNS``.  ``Week`` is the ISO week number
    of the date (so early-January or late-December days can carry a week
    number belonging to the neighbouring ISO year) and ``Weekday`` is the
    ISO weekday, 1 = Monday ... 7 = Sunday.

    Day slots that do not form a real calendar date (e.g. 31 April) are
    skipped.  Any other failure -- such as a row that does not match
    ``CSV_COLUMNS`` -- propagates instead of silently truncating the output.

    Args:
        counts: Nested dict ``station -> year -> month -> day -> hour -> int``.

    Returns:
        None.  The output file is created or overwritten.

    Raises:
        ValueError: If a row contains fields missing from ``CSV_COLUMNS``.
        OSError: If the output file cannot be opened for writing.
    """
    with open(DISTRIBUTION_FILE, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=CSV_COLUMNS)
        writer.writeheader()
        for station_id, years in counts.items():
            for year, months in years.items():
                for month, days in months.items():
                    for day, hours in days.items():
                        # Only the date construction may legitimately fail:
                        # the counters hold 31 day slots for every month.
                        try:
                            date = datetime(year, month, day)
                        except ValueError:
                            continue

                        # Date-derived fields are the same for all 24 hours,
                        # so compute them once per day.  Indexing works for
                        # both the plain tuple (Python < 3.9) and the named
                        # tuple returned by isocalendar().
                        week = date.isocalendar()[1]
                        weekday = date.isoweekday()

                        for hour, count in hours.items():
                            writer.writerow({
                                'Base Station': station_id,
                                'Year': year,
                                'Month': month,
                                'Day': day,
                                'Week': week,
                                'Weekday': weekday,
                                'Hour': hour,
                                'Incidents': count,
                            })
                            



In [6]:
# Load every incident record. Column 0 has to stay a plain string here because
# count_incidents parses it itself with datetime.strptime.
# NOTE(review): parse_dates=True without index_col presumably only targets the
# index, so no data column is converted -- confirm against the pandas docs.
incidents = pd.read_csv(INCIDENTS_FILE, encoding='utf-8', escapechar='\\', parse_dates=True)

In [14]:
# Pipeline: build zeroed counters, tally the incidents into them (count_incidents
# mutates `counts` in place and returns nothing), then write the result to CSV.
counts = create_empty_counts()
count_incidents(incidents, counts)
save_counts_to_csv(counts)