### Setup

In [13]:
import os
import pandas as pd 
from epiweeks import Week

In [14]:
# Search term whose per-city exports are processed; it is also used to build
# the input file names and the name of the output CSV.
KEYWORD = 'violencia de genero'

# Location of the city lookup table (coordinates, municipality names/codes
# and ISO region codes), relative to this notebook.
PATH_CODES = '../../cities.csv'

#### CSV Files

In [15]:
# Load the city lookup table; its `ISO` and `Municipality` columns drive the
# file lookup and the column naming in the cells below.
cities = pd.read_csv(filepath_or_buffer=PATH_CODES)
cities

Unnamed: 0,lat,lng,Municipality,Municipality code,ISO
0,6.2447,-75.5748,Medellín,5001,CO-ANT
1,3.44,-76.5197,Cali,76001,CO-VAC
2,4.5781,-74.2144,Soacha,25754,CO-CUN
3,4.1425,-73.6294,Villavicencio,50001,CO-MET
4,1.2136,-77.2811,Pasto,52001,CO-NAR
5,10.9639,-74.7964,Barranquilla,8001,CO-ATL
6,7.1186,-73.1161,Bucaramanga,68001,CO-SAN
7,4.4378,-75.2006,Ibagué,73001,CO-TOL
8,2.4411,-76.6061,Popayán,19001,CO-CAU
9,7.9075,-72.5047,Cúcuta,54001,CO-NSA


### Read each file

In [16]:
# One frame per city, in the same order as the rows of `cities`. Each export
# is located by the city's ISO code and the search keyword.
# Note: an earlier variant of this cell concatenated two exports per city
# from the `cities/` folder instead of reading the single cohort export.
dfs = [
    pd.read_csv(f'cities_cohort/{iso_code}_{KEYWORD}.csv')
    for iso_code in cities.ISO
]

In [17]:
# Inspect the first city's frame to see how the raw export was parsed.
dfs[0]

Unnamed: 0,Category: All categories
Week,violencia de genero: (Antioquia)
2017-12-24,0
2017-12-31,32
2018-01-07,0
2018-01-14,22
...,...
2022-12-04,41
2022-12-11,1
2022-12-18,38
2022-12-25,21


### Merge

In [18]:
# Place the per-city frames side by side. `concat` aligns them on their row
# index (the week labels), and the municipality names become the outer level
# of the resulting two-level column header.
merged_df = pd.concat(objs=dfs, axis='columns', keys=cities['Municipality'])

In [19]:
# Show the combined frame; the column header still has two levels here.
merged_df

Municipality,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta
Unnamed: 0_level_1,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories
Week,violencia de genero: (Antioquia),violencia de genero: (Valle del Cauca),violencia de genero: (Cundinamarca),violencia de genero: (Meta),violencia de genero: (Narino),violencia de genero: (Atlantico),violencia de genero: (Santander Department),violencia de genero: (Tolima),violencia de genero: (Cauca Department),violencia de genero: (North Santander)
2017-12-24,0,0,0,1,30,0,47,0,1,100
2017-12-31,32,0,31,0,88,37,0,1,0,0
2018-01-07,0,1,2,0,0,0,0,0,3,0
2018-01-14,22,13,25,0,1,0,36,1,0,0
...,...,...,...,...,...,...,...,...,...,...
2022-12-04,41,13,0,17,50,0,0,18,0,0
2022-12-11,1,33,22,0,1,18,44,61,21,0
2022-12-18,38,0,22,12,1,19,0,35,0,17
2022-12-25,21,12,14,<1,0,0,19,55,1,30


In [20]:
# Remove the second level of the column index
merged_df.columns = merged_df.columns.droplevel(1)
merged_df.columns.name = None
merged_df = merged_df.iloc[1:,:]
merged_df = merged_df.reset_index().rename(columns={'index':'date'})
merged_df = merged_df[merged_df['date'] != 'Week']
merged_df

Unnamed: 0,date,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta
0,2017-12-24,0,0,0,1,30,0,47,0,1,100
1,2017-12-31,32,0,31,0,88,37,0,1,0,0
2,2018-01-07,0,1,2,0,0,0,0,0,3,0
3,2018-01-14,22,13,25,0,1,0,36,1,0,0
4,2018-01-21,2,19,0,0,0,1,0,3,33,63
...,...,...,...,...,...,...,...,...,...,...,...
258,2022-12-04,41,13,0,17,50,0,0,18,0,0
259,2022-12-11,1,33,22,0,1,18,44,61,21,0
260,2022-12-18,38,0,22,12,1,19,0,35,0,17
261,2022-12-25,21,12,14,<1,0,0,19,55,1,30


### Get Epiweek

In [21]:
# Parse the week labels into datetimes. `assign` builds a new frame instead of
# writing into a possibly filtered slice (avoids SettingWithCopyWarning), and
# the explicit format makes parsing strict: the exports use YYYY-MM-DD, so any
# other layout fails loudly instead of being guessed.
merged_df = merged_df.assign(
    date=pd.to_datetime(merged_df['date'], format='%Y-%m-%d')
)

In [22]:
# Tag every row with the epidemiological week of its date. `assign` returns a
# new frame, so nothing is written into a filtered slice of an earlier frame.
merged_df = merged_df.assign(epiweek=merged_df['date'].apply(Week.fromdate))

# replace <1 with 0, but only in the per-city columns (not in date/epiweek),
# then turn the string values read from the exports into integers so the
# frame holds real numbers. The nullable 'Int64' dtype keeps whole numbers
# even if some city has a missing week.
city_columns = [col for col in merged_df.columns if col not in ('date', 'epiweek')]
merged_df[city_columns] = (
    merged_df[city_columns]
    .replace('<1', 0)
    .apply(pd.to_numeric)
    .astype('Int64')
)

In [23]:
# Show the final table: one row per week, one column per city, plus `epiweek`.
merged_df

Unnamed: 0,date,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta,epiweek
0,2017-12-24,0,0,0,1,30,0,47,0,1,100,201752
1,2017-12-31,32,0,31,0,88,37,0,1,0,0,201801
2,2018-01-07,0,1,2,0,0,0,0,0,3,0,201802
3,2018-01-14,22,13,25,0,1,0,36,1,0,0,201803
4,2018-01-21,2,19,0,0,0,1,0,3,33,63,201804
...,...,...,...,...,...,...,...,...,...,...,...,...
258,2022-12-04,41,13,0,17,50,0,0,18,0,0,202249
259,2022-12-11,1,33,22,0,1,18,44,61,21,0,202250
260,2022-12-18,38,0,22,12,1,19,0,35,0,17,202251
261,2022-12-25,21,12,14,0,0,0,19,55,1,30,202252


### Save

In [24]:
# Write the final table next to this notebook, named after the keyword.
# The row index is left out of the file.
output_path = f'{KEYWORD}.csv'
merged_df.to_csv(output_path, index=False)