### Setup

In [1]:
import os
import pandas as pd 
from epiweeks import Week

In [2]:
# Location of the CSV listing the cities to process (relative to this notebook).
PATH_CODES = "../../cities.csv"
# Search term; it is embedded in every input file name and in the output file name.
KEYWORD = "violencia de genero"

#### CSV Files

In [3]:
# Load the city lookup table; its ISO and Municipality columns drive the
# file names and the column labels used further down.
cities = pd.read_csv(filepath_or_buffer=PATH_CODES)
cities

Unnamed: 0,lat,lng,Municipality,Municipality code,ISO
0,6.2447,-75.5748,Medellín,5001,CO-ANT
1,3.44,-76.5197,Cali,76001,CO-VAC
2,4.5781,-74.2144,Soacha,25754,CO-CUN
3,4.1425,-73.6294,Villavicencio,50001,CO-MET
4,1.2136,-77.2811,Pasto,52001,CO-NAR
5,10.9639,-74.7964,Barranquilla,8001,CO-ATL
6,7.1186,-73.1161,Bucaramanga,68001,CO-SAN
7,4.4378,-75.2006,Ibagué,73001,CO-TOL
8,2.4411,-76.6061,Popayán,19001,CO-CAU
9,7.9075,-72.5047,Cúcuta,54001,CO-NSA


### Read each file

In [4]:
# For every ISO code there are two exports on disk:
#   cities/<ISO>_<KEYWORD>(1).csv  and  cities/<ISO>_<KEYWORD>.csv
# They are stacked row-wise (in that order) into one frame per city, and the
# resulting list keeps the same order as `cities`.
dfs = [
    pd.concat(
        [
            pd.read_csv(f'cities/{iso_code}_{KEYWORD}(1).csv'),
            pd.read_csv(f'cities/{iso_code}_{KEYWORD}.csv'),
        ]
    )
    for iso_code in cities['ISO']
]

In [5]:
# Preview the frame built for the first ISO code in `cities`.
# The week dates end up in the index, and the export's own "Week, <keyword>: (<region>)"
# header line is read as an ordinary data row.
dfs[0]

Unnamed: 0,Category: All categories
Week,violencia de genero: (Antioquia)
2014-01-05,0
2014-01-12,0
2014-01-19,0
2014-01-26,0
...,...
2023-07-30,39
2023-08-06,25
2023-08-13,0
2023-08-20,30


### Merge

In [6]:
# Place the per-city frames side by side, aligned on their index (the week
# labels). The municipality names are passed as keys, so they become the
# outer level of the resulting column MultiIndex.
merged_df = pd.concat(objs=dfs, axis='columns', keys=cities['Municipality'])

In [7]:
# Inspect the concatenated frame: two column levels (municipality on top) and
# the export header line still present as a row.
merged_df

Municipality,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta
Unnamed: 0_level_1,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories
Week,violencia de genero: (Antioquia),violencia de genero: (Valle del Cauca),violencia de genero: (Cundinamarca),violencia de genero: (Meta),violencia de genero: (Narino),violencia de genero: (Atlantico),violencia de genero: (Santander Department),violencia de genero: (Tolima),violencia de genero: (Cauca Department),violencia de genero: (North Santander)
2014-01-05,0,42,0,49,0,0,0,0,0,0
2014-01-12,0,0,0,0,0,0,0,0,0,0
2014-01-19,0,0,48,0,0,0,0,0,0,0
2014-01-26,0,0,28,0,0,38,16,0,0,0
...,...,...,...,...,...,...,...,...,...,...
2023-07-30,39,67,24,0,33,16,16,40,0,16
2023-08-06,25,18,14,14,48,26,15,64,26,0
2023-08-13,0,31,30,17,0,20,50,0,0,0
2023-08-20,30,65,34,13,34,0,0,0,76,0


In [8]:
# Remove the second level of the column index
merged_df.columns = merged_df.columns.droplevel(1)
merged_df.columns.name = None
merged_df = merged_df.iloc[1:,:]
merged_df = merged_df.reset_index().rename(columns={'index':'date'})
merged_df = merged_df[merged_df['date'] != 'Week']
merged_df

Unnamed: 0,date,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta
0,2014-01-05,0,42,0,49,0,0,0,0,0,0
1,2014-01-12,0,0,0,0,0,0,0,0,0,0
2,2014-01-19,0,0,48,0,0,0,0,0,0,0
3,2014-01-26,0,0,28,0,0,38,16,0,0,0
4,2014-02-02,0,81,52,36,0,0,18,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
500,2023-07-30,39,67,24,0,33,16,16,40,0,16
501,2023-08-06,25,18,14,14,48,26,15,64,26,0
502,2023-08-13,0,31,30,17,0,20,50,0,0,0
503,2023-08-20,30,65,34,13,34,0,0,0,76,0


### Get Epiweek

In [9]:
# Turn the week labels (text such as '2014-01-05') into real datetimes so that
# they can be converted to epidemiological weeks afterwards.
parsed_dates = pd.to_datetime(merged_df['date'])
merged_df['date'] = parsed_dates

In [10]:
# Derive the epidemiological week of every date. No `system` argument is
# passed, so the epiweeks library's default week-numbering system applies.
merged_df['epiweek'] = merged_df['date'].map(Week.fromdate)

In [11]:
# Inspect the final table: one row per week, one column per municipality,
# plus the derived 'epiweek' column.
merged_df

Unnamed: 0,date,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta,epiweek
0,2014-01-05,0,42,0,49,0,0,0,0,0,0,201402
1,2014-01-12,0,0,0,0,0,0,0,0,0,0,201403
2,2014-01-19,0,0,48,0,0,0,0,0,0,0,201404
3,2014-01-26,0,0,28,0,0,38,16,0,0,0,201405
4,2014-02-02,0,81,52,36,0,0,18,0,0,0,201406
...,...,...,...,...,...,...,...,...,...,...,...,...
500,2023-07-30,39,67,24,0,33,16,16,40,0,16,202331
501,2023-08-06,25,18,14,14,48,26,15,64,26,0,202332
502,2023-08-13,0,31,30,17,0,20,50,0,0,0,202333
503,2023-08-20,30,65,34,13,34,0,0,0,76,0,202334


### Save

In [12]:
# Write the merged table next to the notebook as '<KEYWORD>.csv'; the row index
# is left out of the file.
output_name = f'{KEYWORD}.csv'
merged_df.to_csv(output_name, index=False)