### Setup

In [1]:
import os
import pandas as pd 
from epiweeks import Week

In [2]:
# Location of the CSV describing the cities to process (read with pd.read_csv
# in the next section).
PATH_CODES = '../../cities.csv'
# Search term that appears in every per-region data file name
# (cities/<ISO>_<KEYWORD>.csv and cities/<ISO>_<KEYWORD>(1).csv).
KEYWORD = 'Violencia'

#### CSV Files

In [3]:
# Lookup table of cities: coordinates, municipality name and code, and the ISO
# region code that is later used to build each data file name.
cities = pd.read_csv(PATH_CODES)
cities

Unnamed: 0,lat,lng,Municipality,Municipality code,ISO
0,6.2447,-75.5748,Medellín,5001,CO-ANT
1,3.44,-76.5197,Cali,76001,CO-VAC
2,4.5781,-74.2144,Soacha,25754,CO-CUN
3,4.1425,-73.6294,Villavicencio,50001,CO-MET
4,1.2136,-77.2811,Pasto,52001,CO-NAR
5,10.9639,-74.7964,Barranquilla,8001,CO-ATL
6,7.1186,-73.1161,Bucaramanga,68001,CO-SAN
7,4.4378,-75.2006,Ibagué,73001,CO-TOL
8,2.4411,-76.6061,Popayán,19001,CO-CAU
9,7.9075,-72.5047,Cúcuta,54001,CO-NSA


### Read each file

In [4]:
# Build one frame per ISO region code, in the same order as the rows of
# `cities`. Every region has two exports on disk; the '(1)' file is stacked
# first and the unsuffixed file second.
# NOTE(review): the two exports are simply appended; if each was normalised
# separately by the data source, their value scales may not be directly
# comparable -- confirm before analysing the joined series.
dfs = []
for city in cities['ISO']:
    export_first = pd.read_csv(f'cities/{city}_{KEYWORD}(1).csv')
    export_second = pd.read_csv(f'cities/{city}_{KEYWORD}.csv')
    dfs.append(pd.concat([export_first, export_second]))

In [5]:
# Peek at the first region's frame: the index holds a 'Week' label row followed
# by dates, and the single column holds the series label and its values exactly
# as read from the files.
dfs[0]

Unnamed: 0,Category: All categories
Week,Violence: (Antioquia)
2014-01-05,35
2014-01-12,44
2014-01-19,46
2014-01-26,41
...,...
2023-07-30,59
2023-08-06,59
2023-08-13,68
2023-08-20,54


### Merge

In [6]:
# Place the per-region frames side by side, aligned on their shared index
# (the 'Week' label rows and the dates). Each frame's columns are grouped
# under the matching municipality name, which becomes the top column level.
merged_df = pd.concat(
    dfs,
    axis='columns',
    keys=cities['Municipality'],
)

In [7]:
# Inspect the combined frame: columns are a two-level header (municipality,
# original column label) and the 'Week' label row from the raw files is still
# present as data.
merged_df

Municipality,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta
Unnamed: 0_level_1,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories,Category: All categories
Week,Violence: (Antioquia),Violence: (Valle del Cauca),Violence: (Cundinamarca),Violence: (Meta),Violence: (Narino),Violence: (Atlantico),Violence: (Santander Department),Violence: (Tolima),Violence: (Cauca Department),Violence: (North Santander)
2014-01-05,35,25,31,66,26,27,23,0,0,19
2014-01-12,44,34,22,47,64,21,8,26,32,9
2014-01-19,46,40,0,0,16,49,27,24,0,0
2014-01-26,41,49,100,0,63,26,32,35,0,43
...,...,...,...,...,...,...,...,...,...,...
2023-07-30,59,50,56,41,41,38,57,46,60,23
2023-08-06,59,48,47,48,81,45,49,51,84,32
2023-08-13,68,44,47,47,43,55,61,64,55,28
2023-08-20,54,61,61,44,57,51,53,49,50,31


In [8]:
# Flatten the header: keep only the municipality names (level 0) and drop the
# second level, which just repeats the raw files' column label. Work on a copy
# so the concatenated frame is not half-modified if a later step fails.
tidy_df = merged_df.copy()
tidy_df.columns = tidy_df.columns.droplevel(1)
tidy_df.columns.name = None

# Each raw file contributes a 'Week' label row that was read as data. Remove
# those rows by label rather than by position (`iloc[1:]`), so a real data row
# can never be dropped by accident if the row order changes.
tidy_df = tidy_df[tidy_df.index != 'Week']

# Move the dates out of the index into a 'date' column. Resetting the index
# after filtering gives a gap-free 0..n-1 index and returns a fresh frame, so
# later column assignments do not act on a filtered selection.
merged_df = tidy_df.rename_axis('date').reset_index()
merged_df

Unnamed: 0,date,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta
0,2014-01-05,35,25,31,66,26,27,23,0,0,19
1,2014-01-12,44,34,22,47,64,21,8,26,32,9
2,2014-01-19,46,40,0,0,16,49,27,24,0,0
3,2014-01-26,41,49,100,0,63,26,32,35,0,43
4,2014-02-02,57,49,0,0,0,40,35,29,0,48
...,...,...,...,...,...,...,...,...,...,...,...
500,2023-07-30,59,50,56,41,41,38,57,46,60,23
501,2023-08-06,59,48,47,48,81,45,49,51,84,32
502,2023-08-13,68,44,47,47,43,55,61,64,55,28
503,2023-08-20,54,61,61,44,57,51,53,49,50,31


### Get Epiweek

In [9]:
# Parse the 'date' strings into datetime values. `assign` returns a new frame
# instead of writing a column in place, which avoids the SettingWithCopyWarning
# that in-place assignment can raise on a frame derived from a filtered
# selection.
merged_df = merged_df.assign(date=pd.to_datetime(merged_df['date']))

In [10]:
# Derive the epidemiological week for every date. No week system is passed to
# `Week.fromdate`, so the library default applies. The classmethod is handed to
# `apply` directly (no lambda wrapper needed), and `assign` returns a new frame
# rather than writing into a possibly filtered selection, which avoids
# SettingWithCopyWarning.
merged_df = merged_df.assign(epiweek=merged_df['date'].apply(Week.fromdate))

In [12]:
# Final table: the date, one column per municipality, and the derived epiweek.
merged_df

Unnamed: 0,date,Medellín,Cali,Soacha,Villavicencio,Pasto,Barranquilla,Bucaramanga,Ibagué,Popayán,Cúcuta,epiweek
0,2014-01-05,35,25,31,66,26,27,23,0,0,19,201402
1,2014-01-12,44,34,22,47,64,21,8,26,32,9,201403
2,2014-01-19,46,40,0,0,16,49,27,24,0,0,201404
3,2014-01-26,41,49,100,0,63,26,32,35,0,43,201405
4,2014-02-02,57,49,0,0,0,40,35,29,0,48,201406
...,...,...,...,...,...,...,...,...,...,...,...,...
500,2023-07-30,59,50,56,41,41,38,57,46,60,23,202331
501,2023-08-06,59,48,47,48,81,45,49,51,84,32,202332
502,2023-08-13,68,44,47,47,43,55,61,64,55,28,202333
503,2023-08-20,54,61,61,44,57,51,53,49,50,31,202334


### Save

In [13]:
# Write the combined table to Violence.csv (path relative to the working
# directory), omitting the row index.
merged_df.to_csv('Violence.csv', index=False)