# Dependencies


## Installation


In [None]:
%pip install pip --upgrade -q
%pip install pandas -q
%pip install numpy -q
%pip install matplotlib -q
%pip install folium -q
%pip install geopy -q
%pip install geopandas -q

## Importing


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
import geopandas as gpd

# Obtaining Psychiatric hospitalization data


In [None]:
# Location of the raw SUS export holding the hospitalization counts
SUS_DATA_FILE_PATH = "data/sus/mental_diseases_jan2024-dec2024/sih_cnv_nibr102443191_53_138_174.csv"
# Load the export: ";"-separated, latin1-encoded, header taken from row 4,
# last 6 lines dropped (skipfooter is only supported by the python engine).
# The two resulting columns are then given short English names.
raw_data = pd.read_csv(
    SUS_DATA_FILE_PATH,
    sep=";",
    encoding="latin1",
    header=4,
    skipfooter=6,
    engine="python",
).set_axis(["municipality", "hospitalizations"], axis=1)

In [None]:
# Preview the top of the raw table (head() shows 5 rows by default)
raw_data.head()


In [None]:
# Preview the bottom of the raw table (tail() shows 5 rows by default)
raw_data.tail()

In [None]:
# Separating the IBGE code into state and municipality parts.
# The IBGE code uniquely identifies each municipality in Brazil. In its full
# form it has 7 digits: 2 for the state (UF), 4 for the municipality within
# that state, and a trailing check digit.
# Example: São Paulo is 3550308 -> state 35, municipality 5030, check digit 8.
# NOTE(review): the SUS export presumably labels each row as
# "<6-digit code> <name>", i.e. the IBGE code without the check digit
# (e.g. "355030 São Paulo") -- confirm against the raw CSV. If so, the
# municipality part extracted below is exactly the 4 municipality digits.

# The code is the first whitespace-delimited token of the row label.
# Vectorized .str operations replace the per-row lambdas; note that a missing
# or blank label now propagates as NaN instead of raising.
ibge_code = raw_data["municipality"].str.split().str[0]
# First two characters: state code; everything after them: municipality code.
ibge_municipality_state_code = ibge_code.str[:2]
ibge_municipality_code = ibge_code.str[2:]

In [None]:
# Assemble the cleaned table: the IBGE code (whole, state part, municipality
# part) next to the hospitalization count, then cast every column in one go --
# codes as strings, counts as integers.
sus_data = pd.DataFrame(
    {
        "IBGE-code": ibge_code,
        "IBGE-state-code": ibge_municipality_state_code,
        "IBGE-municipality-code": ibge_municipality_code,
        "internacoes": raw_data["hospitalizations"],
    }
).astype(
    {
        "IBGE-code": str,
        "IBGE-state-code": str,
        "IBGE-municipality-code": str,
        "internacoes": int,
    }
)
# Quick look at the first rows of the result
sus_data.head()

In [None]:
# Saving the processed data to a CSV file
from pathlib import Path

GENERATED_SUS_DATA_FILE_PATH = "./data/generated/sus/mental_diseases_jan2024-dec2024.csv"
# to_csv does not create missing directories, so make sure the output folder
# exists first (a no-op when it is already there).
Path(GENERATED_SUS_DATA_FILE_PATH).parent.mkdir(parents=True, exist_ok=True)
# Written ";"-separated and UTF-8 encoded, without the DataFrame index.
sus_data.to_csv(GENERATED_SUS_DATA_FILE_PATH, index=False, sep=";", encoding="utf-8")