# Região Centro Oeste PA Janeiro de 2023

### Reading the files

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the Goiás (GO) PA records and preview the first five rows.
df_go = pd.read_csv(filepath_or_buffer='csv/PAGO2301.csv')
df_go.head(n=5)

In [None]:
# Load the Distrito Federal (DF) PA records.
df_df = pd.read_csv('csv/PADF2301.csv')
# Preview the frame that was just loaded (df_df), not the GO frame.
df_df.head()

In [None]:
# Load the Mato Grosso do Sul (MS) PA records; this file is decoded as
# ISO-8859-1 instead of the pandas default encoding.
df_ms = pd.read_csv(filepath_or_buffer='csv/PAMS2301.csv', encoding='ISO-8859-1')
df_ms.head(n=5)

In [None]:
# Load the Mato Grosso (MT) PA records; this file is decoded as
# ISO-8859-1 instead of the pandas default encoding.
df_mt = pd.read_csv(filepath_or_buffer='csv/PAMT2301.csv', encoding='ISO-8859-1')
df_mt.head(n=5)

### Merging the PA_CIDPRI frequencies of the four states into one dataset

In [None]:
def _count_cid_codes(frame, count_column):
    """Return each distinct PA_CIDPRI value of *frame* with its number of occurrences.

    The result has two columns: 'PA_CIDPRI' and *count_column*.
    """
    counts = frame['PA_CIDPRI'].value_counts().reset_index()
    counts.columns = ['PA_CIDPRI', count_column]
    return counts


# Per-state occurrence counts of every PA_CIDPRI value
freq_go = _count_cid_codes(df_go, 'Frequency_GO')
freq_ms = _count_cid_codes(df_ms, 'Frequency_MS')
freq_mt = _count_cid_codes(df_mt, 'Frequency_MT')
freq_df = _count_cid_codes(df_df, 'Frequency_DF')

# Outer joins keep the codes that occur in only some of the states
df_região_centro_oeste = freq_go
for state_counts in (freq_ms, freq_mt, freq_df):
    df_região_centro_oeste = df_região_centro_oeste.merge(state_counts, on='PA_CIDPRI', how='outer')

# Swap the technical count-column names for the full state names
df_região_centro_oeste = df_região_centro_oeste.rename(columns={
    'Frequency_GO': 'Goiás',
    'Frequency_MS': 'Mato Grosso do Sul',
    'Frequency_MT': 'Mato Grosso',
    'Frequency_DF': 'Distrito Federal',
})

# A code missing from a state shows up as NaN after the outer join; count it as 0
df_região_centro_oeste = df_região_centro_oeste.fillna(0)

# Row-wise total over the four state columns
state_columns = ['Goiás', 'Mato Grosso do Sul', 'Mato Grosso', 'Distrito Federal']
df_região_centro_oeste['Total Frequency'] = df_região_centro_oeste[state_columns].sum(axis=1)

# Most frequent codes first
df_região_centro_oeste = df_região_centro_oeste.sort_values(by='Total Frequency', ascending=False)

df_região_centro_oeste.head()

### Top 5 PA_CIDPRI codes by total frequency (excluding code 0000)

In [None]:
# Ensure PA_CIDPRI is of type string so it can be compared against code literals
df_região_centro_oeste['PA_CIDPRI'] = df_região_centro_oeste['PA_CIDPRI'].astype(str)

# Exclude the '0000' code by value instead of dropping whichever row happens to
# be first: the chart must stay correct even when '0000' is not the most
# frequent code. The frame is already sorted by 'Total Frequency' (descending),
# so the first five remaining rows are the top five codes.
is_excluded_code = df_região_centro_oeste['PA_CIDPRI'].str.strip() == '0000'
top5_excluding_first = df_região_centro_oeste[~is_excluded_code].head(5)

# Bar chart of the total occurrences of those five codes
plt.figure(figsize=(10, 6))
plt.bar(top5_excluding_first['PA_CIDPRI'], top5_excluding_first['Total Frequency'], color='skyblue')
plt.title('Top 5 PA_CIDPRI Codes by Total Occurrences (Excluding the 0000)')
plt.xlabel('PA_CIDPRI Code')
plt.ylabel('Total Occurrences')
plt.xticks(rotation=45)
plt.show()

### Pie chart of N180 occurrences by state

In [None]:
import matplotlib.pyplot as plt

# Select the row whose code is 'N180'
is_n180 = df_região_centro_oeste['PA_CIDPRI'] == 'N180'
df_n180_row = df_região_centro_oeste.loc[is_n180]

# Keep only the per-state columns: the code and the total are not pie slices
df_n180_data = df_n180_row.drop(columns=['PA_CIDPRI', 'Total Frequency'])

# One slice per state, labelled with the state name and its percentage share
fig, ax = plt.subplots()
ax.pie(
    df_n180_data.iloc[0],
    labels=df_n180_data.columns,
    autopct='%1.1f%%',
    startangle=90,
)
ax.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle
ax.set_title('Pie Chart for N180')

plt.show()

In [None]:
# Keep only the rows whose PA_CIDPRI is 'N180' in each state's DataFrame
filtered_df = df_df[df_df['PA_CIDPRI'] == 'N180']
filtered_go = df_go[df_go['PA_CIDPRI'] == 'N180']
filtered_ms = df_ms[df_ms['PA_CIDPRI'] == 'N180']
filtered_mt = df_mt[df_mt['PA_CIDPRI'] == 'N180']

# Count how often each PA_IDADE value occurs per state; each Series is named
# after its state so it becomes that state's column after the concat
freq_df = filtered_df['PA_IDADE'].value_counts().rename('DF')
freq_go = filtered_go['PA_IDADE'].value_counts().rename('GO')
freq_ms = filtered_ms['PA_IDADE'].value_counts().rename('MS')
freq_mt = filtered_mt['PA_IDADE'].value_counts().rename('MT')

# Align the four Series on their index (the PA_IDADE values), one column per state
freq_merged = pd.concat([freq_df, freq_go, freq_ms, freq_mt], axis=1, sort=False)

# A PA_IDADE value absent from a state is NaN after the concat; count it as 0
freq_merged = freq_merged.fillna(0).astype(int)

# Name the index 'Age' explicitly before turning it into a column. Relying on
# reset_index() producing a column called 'index' breaks on pandas >= 2.0,
# where value_counts() names the index after the counted column ('PA_IDADE'),
# which would leave the frame without an 'Age' column.
freq_merged = freq_merged.rename_axis('Age').reset_index()

freq_merged.head()

### Calculating weighted average age for each state

In [None]:
# NOTE: despite its name, `weights` holds the ages being averaged; the
# per-state count columns are what each age is weighted by.
weights = freq_merged['Age']

# Frequency-weighted mean age per state: sum(count * age) / sum(count)
state_averages = {
    state: (freq_merged[state] * weights).sum() / freq_merged[state].sum()
    for state in ('DF', 'GO', 'MS', 'MT')
}

# Individual names kept alongside the dictionary
df_average = state_averages['DF']
go_average = state_averages['GO']
ms_average = state_averages['MS']
mt_average = state_averages['MT']

### Plotting the bar chart of weighted average ages

In [None]:
# Split the mapping into parallel lists: x-axis labels and bar heights
states = list(state_averages)
averages = [state_averages[state] for state in states]

bar_colors = ['blue', 'green', 'red', 'orange']

plt.figure(figsize=(10, 6))
plt.bar(states, averages, color=bar_colors)

plt.title('Weighted Average Age for N180 Diagnosis by State')
plt.xlabel('State')
plt.ylabel('Weighted Average Age')
plt.xticks(states)

# Print each bar's value just above its top, centred on the bar
for position, avg in enumerate(averages):
    plt.text(position, avg + 0.5, f"{avg:.2f}", ha='center')

plt.show()

### Exporting to a csv file

In [None]:
# Destination of the exported table (relative to the working directory)
file_name = 'regiao_centro_oeste_pa.csv'

# Write the merged frequency table; index=False leaves the row index out of the file
df_região_centro_oeste.to_csv(path_or_buf=file_name, index=False)

print(f"DataFrame exported to {file_name}")