# Analysis of data from the spreadsheet of dead journalists, highlighting Gaza and the years 2023 to 2025.

In [22]:
import pandas as pd

# Load the source workbook into `df`, the frame the later cells operate on.
source_workbook = 'jornalists_murdered.xlsx'
df = pd.read_excel(source_workbook)

# Print a plain-text preview of the first rows.
first_rows = df.head()
print(first_rows)

                   Nome      Data da Morte  \
0    Abadullah Hananzai     April 30, 2018   
1            Abay Hailu   February 9, 1998   
2  Abd al-Karim al-Ezzo  December 21, 2012   
3        Abdallah Alwan  December 18, 2023   
4    Abdallah Bouhachek  February 10, 1996   

                                  Local da Morte  \
0                                    Afghanistan   
1                                       Ethiopia   
2                                          Syria   
3  Israel and the Occupied Palestinian Territory   
4                                        Algeria   

                                  Organizações        Tipo  \
0  Radio Azadi,Radio Free Europe/Radio Liberty  Journalist   
1                                       Agiere  Journalist   
2                                    Freelance  Journalist   
3    Holy Quran Radio,Midan,Mugtama,Al-Jazeera  Journalist   
4                        Révolution et Travail  Journalist   

                  Causa Motivo Confir

In [23]:
# Parse the textual death dates into datetimes so the `.dt` accessor works,
# and store the parsed values back on the frame.
death_dates = pd.to_datetime(df['Data da Morte'])
df['Data da Morte'] = death_dates

# Add the calendar year of each death as its own column.
df['Ano'] = death_dates.dt.year

# Number of rows per year, most frequent first; show the top five.
murders_per_year = df['Ano'].value_counts()
murders_per_year.head(5)

Ano
2024    103
2023     82
2009     76
2012     74
2013     74
Name: count, dtype: int64

In [13]:
# Count how many rows each location has across the whole dataset,
# ordered from the most frequent location to the least.
death_locations = df['Local da Morte']
places_most_deaths = death_locations.value_counts()
places_most_deaths

Local da Morte
Iraq                                             193
Israel and the Occupied Palestinian Territory    183
Syria                                            145
Philippines                                       96
Somalia                                           73
                                                ... 
Papua New Guinea                                   1
Lesotho                                            1
Japan                                              1
Ireland                                            1
Lithuania                                          1
Name: count, Length: 106, dtype: int64

In [24]:
# Keep only the rows whose year of death is 2023, 2024 or 2025
# (`between` includes both endpoints by default).
death_year = df['Data da Morte'].dt.year
mask = death_year.between(2023, 2025)
df_filter = df.loc[mask]

# Count deaths per location within that window and turn the result into a
# two-column table.
location_counts = df_filter['Local da Morte'].value_counts()
places_most_deaths = location_counts.reset_index()
places_most_deaths.columns = ['country', 'deaths']

# Save to CSV (without the row index).
places_most_deaths.to_csv('places_most_death_2023_2025.csv', index=False)

places_most_deaths.head()

Unnamed: 0,country,deaths
0,Israel and the Occupied Palestinian Territory,164
1,Sudan,9
2,Syria,5
3,Lebanon,5
4,India,3


# Filter data from 7 October 2023 through the end of 2025 for Gaza / "Israel and the Occupied Palestinian Territory" (the territory label defined by CPJ)

In [26]:
# Rows whose location mentions "Gaza" or CPJ's combined territory label.
# The match is case-insensitive, and rows with a missing location are
# treated as non-matching (na=False).
mask_gaza = df['Local da Morte'].str.contains(
    'Gaza|Israel and the Occupied Palestinian Territory', case=False, na=False
)

# Make sure the date column is datetime. Values that cannot be parsed become
# NaT, which fails both comparisons below and so drops out of the selection.
df['Data da Morte'] = pd.to_datetime(df['Data da Morte'], errors='coerce')

# Date window, inclusive on both ends.
start_date = pd.to_datetime('2023-10-07')
end_date = pd.to_datetime('2025-12-31')
mask_date = (df['Data da Morte'] >= start_date) & (df['Data da Morte'] <= end_date)

# Take an explicit copy of the selection: the column assignment below then
# writes to an independent frame instead of a slice of `df`, which can
# otherwise raise SettingWithCopyWarning.
df_gaza = df.loc[mask_gaza & mask_date, ['Nome', 'Data da Morte']].copy()

# Replace the full date with just the year. The column keeps its original
# name because the exported workbook is read back using that header.
df_gaza['Data da Morte'] = df_gaza['Data da Morte'].dt.year

# Sort by year with a stable algorithm so rows within the same year keep
# their original relative order and the exported file is reproducible.
df_gaza_sorted = (
    df_gaza
    .sort_values('Data da Morte', ascending=True, kind='stable')
    .reset_index(drop=True)
)

df_gaza_sorted.to_excel('journalists_killed_gaza_2023_2025.xlsx', index=False)

In [27]:
# Read the exported Gaza workbook back in.
# NOTE(review): this rebinds `df`, which until now held the full dataset, so
# the earlier cells cannot be re-run after this one without reloading the
# original spreadsheet first. Consider a dedicated name if nothing later
# relies on `df` being the Gaza export.
gaza_export_path = 'journalists_killed_gaza_2023_2025.xlsx'
df = pd.read_excel(gaza_export_path)

# In this export the 'Data da Morte' column holds the year of death, so the
# value counts give the number of journalists per year.
death_years = df['Data da Morte']
years_count_gaza = death_years.value_counts()
years_count_gaza

Data da Morte
2024    76
2023    65
2025    23
Name: count, dtype: int64