In [286]:
#imports
import pandas as pd
import numpy as np

# Notebook display configuration: lift the row cap so that DataFrames are
# rendered in full instead of being truncated.
pd.options.display.max_rows = None

In [287]:
# Load the previously scraped (and cleaned) incident records into `df`.
incidents_csv = 'all_incidents_cleaned.csv'
df = pd.read_csv(incidents_csv)

In [288]:
# Preview the first five rows to sanity-check what was loaded.
df.head(n=5)

Unnamed: 0,conclusion_number,conclusion_date,incident_date,incident_type,incident_description,fatalities,fatalities_description,area,registry,aircraft_type,...,deaths_crew_members,seriously_injured_crew_members,passengers,deaths_passengers,seriously_injured_passengers,other_deaths,other_injuries,aircraft_type2,Year,percentage_deaths_to_all_incidents
0,06/2022,2022-11-23,2017-09-22,ΑΤΥΧΗΜΑ,Συντριβή υπερελαφρού αεροπλάνου στην περιοχή Κ...,Θανάσιμος τραυματισμός,Πλήρωμα=1 Επιβάτες...,Περιοχη Κερασια Ροδοπης,UR-STAS,Αεροπλάνο,...,1.0,0.0,1.0,1.0,0.0,0.0,0.0,Αεροπλάνο,2017,25.0
1,05/2022,2022-10-11,2013-08-11,ΑΤΥΧΗΜΑ,Ελαφρύ αεροπλάνο κατά τη διάρκεια της προσγείω...,Χωρίς Τραυματισμό,Πλήρωμα=2 Επιβάτες...,Αεροδρομιο Μεγαρων Lgmg,SX-AGE,Αεροπλάνο,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Αεροπλάνο,2013,22.2
2,E01/2022,2022-10-27,2019-05-28,ΑΤΥΧΗΜΑ,Χειριστής αλεξιπτώτου πλαγιάς μετά από σύντομη...,Σοβαρός τραυματισμός,Πλήρωμα=1 Επιβάτες...,Στη Περιοχη Της Ανατολικης Παραλιας Καλαματας,-,Αλεξίπτωτο,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Αλεξίπτωτο,2019,16.7
3,04/2022,2022-09-06,2019-08-20,ΑΤΥΧΗΜΑ,Ελικόπτερο συνετρίβη στη θαλάσσια περιοχή μετα...,Θανάσιμος τραυματισμός,Πλήρωμα=1 Επιβάτες...,Θαλλασιος Διαυλος Πορου-Γαλατα,,Ελικόπτερο,...,1.0,0.0,2.0,2.0,0.0,0.0,0.0,Ελικόπτερο,2019,21.4
4,03/2022,2022-02-06,2020-07-16,ΑΤΥΧΗΜΑ,Xειριστής αλεξιπτώτου πλαγιάς που απογειώθηκε ...,Θανάσιμος τραυματισμός,Πλήρωμα=1 Επιβάτες...,Καλυμπακι Αγιου Θωμα Βοιωτιας,,Αλεξίπτωτο,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,Αλεξίπτωτο,2020,11.1


In [289]:
# Build the per-year summary used for the visualization: one row per year, one
# column per value of the `fatalities` label (fatal / minor / serious / no
# injury), plus the total number of incidents in that year.
# The year is derived from the incident date.
df['Year'] = pd.to_datetime(df.incident_date).dt.year
fatalities = df.groupby('Year').fatalities.value_counts().unstack().reset_index()

# Total number of incidents per year. A label that never occurs in a given year
# is NaN after unstack(); sum() skips NaN, so the total is still correct.
fatalities['all_incidents'] = fatalities[['Θανάσιμος τραυματισμός', 'Μικρός τραυματισμός','Σοβαρός τραυματισμός', 'Χωρίς Τραυματισμό']].sum(axis=1)

# Share of fatal incidents among all incidents of each year, in percent with
# one decimal, indexed by year. A year without any fatal incident has NaN in
# the fatal column; count it as 0 so the share is 0 % rather than NaN.
yearly = fatalities.set_index('Year')
fatal_share_by_year = (
    yearly['Θανάσιμος τραυματισμός'].fillna(0) / yearly['all_incidents'] * 100
).round(1)

# `fatalities` has one row per YEAR while `df` has one row per INCIDENT.
# Assigning the per-year series to a df column directly would align on the
# positional row index (year #k's value lands on incident row #k), so the
# percentage is looked up through each incident's own year instead.
df['percentage_deaths_to_all_incidents'] = df['Year'].map(fatal_share_by_year)


In [290]:
# `df1` is the working name for the per-year summary table `fatalities`.
# `fatalities_pd` is bound to the very same object: wrapping the table in a
# second pd.DataFrame(...) adds nothing (it already is a DataFrame), and
# whether changes made through such a wrapper are visible through `df1`
# depends on the pandas version / Copy-on-Write mode. With a plain alias, an
# in-place change made through either name is always seen through the other.
df1 = fatalities
fatalities_pd = df1

# taking a look at what we have done so far
df1

fatalities,Year,Θανάσιμος τραυματισμός,Μικρός τραυματισμός,Σοβαρός τραυματισμός,Χωρίς Τραυματισμό,all_incidents
0,2000,1.0,,,3.0,4.0
1,2001,2.0,,2.0,5.0,9.0
2,2002,2.0,1.0,2.0,7.0,12.0
3,2003,3.0,1.0,4.0,6.0,14.0
4,2004,2.0,2.0,5.0,9.0,18.0
5,2005,1.0,1.0,1.0,3.0,6.0
6,2006,1.0,2.0,1.0,7.0,11.0
7,2007,1.0,1.0,3.0,12.0,17.0
8,2008,,3.0,1.0,8.0,12.0
9,2009,3.0,,,4.0,7.0


In [291]:
# Rename the Greek severity labels to English column names for easier handling.
# The rename is applied to `df1` itself, because `df1` is the frame that gets
# displayed and used from here on; relying on a rename of `fatalities_pd` to
# show up in `df1` only works when both objects happen to share their
# internals, which is pandas-version dependent.
df1.rename(columns = {'Θανάσιμος τραυματισμός':'fatal_injuries','Μικρός τραυματισμός': 'minor_injuries', 'Σοβαρός τραυματισμός' :'serious_injuries', 'Χωρίς Τραυματισμό' : 'no_injuries'}, inplace = True)
# keep `fatalities_pd` pointing at the renamed table as well
fatalities_pd = df1
df1

fatalities,Year,fatal_injuries,minor_injuries,serious_injuries,no_injuries,all_incidents
0,2000,1.0,,,3.0,4.0
1,2001,2.0,,2.0,5.0,9.0
2,2002,2.0,1.0,2.0,7.0,12.0
3,2003,3.0,1.0,4.0,6.0,14.0
4,2004,2.0,2.0,5.0,9.0,18.0
5,2005,1.0,1.0,1.0,3.0,6.0
6,2006,1.0,2.0,1.0,7.0,11.0
7,2007,1.0,1.0,3.0,12.0,17.0
8,2008,,3.0,1.0,8.0,12.0
9,2009,3.0,,,4.0,7.0


In [292]:
# A NaN here means "no incident with that label in that year", so store it as 0.
df1 = df1.fillna(value=0)
df1

fatalities,Year,fatal_injuries,minor_injuries,serious_injuries,no_injuries,all_incidents
0,2000,1.0,0.0,0.0,3.0,4.0
1,2001,2.0,0.0,2.0,5.0,9.0
2,2002,2.0,1.0,2.0,7.0,12.0
3,2003,3.0,1.0,4.0,6.0,14.0
4,2004,2.0,2.0,5.0,9.0,18.0
5,2005,1.0,1.0,1.0,3.0,6.0
6,2006,1.0,2.0,1.0,7.0,11.0
7,2007,1.0,1.0,3.0,12.0,17.0
8,2008,0.0,3.0,1.0,8.0,12.0
9,2009,3.0,0.0,0.0,4.0,7.0


In [293]:
# Combine "minor_injuries" and "serious_injuries" into a single
# "total_injuries" column: the number of incidents that involved an injury
# of any kind.
df1["total_injuries"] = df1["minor_injuries"].add(df1["serious_injuries"])

In [294]:
# Inspect the dtype of every column of the summary table.
column_dtypes = df1.dtypes
display(column_dtypes)

fatalities
Year                  int64
fatal_injuries      float64
minor_injuries      float64
serious_injuries    float64
no_injuries         float64
all_incidents       float64
total_injuries      float64
dtype: object

In [295]:
# The count columns are float64 at this point, but they hold whole-number
# counts, so convert them to integers. 'Year' is left alone because it is
# already an integer column.
# One vectorised astype() call replaces a separate element-wise
# .apply(np.int64) per column; the resulting values and dtypes are the same.
count_columns = [
    'fatal_injuries',
    'minor_injuries',
    'serious_injuries',
    'no_injuries',
    'all_incidents',
    'total_injuries',
]
df1 = df1.astype({column: np.int64 for column in count_columns})

# taking a look at our df's form so far
df1

fatalities,Year,fatal_injuries,minor_injuries,serious_injuries,no_injuries,all_incidents,total_injuries
0,2000,1,0,0,3,4,0
1,2001,2,0,2,5,9,2
2,2002,2,1,2,7,12,3
3,2003,3,1,4,6,14,5
4,2004,2,2,5,9,18,7
5,2005,1,1,1,3,6,2
6,2006,1,2,1,7,11,3
7,2007,1,1,3,12,17,4
8,2008,0,3,1,8,12,4
9,2009,3,0,0,4,7,0


In [296]:
# Re-check the column dtypes after the conversion: every count column should now report an integer dtype
df1.dtypes


fatalities
Year                int64
fatal_injuries      int64
minor_injuries      int64
serious_injuries    int64
no_injuries         int64
all_incidents       int64
total_injuries      int64
dtype: object

In [299]:
# Persist the final per-year summary table as a CSV file, leaving out the row index.
output_csv = 'fatalities.csv'
df1.to_csv(output_csv, index=False)
