This take the general dataset of court martials, or an edited dataset which contains the following columns: Year, Guilty of Violent Offence, Guilty of Sex offence, A-Violent Charges, B-Sex offence charges.

In [10]:
import pandas as pd

# Load the data to see the first few rows and column names
file_path = 'GeneralDataset2010-2023.csv'
data = pd.read_csv(file_path, header=1)

# Display the first few rows and the column names to understand the structure of the data
data.head(), data.columns

(  Reference number       Rank Service Unit Trial Court Sentencing Date  \
 0              NaN  Signaller    Army  NaN  Colchester       07-Jan-10   
 1              NaN    Colonel    Army  NaN  Sennelager       08-Jan-10   
 2              NaN  Guardsman    Army  NaN  Colchester       13-Jan-10   
 3              NaN  Signaller    Army  NaN  Colchester       13-Jan-10   
 4              NaN  Guardsman    Army  NaN  Colchester       21-Jan-10   
 
    Year of Sentencing                                         Charge (s)  \
 0                2010  2 x Battery, 1 x Assault Occasioning Actual Bo...   
 1                2010  5 x Obtaining a money transfer by deception, 6...   
 2                2010                          1 x Absence Without Leave   
 3                2010                          1 x Absence Without Leave   
 4                2010           1 x Desertion, 1 x Absence Without Leave   
 
   Act charged under                      Finding  ...      G - Alcohol  \
 0       

In [11]:
columns_to_keep = ['Year of Sentencing', 'Guilty of violent offence', 'Guilty of sex offence', 'A - Violent charges', 'B - Sex offence charges']
filtered_data = data[columns_to_keep]

# Print or continue with further analysis...
print(filtered_data)

      Year of Sentencing Guilty of violent offence Guilty of sex offence  \
0                   2010                       Yes                    No   
1                   2010                       NaN                    No   
2                   2010                       NaN                    No   
3                   2010                       NaN                    No   
4                   2010                       NaN                    No   
...                  ...                       ...                   ...   
6066                2023                        No                    No   
6067                2023                       Yes                    No   
6068                2023                       Yes                   NaN   
6069                2023                       NaN                   NaN   
6070                2023                       Yes                   Yes   

     A - Violent charges B - Sex offence charges  
0               Contains         Doe

In [16]:
violent_charges = filtered_data[filtered_data['A - Violent charges'] == 'Contains']

# Filter the rows where 'Guilty of violent offence' contains 'Yes'
violent_convictions = filtered_data[filtered_data['Guilty of violent offence'] == 'Yes']

sexoffence_charges = filtered_data[filtered_data['B - Sex offence charges'] == 'Contains']
sexoffence_convictions = filtered_data[filtered_data['Guilty of sex offence'] == 'Yes']

# Group by 'Year of Sentencing' and count the occurrences
violent_charges_counts = violent_charges.groupby('Year of Sentencing').size()
violent_convictions_counts = violent_convictions.groupby('Year of Sentencing').size()

sexoffence_charges_counts = sexoffence_charges.groupby('Year of Sentencing').size()
sexoffence_convictions_counts = sexoffence_convictions.groupby('Year of Sentencing').size()

# Combine the counts into a single DataFrame for comparison
violent_counts_by_year = pd.DataFrame({
    'Violent Charges': violent_charges_counts,
    'Violent Convictions': violent_convictions_counts
}).fillna(0)  # Fill NaN with 0 where no data is present for a year

sexoffence_counts_by_year = pd.DataFrame({
    'Sex Offence Charges': sexoffence_charges_counts,
    'Sex offence Convictions': sexoffence_convictions_counts
}).fillna(0)  # Fill NaN with 0 where no data is present for a year

print(violent_counts_by_year)
print(sexoffence_counts_by_year)

                    Violent Charges  Violent Convictions
Year of Sentencing                                      
2010                            170                   99
2011                            215                  155
2012                            184                  124
2013                            263                  152
2014                            249                  150
2015                            235                  162
2016                            239                  154
2017                            200                  117
2018                            203                  127
2019                            246                  156
2020                            181                  124
2021                            206                  109
2022                            197                  106
2023                            140                   72
                    Sex Offence Charges  Sex offence Convictions
Year of Sentencing     

In [28]:
violent_counts_by_year['Violent Conviction Rate'] = (violent_counts_by_year['Violent Convictions'] / violent_counts_by_year['Violent Charges']) * 100

violent_counts_by_year['Violent Conviction Rate']

Year of Sentencing
2010    58.235294
2011    72.093023
2012    67.391304
2013    57.794677
2014    60.240964
2015    68.936170
2016    64.435146
2017    58.500000
2018    62.561576
2019    63.414634
2020    68.508287
2021    52.912621
2022    53.807107
2023    51.428571
Name: Violent Conviction Rate, dtype: float64

In [37]:
sexoffence_counts_by_year['Sex Offence Conviction Rate'] = (sexoffence_counts_by_year['Sex offence Convictions'] / sexoffence_counts_by_year['Sex Offence Charges']) * 100

sexoffence_counts_by_year['Sex Offence Conviction Rate']

Year of Sentencing
2010    68.965517
2011    85.294118
2012    63.333333
2013    60.000000
2014    55.319149
2015    64.444444
2016    61.818182
2017    51.851852
2018    57.894737
2019    42.105263
2020    60.784314
2021    50.561798
2022    39.423077
2023    56.410256
Name: Sex Offence Conviction Rate, dtype: float64

In [44]:
violent_conviction_rate_df = violent_counts_by_year['Violent Conviction Rate'].to_frame(name='Violent Conviction Rate').round(2)
sex_offence_conviction_rate_df = sexoffence_counts_by_year['Sex Offence Conviction Rate'].to_frame(name='Sex Offence Conviction Rate').round(2)

# Reset the index to turn it into a column
violent_conviction_rate_df.reset_index(inplace=True)
sex_offence_conviction_rate_df.reset_index(inplace=True)

# Merge the two DataFrames on 'Year of Sentencing'
combined_conviction_rates_df = pd.merge(violent_conviction_rate_df, sex_offence_conviction_rate_df, on='Year of Sentencing')

combined_conviction_rates_df.head()

Unnamed: 0,Year of Sentencing,Violent Conviction Rate,Sex Offence Conviction Rate
0,2010,58.24,68.97
1,2011,72.09,85.29
2,2012,67.39,63.33
3,2013,57.79,60.0
4,2014,60.24,55.32


In [48]:
long_format_df = pd.melt(combined_conviction_rates_df, id_vars=['Year of Sentencing'],
                         value_vars=['Violent Conviction Rate', 'Sex Offence Conviction Rate'],
                         var_name='category', value_name='value')

long_format_df.rename(columns={'Year of Sentencing': 'year'}, inplace=True)


print(long_format_df.head())

   year                 category  value
0  2010  Violent Conviction Rate  58.24
1  2011  Violent Conviction Rate  72.09
2  2012  Violent Conviction Rate  67.39
3  2013  Violent Conviction Rate  57.79
4  2014  Violent Conviction Rate  60.24


In [49]:
long_format_df.to_csv('long_format_conviction_rates.csv', index=False)
print("Data exported successfully to 'long_format_conviction_rates.csv'")

Data exported successfully to 'long_format_conviction_rates.csv'
