<a href="https://colab.research.google.com/github/jjsantanna/conti_ransomware_psy_analysis/blob/master/Thesis_Beatrice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [33]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

from google.colab import files

import time
import os

## Downloading and preparing the data

In [34]:
# Downloading the translated data (two CSV exports of the leaked Jabber chats)
df_2020 = pd.read_csv('https://github.com/NorthwaveSecurity/complete_translation_leaked_chats_conti_ransomware/blob/main/jabber_chat_2020_translated.csv?raw=true',index_col=0)
df_2021 = pd.read_csv('https://github.com/NorthwaveSecurity/complete_translation_leaked_chats_conti_ransomware/blob/main/jabber_chat_2021_2022_translated.csv?raw=true',index_col=0)

# Merging the data from 2020 and 2021+2022
df = pd.concat([df_2020,df_2021]).reset_index(drop=True)

# Converting the time column
df['ts'] = pd.to_datetime(df['ts'])

# Setting the time column as the new index
df = df.set_index('ts', drop=True)

# Keeping only some columns. The explicit copy makes the column assignments
# below operate on an independent frame instead of a slice of the full one.
df = df[['from','to','body_en']].copy()

# Remove the domain name from names. regex=False makes the dots in the domain
# literal regardless of the pandas version's default for `regex`.
df['from'] = df['from'].str.replace('@q3mcco35auwcstmt.onion','', regex=False)
df['to'] = df['to'].str.replace('@q3mcco35auwcstmt.onion','', regex=False)

# Replacing some html characters to ascii
# NOTE(review): the patterns carry no trailing ';' -- if the data holds full
# entities such as '&#45;' a stray ';' is left behind. Confirm against the CSV.
df = df.replace({'&#45':'-',
                 '&#39':"'",
                 '&#38':'&',
                 '&amp':'&'}, regex=True)


# Defining the function "Beatrice Methodology" to filter the data based on (1) theme, (2) code and (3) words/phrases in a list.

In [35]:
def beatrice_methodology (theme, code, filter_list, regex=True):
  """Export and summarise the chat messages that match each filter.

  For every entry of ``filter_list``, the rows of the global ``df`` whose
  ``body_en`` contains the entry (case-insensitively) are written to
  ``<theme>_<code>_<filter>.xlsx`` (spaces replaced by dots), the file is
  passed to ``files.download`` and the number of matches per sender
  (``from`` column) is printed.

  Args:
    theme: Theme label; only used to build the file name.
    code: Code label; only used to build the file name.
    filter_list: Iterable of words/phrases to search for.
    regex: When True (the default, matching the previous behaviour) each
      filter is interpreted as a regular expression; pass False to match
      the text literally.

  Returns:
    None. Results are written to disk, downloaded and printed.
  """
  # The labels do not depend on the filter, so normalise them once.
  theme = theme.replace(' ','.')
  code = code.replace(' ','.')

  for phrase in filter_list:
    # na=False: a missing body counts as "no match". Without it the mask
    # contains NaN and the boolean indexing below raises ValueError.
    mask = df['body_en'].str.contains(phrase, case=False, na=False, regex=regex)
    df_result = df[mask]

    file_name = theme+'_'+code+'_'+phrase.replace(' ','.')+'.xlsx'
    df_result.to_excel(file_name)

    # Download the Excel file to your local machine. to_excel has already
    # written the file synchronously, so no wait-for-file loop is needed.
    files.download(file_name)

    # Report the phrase exactly as given (not re-derived from the file name,
    # which would turn genuine dots in the phrase into spaces).
    print('-',len(df_result),'conversations found for the filter: "'+phrase+'". Those records are available in the file '+file_name)
    print(df_result['from'].value_counts())
    print()

# Displaying a form to facilitate the input of parameters

In [36]:
import ipywidgets as widgets
from IPython.display import display

# Define the function to be called when the button is clicked
def on_button_clicked(b):
    """Run ``beatrice_methodology`` with the values currently in the form.

    Reads the module-level ``theme``, ``code`` and ``filter_list`` widgets.
    The filter text area is split into one filter per line.

    Args:
        b: The clicked button, as passed by the ``on_click`` callback; unused.
    """
    # splitlines() also copes with '\r\n'. Whitespace-only lines (e.g. a
    # trailing newline in the text area) are dropped: an empty filter would
    # match every message and export the whole dataset.
    filter_list_values = [
        line for line in filter_list.value.splitlines() if line.strip()
    ]
    if theme.value and code.value and filter_list_values:
        beatrice_methodology(theme.value, code.value, filter_list_values)
    else:
        print('Please fill in all fields.')

# Create the input widgets
theme = widgets.Text(description='Theme:')
code = widgets.Text(description='Code:')
filter_list = widgets.Textarea(description='Filter(s):', layout={'height': '150px'})
button = widgets.Button(description='Run Function')

# Create two rows of input widgets
input_row1 = widgets.HBox([theme])
input_row2 = widgets.HBox([code])
input_row3 = widgets.HBox([filter_list])

# Create the button and attach the on_button_clicked function
button = widgets.Button(description='Run Function', style={'button_color': 'gray'})
button.on_click(on_button_clicked)

# Create a VBox layout to arrange the widgets vertically
input_box = widgets.VBox([input_row1, input_row2, input_row3])
output_box = widgets.Output()
display(widgets.VBox([input_box, button, output_box]))


VBox(children=(VBox(children=(HBox(children=(Text(value='', description='Theme:'),)), HBox(children=(Text(valu…

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

- 22 conversations found for the filter: "I believe". Those records are available in the file communication.style_assertive_I.believe.xlsx
target       6
bloodrush    3
azot         2
driver       2
van          1
price        1
professor    1
baget        1
revers       1
stern        1
deploy       1
best         1
pumba        1
Name: from, dtype: int64



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

- 33 conversations found for the filter: "I feel". Those records are available in the file communication.style_assertive_I.feel.xlsx
stern        6
target       3
bio          3
baget        2
professor    2
bill         2
kevin        2
bloodrush    2
pumba        2
tom          2
many         2
mors         1
dandis       1
bentley      1
terry        1
mango        1
Name: from, dtype: int64



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

- 0 conversations found for the filter: "I'm confident". Those records are available in the file communication.style_assertive_I'm.confident.xlsx
Series([], Name: from, dtype: int64)

