# 4.3. Data Cleaning - source Feodo Tracker

In [1]:
import datetime
import os
import time
import pandas as pd 
import requests
import urllib3
import json
import sys
import numpy as np
import math

# Display configuration: show long cell contents (up to 1000 characters)
# and never hide columns when a DataFrame is rendered.
pd.set_option('display.max_colwidth', 1000)
pd.options.display.max_columns = None

In [2]:
# Folder holding the structured Feodo Tracker CSVs produced by the previous pipeline stage.
# A raw string already keeps backslashes literal, so they must NOT be doubled:
# the previous r"C:\\Users\\..." form embedded two backslashes per separator.
# NOTE(review): this is an absolute, machine-specific path — consider making it configurable.
FEODO_STRUCTURED_DIR = r"C:\Users\leona\OneDrive\Desktop\Tesi\Pipeline\Datasets\Pipeline\IocSegnalations\PostDataStructurization\Feodo_Tracker"

# Input files read by the cleaning cells below.
entries = os.path.join(FEODO_STRUCTURED_DIR, "Entries.csv")
malwares = os.path.join(FEODO_STRUCTURED_DIR, "Malwares.csv")

## Entries - Feodo Tracker
### Data Cleaning

#### Normalization


In [3]:
# Load the structured Feodo Tracker entries; low_memory=False makes pandas
# parse the file in one pass instead of in chunks.
# NOTE(review): the rendered output shows an 'Unnamed: 0' column, which looks like
# a previously saved index — confirm whether it should be kept.
entries_df = pd.read_csv(filepath_or_buffer=entries, low_memory=False)
entries_df

Unnamed: 0,ID_ENTRY,source,ioc,ioc_type,threat_type,malware,first_seen,last_seen,reporter,reference
0,0,Feodo Tracker,89.101.97.139:443,ip:port,botnet,QakBot,2021-09-29 08:42:47,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
1,1,Feodo Tracker,41.228.22.180:443,ip:port,botnet,QakBot,2021-09-29 08:42:51,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
2,2,Feodo Tracker,144.139.47.206:443,ip:port,botnet,QakBot,2021-09-30 21:25:38,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
3,3,Feodo Tracker,41.86.42.158:995,ip:port,botnet,QakBot,2021-10-08 14:48:30,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
4,4,Feodo Tracker,63.143.92.99:995,ip:port,botnet,QakBot,2021-10-08 14:48:31,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
...,...,...,...,...,...,...,...,...,...,...
333,333,Feodo Tracker,41.97.47.7:443,ip:port,botnet,QakBot,2023-08-24 23:29:43,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
334,334,Feodo Tracker,92.9.44.234:2222,ip:port,botnet,QakBot,2023-08-25 01:29:36,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
335,335,Feodo Tracker,78.152.198.132:443,ip:port,botnet,QakBot,2023-08-25 10:08:52,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
336,336,Feodo Tracker,113.193.95.229:443,ip:port,botnet,QakBot,2023-08-25 11:24:40,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/


In [4]:
# Punctuation treated as a word separator: every character listed here is
# replaced by a single space during normalization.
_SEPARATOR_TABLE = str.maketrans({ch: ' ' for ch in '?!%.-[]()\\,_'})


def _normalize_text(value):
    """Lower-case a string, blank out separator punctuation and strip outer whitespace.

    Anything that is not a ``str`` (NaN, None, numbers, ...) is returned unchanged.
    """
    if not isinstance(value, str):
        return value
    return value.lower().translate(_SEPARATOR_TABLE).strip()


def prepare_for_similarity_comparison(df, col_i, col_f):
    """Normalize the text of one column so values can be compared for similarity.

    Each string in ``df[col_i]`` is lower-cased, the characters
    ``? ! % . - [ ] ( ) \\ , _`` are each replaced by a space, and leading/trailing
    whitespace is removed. Interior runs of spaces are left as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to process. It is modified in place; any index is supported.
    col_i : str
        Name of the column holding the raw text.
    col_f : str
        Name of the column receiving the normalized text. It may be equal to
        ``col_i`` (overwrite) or a new column name.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with ``col_f`` filled.

    Notes
    -----
    Missing values and other non-string entries are copied through untouched
    instead of raising.
    """
    # Element-wise map keeps the operation independent of the index labels,
    # so frames that were filtered or re-indexed are handled correctly.
    df[col_f] = df[col_i].map(_normalize_text)
    return df

In [5]:
# Normalize the 'malware' names of the entries table, overwriting the column.
Feodo_Tracker_ENTRIES_df = prepare_for_similarity_comparison(
    df=entries_df,
    col_i='malware',
    col_f='malware',
)
Feodo_Tracker_ENTRIES_df

Unnamed: 0,ID_ENTRY,source,ioc,ioc_type,threat_type,malware,first_seen,last_seen,reporter,reference
0,0,Feodo Tracker,89.101.97.139:443,ip:port,botnet,qakbot,2021-09-29 08:42:47,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
1,1,Feodo Tracker,41.228.22.180:443,ip:port,botnet,qakbot,2021-09-29 08:42:51,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
2,2,Feodo Tracker,144.139.47.206:443,ip:port,botnet,qakbot,2021-09-30 21:25:38,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
3,3,Feodo Tracker,41.86.42.158:995,ip:port,botnet,qakbot,2021-10-08 14:48:30,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
4,4,Feodo Tracker,63.143.92.99:995,ip:port,botnet,qakbot,2021-10-08 14:48:31,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
...,...,...,...,...,...,...,...,...,...,...
333,333,Feodo Tracker,41.97.47.7:443,ip:port,botnet,qakbot,2023-08-24 23:29:43,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
334,334,Feodo Tracker,92.9.44.234:2222,ip:port,botnet,qakbot,2023-08-25 01:29:36,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
335,335,Feodo Tracker,78.152.198.132:443,ip:port,botnet,qakbot,2023-08-25 10:08:52,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/
336,336,Feodo Tracker,113.193.95.229:443,ip:port,botnet,qakbot,2023-08-25 11:24:40,2023-08-25,Feodo Tracker blocklist,https://feodotracker.abuse.ch/


## Malwares - Feodo Tracker
### Data Cleaning

#### Normalization


In [6]:
# Load the structured Feodo Tracker malware list; low_memory=False makes pandas
# parse the file in one pass instead of in chunks.
malwares_df = pd.read_csv(filepath_or_buffer=malwares, low_memory=False)
malwares_df

Unnamed: 0,malware,type
0,QakBot,unknown
1,Pikabot,unknown


In [7]:
# Normalize the 'malware' names of the malware table, overwriting the column.
Feodo_Tracker_MALWARES_df = prepare_for_similarity_comparison(
    df=malwares_df,
    col_i='malware',
    col_f='malware',
)
Feodo_Tracker_MALWARES_df

Unnamed: 0,malware,type
0,qakbot,unknown
1,pikabot,unknown


# SAVE into ..\IocSegnalations\PostDataCleaning\Feodo_Tracker\ folder


In [8]:
# Cleaned tables keyed by the file stem each one will be saved under.
dataframes_dict = {
    'Entries': Feodo_Tracker_ENTRIES_df,
    'Malwares': Feodo_Tracker_MALWARES_df,
}


In [9]:
# Destination folder for the cleaned Feodo Tracker tables (ends with a path separator,
# so file names can be appended directly).
save_directory = "C:\\Users\\leona\\OneDrive\\Desktop\\Tesi\\Pipeline\\Datasets\\Pipeline\\IocSegnalations\\PostDataCleaning\\Feodo_Tracker\\"

# Write every table to "<key>.csv" in that folder, without the DataFrame index.
for key in dataframes_dict:
    df = dataframes_dict[key]
    file_path = f"{save_directory}{key}.csv"
    df.to_csv(file_path, index=False)