# Scraping Date: 13/02/2020

### Imports

In [3]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import json

import re
from googletrans import Translator
from langdetect import detect
from selenium.webdriver.chrome.options import Options  
from selenium import webdriver


# Raise the notebook display limits so wide/long scraped frames are shown
# without truncation.
pd.options.display.max_rows = 5000
pd.options.display.max_columns = 500

### Main declarations

In [5]:
# Nationality lookup loaded from demonyms.csv (thanks to
# https://github.com/knowitall/chunkedextractor/blob/master/src/main/resources/edu/knowitall/chunkedextractor/demonyms.csv).
# The file is read without a header row; its first column becomes the dict
# keys and the remaining column the values (presumably demonym -> country
# name -- confirm against the CSV).
# squeeze("columns") collapses only the column axis, so a file holding a
# single row still yields a Series (and therefore a dict) instead of a scalar
# that has no .to_dict().
nat = pd.read_csv('demonyms.csv', header=None, index_col=0).squeeze("columns").to_dict()

In [6]:
# Pieces of the SHERLOC listing URL. A request URL is formed as
#     json_start + <startAt offset> + json_end
# URL-decoded, the criteria parameter reads
#     {"filters":[],"startAt":<offset>,"sortings":""}
json_start = (
    'https://sherloc.unodc.org/cld/v3/sherloc/cldb/data.json'
    '?lng=en'
    '&criteria=%7B%22filters%22:%5B%5D,%22startAt%22:'
)
json_end = (
    ',%22sortings%22:'
    '%22%22%7D'
)

In [7]:
# Configure Chrome to run headless (no visible browser window); the resulting
# `driver` is what the scraping loop uses to load each case page.
chrome_options = Options()  
chrome_options.add_argument("--headless")  

# NOTE(review): the chromedriver path below is specific to one machine and has
# to be adjusted before running elsewhere. `executable_path` is, as far as I
# know, deprecated in Selenium 4 in favour of a Service object -- confirm
# against the installed Selenium version.
driver = webdriver.Chrome(executable_path='C:/Users/jason/centralenv/Need_to_PATH/chromedriver.exe', options=chrome_options)

In [22]:
# Empty result frame that the scraping loop fills row by row.
df = pd.DataFrame(
    columns=[
        # Case identification
        'ID', 'Page_Title', 'Fact_Summary', 'URL',
        # Crime-type indicator flags
        'Trafficking_Persons', 'Cyber_Crime', 'Drug', 'Money_Laundering',
        'Criminal_group', 'Migrant_Smuggling', 'Wild_life', 'Terrorism',
        'Falsified_Medicine', 'Trafficking_Cultural_Property',
        'Counterfeiting', 'Other', 'Piracy', 'Illicit_Firearms',
        'Corruption', 'Justice_Obstruction',
        # Summary metadata
        'Language', 'Cross_Cutting',
        # Victim details
        'Victim_Name', 'Victim_Gender', 'Victim_Nationality', 'Victim_Age',
        'Victim_Born',
        # Defendant details
        'Defendant_Name', 'Defendant_Gender', 'Defendant_Nationality',
        'Defendant_Age', 'Defendant_Born',
        # Verdict / sentence dates
        'Verdict_Year', 'Verdict_Month', 'Sentenced_Year', 'Sentenced_Month',
    ]
)
df

Unnamed: 0,ID,Page_Title,Fact_Summary,URL,Trafficking_Persons,Cyber_Crime,Drug,Money_Laundering,Criminal_group,Migrant_Smuggling,Wild_life,Terrorism,Falsified_Medicine,Trafficking_Cultural_Property,Counterfeiting,Other,Piracy,Illicit_Firearms,Corruption,Justice_Obstruction,Language,Cross_Cutting,Victim_Name,Victim_Gender,Victim_Nationality,Victim_Age,Victim_Born,Defendant_Name,Defendant_Gender,Defendant_Nationality,Defendant_Age,Defendant_Born,Verdict_Year,Verdict_Month,Sentenced_Year,Sentenced_Month


In [23]:
def ret_types(typeList):
    """Convert the crime-type labels of a case page into 0/1 indicator flags.

    Args:
        typeList: Iterable of crime-type labels as shown on the page (for
            example ``'Cybercrime'`` or ``'Drug offences'``). ``None`` or an
            empty sequence means that no crime type was found.

    Returns:
        dict: Each of the 16 crime-type column names mapped to 0, with the
        columns whose label occurs in ``typeList`` set to 1. A label without
        an explicit mapping is used as the key unchanged, so labels that
        already equal a column name (e.g. ``'Terrorism'``) set that column
        and unrecognised labels show up as an additional key.
    """
    flag_columns = (
        'Trafficking_Persons', 'Cyber_Crime', 'Drug', 'Money_Laundering',
        'Criminal_group', 'Migrant_Smuggling', 'Wild_life', 'Terrorism',
        'Falsified_Medicine', 'Trafficking_Cultural_Property',
        'Counterfeiting', 'Other', 'Piracy', 'Illicit_Firearms',
        'Corruption', 'Justice_Obstruction',
    )

    # Page label -> dataset column, for labels that differ from the column name.
    label_to_column = {
        'Cybercrime': 'Cyber_Crime',
        'Money laundering': 'Money_Laundering',
        'Trafficking in persons': 'Trafficking_Persons',
        'Participation in an organized criminal group': 'Criminal_group',
        'Smuggling of migrants': 'Migrant_Smuggling',
        'Trafficking in cultural property': 'Trafficking_Cultural_Property',
        'Falsified medical products': 'Falsified_Medicine',
        'Drug offences': 'Drug',
        'Obstruction of Justice': 'Justice_Obstruction',
        'Piracy and Maritime crime': 'Piracy',
        'Trafficking in firearms': 'Illicit_Firearms',
        # Environmental crime is recorded in the declared 'Wild_life' column;
        # the dataset has no separate 'Environmental' column, so a flag
        # written under that name would be lost when columns are reordered.
        'Crimes that affect the environment': 'Wild_life',
        'Other Crimes': 'Other',
    }

    flags = dict.fromkeys(flag_columns, 0)
    for label in typeList or ():
        flags[label_to_column.get(label, label)] = 1

    return flags

### Scrape the data

In [24]:
# for l in range(900,910,10):
# for l in range(0,3080,10):
# Scrape all cases: page through the JSON listing, then open every case page
# in the headless browser and extract its fields into `df`.
# `bugs` collects the URLs of case pages that could not be loaded or parsed.
bugs = []
# NOTE(review): the step of 10 presumably matches the listing's page size and
# 3240 the number of cases at scraping time -- confirm against the API.
for l in range(0,3240,10):
    json_link = json_start + str(l) + json_end
    
    response = requests.get(json_link).json()
    cases = response['results']
    
    for case in cases:
        # The new row is appended at the end of the frame; `index` is its label.
        index = len(df)
        # Get data from the JSON response
        df.at[index,'URL'] =  'https://sherloc.unodc.org/cld' + case['uri']
                
        try:
            df.at[index,'ID'] = case['values']['caseLaw@unodcNo_s1']
            # Progress indicator. NOTE(review): 3078 is hard-coded, presumably
            # the expected number of unique case IDs -- confirm.
            print(round(df.ID.nunique()/3078*100,3),' %')

        except:
            df.at[index,'ID'] = ''
            
        # Verdict date: the value is split on '-'; the first part is stored as
        # the year and the second (when present) as the month.
        try:
            ver_date = case['values']['caseLaw@decisionVerdictDate_s1'].split('-')
        except:
            df.at[index,'Verdict_Year'] = ''
            df.at[index,'Verdict_Month'] = ''
        else:
            df.at[index,'Verdict_Year'] = ver_date[0]
            try:
                df.at[index,'Verdict_Month'] = ver_date[1]
            except:
                df.at[index,'Verdict_Month'] = ''
        try:
            # Hit and get page
            driver.get(df.iloc[index]['URL'])
            soup = BeautifulSoup(driver.page_source,"html.parser")
        except:
            # Page could not be fetched/parsed: remember the URL and leave the
            # row with only the fields taken from the JSON listing.
            bugs.append(df.iloc[index]['URL'])
        else:
            # Get Sentence Date
            # Uses the third space-separated token of the field text and splits
            # it on '-' (presumably "<label words> YYYY-MM-DD" -- confirm
            # against the page markup).
            try:
                sent_date = soup.find('div',{'class': 'sentencedDate field'}).text.split(' ')[2].split('-')
            except:
                df.at[index,'Sentenced_Year'] = ''
                df.at[index,'Sentenced_Month'] = ''
            else:
                df.at[index,'Sentenced_Year'] = sent_date[0]
                try:
                    df.at[index,'Sentenced_Month'] = sent_date[1]  
                except:
                    df.at[index,'Sentenced_Month'] = ''

            # Get Title
            try:
                df.at[index,'Page_Title'] = soup.find('span',{'class':'title'}).text
            except: 
                df.at[index,'Page_Title'] = ''


            # Get Country
            # 'Country' is not among the columns the frame was created with; it
            # is added on first assignment. Takes the first span with class
            # 'text' on the page (assumed to hold the country -- verify).
            try:
                df.at[index,'Country'] = soup.find('span',{'class':'text'}).text
            except: 
                df.at[index,'Country'] = ''

            # Get Crime Types
            try:
                 types = [item.text for item in soup.find_all('h4',{'class':'media-heading'})]
            except:
                types = []

            # One 0/1 flag per crime type; keys not yet present in the frame
            # become new columns.
            for key,val in ret_types(types).items():
                df.at[index,key] = val

            # Get Summary
            sum_id = ''
            summary = soup.find('div',{'class':'factSummary'})
            if summary:
                # Check if many languages and take English
                tabs = summary.find_all('ul')
                if len(tabs)>0:
                    for item in tabs[0].find_all('li'):
                        if item.text == 'English':
                            # Drop the first character of the tab link's href
                            # (presumably '#') to get the id of the English pane.
                            sum_id = item.find('a')['href'][1:]
                            break

                # Every run of characters other than A-Z, a-z, 0-9 is replaced
                # by a single space, so punctuation and all non-ASCII letters
                # are removed from the stored summary.
                if sum_id != '':
                    df.at[index,'Fact_Summary'] = re.sub('[^A-Za-z0-9]+', ' ',summary.find('div',{'id':sum_id}).text).strip()
                else:
                    df.at[index,'Fact_Summary'] =  re.sub('[^A-Za-z0-9]+', ' ',summary.text).strip()
            else:
                df.at[index,'Fact_Summary'] =  ''

            # Get Language
            # Detected from the already-cleaned summary text; any detection
            # failure (e.g. empty text) yields ''.
            try: 
                df.at[index,'Language'] = detect(df.iloc[index]['Fact_Summary'])
            except:
                df.at[index,'Language'] = ''

            # Get Cross Cutting, Countries Involved
            cross_cutting = soup.find_all('div', {'class':'containerListElement'})

            cross_data = []
            coop_countries = []
            ofnd_countries = []

            # A row is classified by checking whether its HTML contains the
            # corresponding marker substring.
            for row in cross_cutting:
                if str(row).find('crossCuttingIssues') != -1:
                    values = row.find_all('div',{'class': 'value'})
                    cross_data.extend([re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() for item in values])

                if str(row).find('internationalCooperation_involvedCountries') != -1:
                    values = row.find_all('span',{'class':'text'})
                    coop_countries.extend([item.text for item in values])

                if str(row).find('offending_involvedCountries') != -1:
                    values = row.find_all('span',{'class':'text'})
                    ofnd_countries.extend([item.text for item in values])

            # The whole list is stored in a single cell.
            df.at[index,'Cross_Cutting'] = cross_data

            # One indicator column per country, created (filled with 0) the
            # first time that country is seen.
            for c in coop_countries:
                if 'Cooperation_'+c not in df.columns:
                    df['Cooperation_'+c] = 0
                df.at[index, 'Cooperation_'+c] = 1

            for c in ofnd_countries:
                if 'Offending_'+c not in df.columns:
                    df['Offending_'+c] = 0
                df.at[index, 'Offending_'+c] = 1

            # Get People Involved
            try:
                defs = soup.select(".defendantsRespondents")[0]
            except:
                defendants = []
            else:
                defendants = defs.find_all('div',{'class':'person'})

            try:
                vics = soup.select(".victimsPlaintiffs")[0]
            except: 
                victims = []
            else:
                victims = vics.find_all('div',{'class':'person'})

            # The case needs max(#defendants, #victims) rows: append copies of
            # the case row (made before any person field is filled in) for the
            # additional people.
            for j in range(max([len(defendants), len(victims)]) - 1):
                df.loc[len(df)] = df.loc[index]

            # Row `i` receives the (i - index)-th defendant and victim; rows
            # beyond the end of the shorter list leave that side's fields
            # untouched.
            for i in range(index, len(df)):
                if i - index + 1 <= len(defendants):
                    # Parallel lists of cleaned, non-empty label and value
                    # texts; a field is looked up via its label's position.
                    labels  = [re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() for item in defendants[i-index].find_all('div',{'class':'label'}) if re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() != '']
                    values = [re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() for item in defendants[i - index].find_all('div',{'class':'value'}) if re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() != '']

                    try:
                        df.at[i,'Defendant_Name'] = values[labels.index('Defendant')]
                    except:
                        df.at[i,'Defendant_Name'] = ''

                    try:
                        df.at[i,'Defendant_Gender'] = values[labels.index('Gender')]
                    except:
                        df.at[i,'Defendant_Gender'] = ''

                    # The nationality text is translated through the `nat`
                    # lookup; a text missing from it ends up as ''.
                    try:
                        df.at[i,'Defendant_Nationality'] = nat[values[labels.index('Nationality')]]
                    except:
                        df.at[i,'Defendant_Nationality'] = ''

                    try:
                        df.at[i,'Defendant_Age'] = values[labels.index('Age')]
                    except:
                        df.at[i,'Defendant_Age'] = ''

                    try:
                        df.at[i,'Defendant_Born'] = values[labels.index('Born')]
                    except:
                        df.at[i,'Defendant_Born'] = ''

                if i - index + 1 <= len(victims):
                    # Same label/value extraction as for defendants.
                    labels  = [re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() for item in victims[i-index].find_all('div',{'class':'label'}) if re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() != '']
                    values = [re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() for item in victims[i - index].find_all('div',{'class':'value'}) if re.sub('[^A-Za-z0-9]+', ' ', item.text).strip() != '']

                    try:
                        df.at[i,'Victim_Name'] = values[labels.index('Victim')]
                    except:
                        df.at[i,'Victim_Name'] = ''

                    try:
                        df.at[i,'Victim_Gender'] = values[labels.index('Gender')]
                    except:
                        df.at[i,'Victim_Gender'] = ''

                    try:
                        df.at[i,'Victim_Nationality'] = nat[values[labels.index('Nationality')]]
                    except:
                        df.at[i,'Victim_Nationality'] = ''

                    try:
                        df.at[i,'Victim_Age'] = values[labels.index('Age')]
                    except:
                        df.at[i,'Victim_Age'] = ''

                    try:
                        df.at[i,'Victim_Born'] = values[labels.index('Born')]
                    except:
                        df.at[i,'Victim_Born'] = ''

0.032  %
0.065  %
0.097  %
0.13  %
0.162  %
0.195  %
0.227  %
0.26  %
0.292  %
0.325  %
0.357  %
0.39  %
0.422  %
0.455  %
0.487  %
0.52  %
0.552  %
0.585  %
0.617  %
0.617  %
0.65  %
0.65  %
0.682  %
0.715  %
0.747  %
0.78  %
0.812  %
0.845  %
0.877  %
0.91  %
0.942  %
0.975  %
1.007  %
1.04  %
1.072  %
1.105  %
1.137  %
1.17  %
1.202  %
1.235  %
1.267  %
1.3  %
1.332  %
1.365  %
1.397  %
1.429  %
1.462  %
1.494  %
1.527  %
1.559  %
1.592  %
1.624  %
1.657  %
1.689  %
1.722  %
1.722  %
1.754  %
1.787  %
1.819  %
1.819  %
1.852  %
1.884  %
1.917  %
1.949  %
1.982  %
2.014  %
2.047  %
2.079  %
2.112  %
2.144  %
2.177  %
2.209  %
2.209  %
2.242  %
2.274  %
2.307  %
2.339  %
2.372  %
2.404  %
2.437  %
2.469  %
2.502  %
2.534  %
2.567  %
2.599  %
2.632  %
2.632  %
2.664  %
2.697  %
2.729  %
2.762  %
2.794  %
2.827  %
2.859  %
2.891  %
2.924  %
2.956  %
2.989  %
3.021  %
3.054  %
3.086  %
3.119  %
3.151  %
3.184  %
3.216  %
3.249  %
3.281  %
3.314  %
3.346  %
3.379  %
3.411  %
3.444  %
3.47

27.096  %
27.128  %
27.16  %
27.193  %
27.225  %
27.258  %
27.29  %
27.323  %
27.355  %
27.388  %
27.42  %
27.453  %
27.485  %
27.518  %
27.55  %
27.55  %
27.583  %
27.615  %
27.648  %
27.68  %
27.713  %
27.745  %
27.778  %
27.81  %
27.843  %
27.875  %
27.908  %
27.94  %
27.973  %
28.005  %
28.038  %
28.038  %
28.07  %
28.103  %
28.135  %
28.168  %
28.2  %
28.233  %
28.265  %
28.298  %
28.33  %
28.363  %
28.395  %
28.428  %
28.46  %
28.493  %
28.525  %
28.558  %
28.59  %
28.622  %
28.655  %
28.687  %
28.72  %
28.752  %
28.785  %
28.817  %
28.85  %
28.882  %
28.915  %
28.947  %
28.98  %
29.012  %
29.045  %
29.077  %
29.11  %
29.142  %
29.175  %
29.207  %
29.24  %
29.272  %
29.305  %
29.337  %
29.37  %
29.402  %
29.435  %
29.467  %
29.5  %
29.532  %
29.565  %
29.597  %
29.63  %
29.662  %
29.695  %
29.727  %
29.76  %
29.792  %
29.825  %
29.857  %
29.89  %
29.922  %
29.955  %
29.987  %
30.019  %
30.052  %
30.084  %
30.117  %
30.149  %
30.182  %
30.214  %
30.247  %
30.279  %
30.312  %
30.34

53.541  %
53.574  %
53.606  %
53.639  %
53.671  %
53.704  %
53.736  %
53.769  %
53.801  %
53.834  %
53.866  %
53.899  %
53.931  %
53.931  %
53.964  %
53.996  %
54.029  %
54.061  %
54.094  %
54.126  %
54.159  %
54.191  %
54.224  %
54.256  %
54.288  %
54.321  %
54.353  %
54.386  %
54.418  %
54.451  %
54.483  %
54.516  %
54.548  %
54.581  %
54.613  %
54.646  %
54.678  %
54.711  %
54.743  %
54.776  %
54.808  %
54.841  %
54.873  %
54.906  %
54.938  %
54.971  %
55.003  %
55.036  %
55.068  %
55.101  %
55.133  %
55.166  %
55.198  %
55.231  %
55.263  %
55.296  %
55.328  %
55.361  %
55.393  %
55.426  %
55.458  %
55.491  %
55.523  %
55.556  %
55.588  %
55.621  %
55.653  %
55.686  %
55.718  %
55.75  %
55.783  %
55.815  %
55.848  %
55.88  %
55.913  %
55.945  %
55.978  %
56.01  %
56.043  %
56.075  %
56.108  %
56.14  %
56.173  %
56.205  %
56.238  %
56.27  %
56.303  %
56.335  %
56.368  %
56.4  %
56.433  %
56.465  %
56.498  %
56.53  %
56.563  %
56.595  %
56.628  %
56.66  %
56.693  %
56.725  %
56.758  %

80.279  %
80.312  %
80.344  %
80.377  %
80.409  %
80.442  %
80.474  %
80.507  %
80.539  %
80.572  %
80.604  %
80.637  %
80.669  %
80.702  %
80.734  %
80.767  %
80.799  %
80.832  %
80.864  %
80.897  %
80.929  %
80.962  %
80.994  %
81.027  %
81.059  %
81.092  %
81.124  %
81.157  %
81.189  %
81.222  %
81.254  %
81.287  %
81.319  %
81.352  %
81.384  %
81.417  %
81.449  %
81.481  %
81.514  %
81.546  %
81.579  %
81.611  %
81.644  %
81.676  %
81.709  %
81.741  %
81.774  %
81.806  %
81.839  %
81.871  %
81.904  %
81.936  %
81.969  %
82.001  %
82.034  %
82.066  %
82.099  %
82.131  %
82.164  %
82.196  %
82.229  %
82.261  %
82.294  %
82.326  %
82.359  %
82.391  %
82.424  %
82.456  %
82.489  %
82.521  %
82.554  %
82.586  %
82.619  %
82.651  %
82.684  %
82.716  %
82.749  %
82.781  %
82.814  %
82.846  %
82.878  %
82.911  %
82.943  %
82.976  %
83.008  %
83.041  %
83.073  %
83.106  %
83.138  %
83.171  %
83.203  %
83.236  %
83.268  %
83.301  %
83.333  %
83.366  %
83.398  %
83.398  %
83.431  %
83.463  %


### Sort the dataset

In [77]:
# Order the rows by case ID, in place, and renumber the index from 0.
df.sort_values('ID', inplace=True, ignore_index=True)

In [78]:
# Final column order of the dataset: the descriptive case columns first, then
# one 'Cooperation_<country>' column per cooperating country and one
# 'Offending_<country>' column per country involved in the offence.
sorted_columns = (
    [
        # Case description
        'ID', 'Page_Title', 'URL', 'Fact_Summary', 'Language',
        'Cross_Cutting', 'Country',
        # Crime-type flags
        'Corruption', 'Counterfeiting', 'Criminal_group', 'Cyber_Crime',
        'Drug', 'Falsified_Medicine', 'Illicit_Firearms',
        'Justice_Obstruction', 'Migrant_Smuggling', 'Money_Laundering',
        'Other', 'Piracy', 'Terrorism', 'Trafficking_Cultural_Property',
        'Trafficking_Persons', 'Wild_life',
        # Dates
        'Sentenced_Month', 'Sentenced_Year', 'Verdict_Month', 'Verdict_Year',
        # Defendant
        'Defendant_Age', 'Defendant_Born', 'Defendant_Gender',
        'Defendant_Name', 'Defendant_Nationality',
        # Victim
        'Victim_Age', 'Victim_Born', 'Victim_Gender', 'Victim_Name',
        'Victim_Nationality',
    ]
    + ['Cooperation_' + country for country in [
        'Afghanistan', 'Algeria', 'Andorra', 'Angola', 'Argentina',
        'Australia', 'Austria', 'Bahamas', 'Barbados', 'Belgium',
        'Bosnia and Herzegovina', 'Brazil', 'Bulgaria', 'Canada', 'Chile',
        'China', 'Colombia', 'Costa Rica', 'Croatia', 'Czechia',
        'Denmark', 'Dominican Republic', 'Ecuador', 'European Union', 'France',
        'Georgia', 'Germany', 'Greece', 'Grenada', 'Guatemala',
        'Guyana', 'Honduras', 'Hungary', 'Iceland', 'India',
        'Indonesia', 'Iraq', 'Ireland', 'Israel', 'Italy',
        'Japan', 'Kazakhstan', 'Kenya', 'Latvia', 'Lebanon',
        'Liberia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malaysia',
        'Malta', 'Mauritius', 'Mexico', 'Montenegro', 'Mozambique',
        'Netherlands', 'New Zealand', 'Nicaragua', 'Nigeria', 'North Macedonia',
        'Norway', 'Pakistan', 'Panama', 'Paraguay', 'Peru',
        'Philippines', 'Poland', 'Portugal', 'Romania', 'Russian Federation',
        'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles', 'Singapore',
        'Slovakia', 'Slovenia', 'Somalia', 'South Africa', 'Spain',
        'Sweden', 'Switzerland', 'Thailand', 'Trinidad and Tobago', 'Turkey',
        'Ukraine', 'United Arab Emirates',
        'United Kingdom of Great Britain and Northern Ireland',
        'United States of America', 'Uruguay',
        'Venezuela (Bolivarian Republic of)',
    ]]
    + ['Offending_' + country for country in [
        'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
        'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria',
        'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados',
        'Belarus', 'Belgium', 'Belize', 'Benin',
        'Bolivia (Plurinational State of)',
        'Bosnia and Herzegovina', 'Brazil', 'Bulgaria', 'Burkina Faso',
        'Cabo Verde',
        'Cambodia', 'Cameroon', 'Canada', 'Chad', 'Chile',
        'China', 'Colombia', 'Congo', 'Costa Rica', 'Croatia',
        'Cuba', 'Cyprus', 'Czechia', "Côte d'Ivoire",
        'Democratic Republic of the Congo',
        'Denmark', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador',
        'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia',
        'European Union', 'Fiji', 'Finland', 'France', 'Gambia',
        'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada',
        'Guatemala', 'Guinea', 'Guyana', 'Haiti', 'Honduras',
        'Hungary', 'Iceland', 'India', 'Indonesia',
        'International and Regional Bodies',
        'Iran (Islamic Republic of)', 'Iraq', 'Ireland', 'Israel', 'Italy',
        'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya',
        'Kuwait', "Lao People's Democratic Republic", 'Latvia', 'Lebanon',
        'Lesotho',
        'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg',
        'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta',
        'Marshall Islands', 'Mauritania', 'Mauritius', 'Mexico',
        'Micronesia (Federated States of)',
        'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar',
        'Namibia', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua',
        'Niger', 'Nigeria', 'North Macedonia', 'Norway', 'Oman',
        'Pakistan', 'Palau', 'Panama', 'Paraguay', 'Peru',
        'Philippines', 'Poland', 'Portugal', 'Qatar', 'Republic of Korea',
        'Republic of Moldova', 'Romania', 'Russian Federation', 'Rwanda',
        'Saudi Arabia',
        'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore',
        'Slovakia', 'Slovenia', 'Somalia', 'South Africa', 'Spain',
        'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland',
        'Syrian Arab Republic', 'Thailand', 'Togo', 'Tonga',
        'Trinidad and Tobago',
        'Tunisia', 'Turkey', 'Uganda', 'Ukraine', 'Undefined',
        'United Arab Emirates',
        'United Kingdom of Great Britain and Northern Ireland',
        'United Republic of Tanzania', 'United States of America', 'Uruguay',
        'Uzbekistan', 'Venezuela (Bolivarian Republic of)', 'Viet Nam',
        'Yemen', 'Zambia',
    ]]
)

In [79]:
# Put the columns into the order given by sorted_columns. Columns of the frame
# that are not named there are dropped; names that the frame lacks are added
# as all-NaN columns.
df = df.reindex(columns=sorted_columns)

### Replace NANs

In [80]:
# Replace NaN by 0 in every column except the descriptive ones listed below,
# i.e. in the Cooperation_* / Offending_* country indicator columns. The
# listed columns keep their missing values as they are.
for column in sorted_columns:
    if column in {
        'ID', 'Page_Title', 'URL', 'Fact_Summary', 'Language',
        'Cross_Cutting', 'Country',
        'Corruption', 'Counterfeiting', 'Criminal_group', 'Cyber_Crime',
        'Drug', 'Falsified_Medicine', 'Illicit_Firearms',
        'Justice_Obstruction', 'Migrant_Smuggling', 'Money_Laundering',
        'Other', 'Piracy', 'Terrorism', 'Trafficking_Cultural_Property',
        'Trafficking_Persons', 'Wild_life',
        'Sentenced_Month', 'Sentenced_Year', 'Verdict_Month', 'Verdict_Year',
        'Defendant_Age', 'Defendant_Born', 'Defendant_Gender',
        'Defendant_Name', 'Defendant_Nationality',
        'Victim_Age', 'Victim_Born', 'Victim_Gender', 'Victim_Name',
        'Victim_Nationality',
    }:
        continue
    df[column] = df[column].fillna(0)


In [81]:
# Show the first five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,ID,Page_Title,URL,Fact_Summary,Language,Cross_Cutting,Country,Corruption,Counterfeiting,Criminal_group,Cyber_Crime,Drug,Falsified_Medicine,Illicit_Firearms,Justice_Obstruction,Migrant_Smuggling,Money_Laundering,Other,Piracy,Terrorism,Trafficking_Cultural_Property,Trafficking_Persons,Wild_life,Sentenced_Month,Sentenced_Year,Verdict_Month,Verdict_Year,Defendant_Age,Defendant_Born,Defendant_Gender,Defendant_Name,Defendant_Nationality,Victim_Age,Victim_Born,Victim_Gender,Victim_Name,Victim_Nationality,Cooperation_Afghanistan,Cooperation_Algeria,Cooperation_Andorra,Cooperation_Angola,Cooperation_Argentina,Cooperation_Australia,Cooperation_Austria,Cooperation_Bahamas,Cooperation_Barbados,Cooperation_Belgium,Cooperation_Bosnia and Herzegovina,Cooperation_Brazil,Cooperation_Bulgaria,Cooperation_Canada,Cooperation_Chile,Cooperation_China,Cooperation_Colombia,Cooperation_Costa Rica,Cooperation_Croatia,Cooperation_Czechia,Cooperation_Denmark,Cooperation_Dominican Republic,Cooperation_Ecuador,Cooperation_European Union,Cooperation_France,Cooperation_Georgia,Cooperation_Germany,Cooperation_Greece,Cooperation_Grenada,Cooperation_Guatemala,Cooperation_Guyana,Cooperation_Honduras,Cooperation_Hungary,Cooperation_Iceland,Cooperation_India,Cooperation_Indonesia,Cooperation_Iraq,Cooperation_Ireland,Cooperation_Israel,Cooperation_Italy,Cooperation_Japan,Cooperation_Kazakhstan,Cooperation_Kenya,Cooperation_Latvia,Cooperation_Lebanon,Cooperation_Liberia,Cooperation_Liechtenstein,Cooperation_Lithuania,Cooperation_Luxembourg,Cooperation_Malaysia,Cooperation_Malta,Cooperation_Mauritius,Cooperation_Mexico,Cooperation_Montenegro,Cooperation_Mozambique,Cooperation_Netherlands,Cooperation_New Zealand,Cooperation_Nicaragua,Cooperation_Nigeria,Cooperation_North Macedonia,Cooperation_Norway,Cooperation_Pakistan,Cooperation_Panama,Cooperation_Paraguay,Cooperation_Peru,Cooperation_Philippines,Cooperation_Poland,Cooperation_Portugal,Cooperation_Romania,Cooperation_Russian 
Federation,Cooperation_Saudi Arabia,Cooperation_Senegal,Cooperation_Serbia,Cooperation_Seychelles,Cooperation_Singapore,Cooperation_Slovakia,Cooperation_Slovenia,Cooperation_Somalia,Cooperation_South Africa,Cooperation_Spain,Cooperation_Sweden,Cooperation_Switzerland,Cooperation_Thailand,Cooperation_Trinidad and Tobago,Cooperation_Turkey,Cooperation_Ukraine,Cooperation_United Arab Emirates,Cooperation_United Kingdom of Great Britain and Northern Ireland,Cooperation_United States of America,Cooperation_Uruguay,Cooperation_Venezuela (Bolivarian Republic of),Offending_Afghanistan,Offending_Albania,Offending_Algeria,Offending_Andorra,Offending_Angola,Offending_Antigua and Barbuda,Offending_Argentina,Offending_Armenia,Offending_Australia,Offending_Austria,Offending_Azerbaijan,Offending_Bahamas,Offending_Bahrain,Offending_Bangladesh,Offending_Barbados,Offending_Belarus,Offending_Belgium,Offending_Belize,Offending_Benin,Offending_Bolivia (Plurinational State of),Offending_Bosnia and Herzegovina,Offending_Brazil,Offending_Bulgaria,Offending_Burkina Faso,Offending_Cabo Verde,Offending_Cambodia,Offending_Cameroon,Offending_Canada,Offending_Chad,Offending_Chile,Offending_China,Offending_Colombia,Offending_Congo,Offending_Costa Rica,Offending_Croatia,Offending_Cuba,Offending_Cyprus,Offending_Czechia,Offending_Côte d'Ivoire,Offending_Democratic Republic of the Congo,Offending_Denmark,Offending_Dominican Republic,Offending_Ecuador,Offending_Egypt,Offending_El Salvador,Offending_Equatorial Guinea,Offending_Eritrea,Offending_Estonia,Offending_Eswatini,Offending_Ethiopia,Offending_European Union,Offending_Fiji,Offending_Finland,Offending_France,Offending_Gambia,Offending_Georgia,Offending_Germany,Offending_Ghana,Offending_Greece,Offending_Grenada,Offending_Guatemala,Offending_Guinea,Offending_Guyana,Offending_Haiti,Offending_Honduras,Offending_Hungary,Offending_Iceland,Offending_India,Offending_Indonesia,Offending_International and Regional Bodies,Offending_Iran (Islamic Republic 
of),Offending_Iraq,Offending_Ireland,Offending_Israel,Offending_Italy,Offending_Jamaica,Offending_Japan,Offending_Jordan,Offending_Kazakhstan,Offending_Kenya,Offending_Kuwait,Offending_Lao People's Democratic Republic,Offending_Latvia,Offending_Lebanon,Offending_Lesotho,Offending_Liberia,Offending_Libya,Offending_Liechtenstein,Offending_Lithuania,Offending_Luxembourg,Offending_Malawi,Offending_Malaysia,Offending_Maldives,Offending_Mali,Offending_Malta,Offending_Marshall Islands,Offending_Mauritania,Offending_Mauritius,Offending_Mexico,Offending_Micronesia (Federated States of),Offending_Mongolia,Offending_Montenegro,Offending_Morocco,Offending_Mozambique,Offending_Myanmar,Offending_Namibia,Offending_Nepal,Offending_Netherlands,Offending_New Zealand,Offending_Nicaragua,Offending_Niger,Offending_Nigeria,Offending_North Macedonia,Offending_Norway,Offending_Oman,Offending_Pakistan,Offending_Palau,Offending_Panama,Offending_Paraguay,Offending_Peru,Offending_Philippines,Offending_Poland,Offending_Portugal,Offending_Qatar,Offending_Republic of Korea,Offending_Republic of Moldova,Offending_Romania,Offending_Russian Federation,Offending_Rwanda,Offending_Saudi Arabia,Offending_Senegal,Offending_Serbia,Offending_Seychelles,Offending_Sierra Leone,Offending_Singapore,Offending_Slovakia,Offending_Slovenia,Offending_Somalia,Offending_South Africa,Offending_Spain,Offending_Sri Lanka,Offending_Sudan,Offending_Suriname,Offending_Sweden,Offending_Switzerland,Offending_Syrian Arab Republic,Offending_Thailand,Offending_Togo,Offending_Tonga,Offending_Trinidad and Tobago,Offending_Tunisia,Offending_Turkey,Offending_Uganda,Offending_Ukraine,Offending_Undefined,Offending_United Arab Emirates,Offending_United Kingdom of Great Britain and Northern Ireland,Offending_United Republic of Tanzania,Offending_United States of America,Offending_Uruguay,Offending_Uzbekistan,Offending_Venezuela (Bolivarian Republic of),Offending_Viet Nam,Offending_Yemen,Offending_Zambia
0,ALB001,Decision No. 648,https://sherloc.unodc.org/cld/case-law-doc/tra...,In the months of July and August of 2005 the d...,en,[],Albania,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,2009,2,2009,,,Male,Jani Xhavit Tafa,,,,Female,E I,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,ALB001,Decision No. 648,https://sherloc.unodc.org/cld/case-law-doc/tra...,In the months of July and August of 2005 the d...,en,[],Albania,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,2009,2,2009,,,,,,,,Female,I T,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,ALB002,Decision No.292,https://sherloc.unodc.org/cld/case-law-doc/tra...,Defendant Genc Hysa and S K moved in together ...,en,[],,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,5,2009,5,2009,,,Male,Genc Hysa,,,,Female,S K,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,ALB002,Decision No.292,https://sherloc.unodc.org/cld/case-law-doc/tra...,Defendant Genc Hysa and S K moved in together ...,en,[],,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,5,2009,5,2009,,,,,,,,Female,T L,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ALB002,Decision No.292,https://sherloc.unodc.org/cld/case-law-doc/tra...,Defendant Genc Hysa and S K moved in together ...,en,[],,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,5,2009,5,2009,,,,,,,,Female,M L,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Save the dataset 

In [193]:
# Write `df` to an Excel workbook. The relative path resolves against the
# current working directory; index=False leaves the row index out of the sheet.
df.to_excel(excel_writer='Sherloc_dataset.xlsx', index=False)