In [11]:
# https://www.fao.org/faolex/opendata/en/

import pandas as pd

In [12]:
# http://extwprlegs1.fao.org/docs/pdf/FAOLEX_Agriculture.zip?
# Download the archive from the link above, extract the CSV, and point the
# path below at the extracted file before running this cell.
#
# The path is written as a raw string (r'...') so that the Windows backslashes
# are taken literally; in a normal string '\d' and '\F' are invalid escape
# sequences, which Python warns about and will eventually reject.

df = pd.read_csv(r'E:\dataset\FAOLEX_Agriculture.csv')

In [13]:
# Spot-check two records (one Fisheries entry, one Agriculture entry) selected
# by their Record Id; transpose so each record is shown as a column.
df[df['Record Id'].isin(['LEX-FAOC096511', 'LEX-FAOC022343'])].T

Unnamed: 0,72874,80809
Record Id,LEX-FAOC022343,LEX-FAOC096511
Record URL,http://www.fao.org/faolex/results/details/en/c...,http://www.fao.org/faolex/results/details/en/c...
Document URL,http://faolex.fao.org/docs/pdf/niu22343.pdf; h...,http://faolex.fao.org/docs/pdf/den96511.pdf
Title,Territorial Sea and Exclusive Economic Zone Ac...,Rural Development Act (No. 316 of 2007).
Original title,,Lov om udvikling af landdistrikterne (landdist...
Date of original text,30-04-1997,31-03-2007
Last amended date,12-2006,
Available website,www.paclii.org,retsinfo.dk
Language of document,English,Danish
Country/Territory,Niue (New Zealand),Denmark


In [4]:
# column labels of the loaded frame, as a plain Python list
df.columns.tolist()

['Record Id',
 'Record URL',
 'Document URL',
 'Title',
 'Original title',
 'Date of original text',
 'Last amended date',
 'Available website',
 'Language of document',
 'Country/Territory',
 'Regional organizations',
 'Territorial subdivision',
 'Type of text',
 'Repealed',
 'Abstract',
 'Primary subjects',
 'Domain',
 'Keywords']

In [5]:
# Columns kept for the rest of the analysis (order here is the column order
# of the filtered frame).
imp_col = [
    'Record Id',
    'Abstract',
    'Title',
    'Type of text',
    'Date of original text',
    'Document URL',
    'Keywords',
    'Country/Territory',
    'Primary subjects',
    'Domain',
    'Repealed',
    'Territorial subdivision',
]

In [6]:
# Keep only the columns listed in imp_col.
# .copy() makes the result an independent frame, so adding new columns to it
# later does not trigger pandas' SettingWithCopyWarning.
df = df[imp_col].copy()

In [7]:
# Keywords to look for in each abstract, grouped by the flag column they feed.
# A column is set to 'Yes' when any of its keywords is found.
key_dict = {
    'digital innovations': [
        'digital', 'development', 'innovation', 'tech', 'advance', 'advancement',
    ],
    'data infrastructure': ['data', 'development', 'tech', 'info'],
    'business development': ['business', 'development', 'growth'],
    'enabling environments for digitalization': ['enabling', 'enable', 'supporting'],
}

# New columns to create, one per keyword group (dicts preserve insertion
# order, so this list follows the order written above).
new_col = list(key_dict)

In [8]:
# function to search keywords within an abstract (text)
def search(keys, text):
    """Return True if any keyword in ``keys`` occurs in ``text``.

    Matching is by substring and ignores case, so the keyword ``'data'``
    matches both ``'database'`` and ``'Data Protection'``.

    Parameters
    ----------
    keys : iterable of str
        Keywords to look for.
    text : object
        The abstract to search. Anything that is not a ``str`` (for example
        the NaN that pandas uses for a missing abstract) is treated as
        "no match".

    Returns
    -------
    bool
        True when at least one keyword is found, otherwise False.
    """
    # some entries have no abstract (NaN), so there is nothing to search
    if not isinstance(text, str):
        return False

    # lower-case the text once, then compare against lower-cased keywords so
    # that capitalised words (e.g. at the start of a sentence) still match
    lowered = text.lower()
    return any(key.lower() in lowered for key in keys)
    
# For every new column, mark each row 'Yes' when its abstract contains any of
# that column's keywords and 'No' otherwise.
for col in new_col:
    keywords = key_dict[col]
    df[col] = [
        'Yes' if search(keywords, abstract) else 'No'
        for abstract in df['Abstract']
    ]

In [9]:
# preview the first five rows, including the newly added flag columns
df.head(n=5)

Unnamed: 0,Record Id,Abstract,Title,Type of text,Date of original text,Document URL,Keywords,Country/Territory,Primary subjects,Domain,Repealed,Territorial subdivision,digital innovations,data infrastructure,business development,enabling environments for digitalization
0,LEX-FAOC027670,The purpose of this Act is to enact appropriat...,Act on Japanese Agricultural Standards (Act No...,Legislation,11-05-1950,http://faolex.fao.org/docs/pdf/jap27670.pdf; h...,basic legislation; certification; standards; i...,Japan,Agricultural & rural development; Food & nutri...,Agriculture; Livestock; Food & nutrition; Fish...,,,Yes,Yes,Yes,No
1,LEX-FAOC029030,Cette loi modifie les loi suivantes: Loi du 15...,Loi portant des dispositions diverses et relat...,Legislation,05-02-1999,http://faolex.fao.org/docs/pdf/bel29030.pdf,special fund; financial agricultural measures;...,Belgium,Agricultural & rural development,Agriculture; Fisheries; Food & nutrition; Fore...,,,No,No,No,No
2,LEX-FAOC043198,La presente Ley crea las organizaciones de cad...,Ley Nº 811 - Crea las organizaciones de cadena...,Legislation,26-06-2003,http://faolex.fao.org/docs/pdf/col43198.pdf,agricultural development; institution; extensi...,Colombia,Agricultural & rural development,Agriculture; Livestock; Food & nutrition; Fish...,,,No,Yes,No,No
3,LEX-FAOC095279,This Decision approves the Scheme on the devel...,Decision No. 2194/QD-TTg approving the Scheme ...,Regulation,25-12-2009,http://faolex.fao.org/docs/pdf/vie95279.pdf,agricultural development; indigenous peoples; ...,Viet Nam,Fisheries; Cultivated plants,Agriculture; Livestock; Fisheries; Forestry; C...,,,Yes,Yes,Yes,No
4,LEX-FAOC121539,This Regional Act sets out the legislative fra...,"Regional Act No. 31 on agriculture, forests, f...",Legislation,05-12-2008,http://faolex.fao.org/docs/pdf/ita121539.pdf,agricultural development; subsidy/incentive; a...,Italy,Agricultural & rural development,Agriculture; Livestock; Fisheries; Forestry; C...,,Lombardia,Yes,Yes,Yes,No


In [10]:
# Save the filtered and flagged dataframe as CSV in the working directory.
# The row index is written too (pandas' default), as the first, unnamed column.
df.to_csv(path_or_buf='Complete_faolex_data.csv', index=True)

In [10]:
# inspect the document links; a single cell may hold several ';'-separated URLs
df.loc[:, 'Document URL']

0        http://faolex.fao.org/docs/pdf/jap27670.pdf; h...
1              http://faolex.fao.org/docs/pdf/bel29030.pdf
2              http://faolex.fao.org/docs/pdf/col43198.pdf
3              http://faolex.fao.org/docs/pdf/vie95279.pdf
4             http://faolex.fao.org/docs/pdf/ita121539.pdf
                               ...                        
92893    http://faolex.fao.org/docs/pdf/uk38493original...
92894    http://faolex.fao.org/docs/pdf/est98238ENG.pdf...
92895          http://faolex.fao.org/docs/pdf/uk154195.pdf
92896         http://faolex.fao.org/docs/pdf/nsw197684.pdf
92897         http://faolex.fao.org/docs/pdf/uru189494.pdf
Name: Document URL, Length: 92898, dtype: object

In [26]:
# Load the second dataset from the working directory.
# NOTE(review): 'test (6).csv' looks like a browser-download file name - confirm
# the source of this file and consider giving it a descriptive name.
d = pd.read_csv(filepath_or_buffer='test (6).csv')

In [27]:
# look at the 'Link' value of the row labelled 10
d.loc[10, 'Link']

'AUSTRALIA’S TECH FUTURE (industry.gov.au)'

In [28]:
# display the loaded frame (pandas abbreviates the middle rows of long frames)
d

Unnamed: 0,Title,Link,Definitions of Data,Does this Regulation/ Policy mirror any other Country's (Yes/No?),Policy/Regulation it mirrors,Countries,Categories,Tags,Digital Innovations,Data Infrastructure,Business Development Services,Enabling Environment for Digitalisation
0,Information and Communication Technologies Pol...,Information\nand Communication Technologies Po...,,,,,,,,,,
1,Data Protection Act 2013,Antigua and Barbuda - Data Protection Act 2013...,"Data is not defined specifically. However, pro...",Yes,Data Protection Acts,Singapore\nBotswana\nJamaica\nCyprus\nMalta,Data Governance,Consent;\nData;\nData integrity;\nData process...,,,,
2,Electronic Crimes Act 2013 & Electronic Crimes...,Electronic Crimes Act 2013 & Electronic Crimes...,"Data includes representation of facts, informa...",Yes,Computer Misuse Act; Prevention of Electronic ...,Mauritius\nBrunei\nPakistan,Data Governance,Data;\nData transmission;\nElectronic crimes;\...,,,,
3,Electronic Transactions Act 2013 & Electronic ...,electronic-transactions-act-2013-electronic-tr...,Does not define data. However defines consumer...,Yes,Electronic Transactions Acts/Bills,Ghana\nSt. Vincent and the Grenadines\nSri Lan...,Electronic Transactions,Electronic commerce;\nElectronic communication...,,,,
4,G.A.T.E. Government Assisted Technology Endeavour,G.A.T.E. Government Assisted Technology Endeav...,,No,,,Skills,Broadband;\nCompetition;\nICT services;\nInter...,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
757,,,,,,,,,,,,
758,,,,,,,,,,,,
759,,,,,,,,,,,,
760,,,,,,,,,,,,
