In [2]:
import os
import string
import pandas as pd
from dotenv import load_dotenv
from opensearchpy import OpenSearch

In [None]:
load_dotenv()

In [None]:
host = [{'host': os.environ.get('OPENSEARCH_HOST'), 'port': os.environ.get('OPENSEARCH_PORT')}]
auth = (os.environ.get('OPENSEARCH_USER'), os.environ.get('OPENSEARCH_PASS'))

client = OpenSearch(
    hosts = [{'host': os.environ.get('OPENSEARCH_HOST'), 'port': os.environ.get('OPENSEARCH_PORT')}],
    http_auth = (os.environ.get('OPENSEARCH_USER'), os.environ.get('OPENSEARCH_PASS')),
    use_ssl = True,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
    timeout = 999999
)

In [None]:
client.info()

In [None]:
doc_index = 'law_analyzer_new4'

excluded_fields = ["Blocks.additional_context.vector",
                   "Blocks.additional_context.token_size",
                   "Blocks.additional_context.source_token_length",
                   "Blocks.source_token_length",
                   "Blocks.chunks.main_vector", 
                   "Blocks.chunks.context_vector",
                   "Blocks.chunks.context_token_size",
                   "Blocks.chunks.main_token_size",
                   "Blocks.chunks.source_token_length",]

In [None]:
def query_by_id(doc_id, exc_field):
    return {
        "_source":{
            "excludes": exc_field
        },
        "query": {
            "ids": {
                "values": [doc_id]
            }
        }
    }

def get_doc_df(doc_id, exc_field):
    doc = client.search(index=doc_index, body=query_by_id(doc_id, exc_field))
    return pd.json_normalize(doc['hits']['hits'])

def get_docs_df(doc_ids, exc_field):
    docs = []
    for doc_id in doc_ids:
        docs.append(get_doc_df(doc_id, exc_field))
    return pd.concat(docs)

In [None]:
doc_count = client.count(index=doc_index)['count']
doc_count

In [None]:
ids = client.search(index=doc_index, body={"query": {"match_all": {}}}, _source=False, size=doc_count)['hits']['hits']
ids = [id['_id'] for id in ids]

In [None]:
docs_df = get_docs_df(ids, excluded_fields)
docs_df

In [None]:
# save the dataframe to csv
docs_df.to_csv('data/law_analyzer_new4.csv', index=False)

In [3]:
docs_df = pd.read_csv('data/law_analyzer_new4.csv')
docs_df

Unnamed: 0,_index,_id,_score,_source.Status,_source.No,_source.PeraturanGoId,_source.Blocks,_source.TanggalPenetapan,_source.Bidang,_source.Slug,...,_source.Tahun,_source.TanggalPengundangan,_source.Bentuk,_source.PeraturanId,_source.BlocksMinioPath,_source.FileMinioPath,_source.Domain,_source.Tematik,_source.TanggalPembaruan,_source.BlocksMiniPath
0,law_analyzer_new4,221-pmk.010-2015,1.0,Berlaku,221.0,https://peraturan.go.id/files/bn1843-2015.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2015-12-07 00:00:00.000,,221/pmk.010/2015,...,2015.0,2015-12-07 00:00:00.000,Peraturan Menteri Keuangan,CCDD282D-D3A1-43A4-BE32-94C635A50127,,,,,,
1,law_analyzer_new4,pp-21-tahun-2005,1.0,Berlaku,21.0,https://peraturan.go.id/files/pp19-2005.pdf,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas...",2005-05-19 00:00:00.000,,pp-21-tahun-2005,...,2005.0,2005-05-19 00:00:00.000,Peraturan Pemerintah,3990F6C9-792A-43A1-93A3-736B9A11779E,,,,,,
2,law_analyzer_new4,pp-51-tahun-2008,1.0,Berlaku,51.0,,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2008-01-01 00:00:00.000,,pp-51-tahun-2008,...,2008.0,2008-01-01 00:00:00.000,Peraturan Pemerintah,6086A055-44C0-4C6E-B7D7-D7617EFF4C16,,,,,,
3,law_analyzer_new4,62-pmk.04-2018,1.0,Tidak Berlaku,62.0,https://peraturan.go.id/files/bn777-2018.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2018-06-06 00:00:00.000,,62/pmk.04/2018,...,2018.0,2018-06-21 00:21:00.000,Peraturan Menteri Keuangan,BE35AA04-E741-4264-8914-4AB922C47318,,,,,,
4,law_analyzer_new4,42-pmk.05-2017,1.0,Berlaku,42.0,https://peraturan.go.id/files/bn400-2017.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2017-03-10 00:00:00.000,,42/pmk.05/2017,...,2017.0,2017-03-10 00:00:00.000,Peraturan Menteri Keuangan,CD4FDA39-BF00-4C8E-AD91-5A103139976B,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3020,law_analyzer_new4,pp-9-tahun-2014,1.0,Berlaku,9.0,https://peraturan.go.id/files/pp9-2014bt.pdf,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas...",2014-02-12 00:00:00.000,,pp-9-tahun-2014,...,2014.0,2014-02-12 00:00:00.000,Peraturan Pemerintah,BAEBDE95-A779-4D45-9056-247C47CBC502,la_parse/BAEBDE95-A779-4D45-9056-247C47CBC502....,,,,,
3021,law_analyzer_new4,pp-25-tahun-2017,1.0,Berlaku,25.0,https://peraturan.go.id/files/pp25-2017bt.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2017-06-13 00:00:00.000,,pp-25-tahun-2017,...,2017.0,2017-06-13 00:00:00.000,Peraturan Pemerintah,7BEBBDD3-FFE0-4E45-9458-4B9A641B2839,la_parse/7BEBBDD3-FFE0-4E45-9458-4B9A641B2839....,,,,,
3022,law_analyzer_new4,160-pmk.04-2010,1.0,Tidak Berlaku,160.0,,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2010-09-01 00:00:00.000,Hukum Keuangan Negara,160/pmk.04/2010,...,2010.0,2010-09-01 00:00:00.000,Peraturan Menteri Keuangan,A78C2FD3-F971-4ECA-A03C-71ED1225E466,la_parse/A78C2FD3-F971-4ECA-A03C-71ED1225E466....,,,,,
3023,law_analyzer_new4,146-pmk.05-2019,1.0,Tidak Berlaku,146.0,https://peraturan.go.id/files/BN+1224-2019.pdf,"[{'content': ' '}, {'content': 'BERITA NEGARA ...",2019-10-18 00:00:00.000,,146/pmk.05/2019,...,2019.0,,Peraturan Menteri Keuangan,E445A109-5781-4C02-B203-DDF63F684C6B,parse_la/bb77bd83-675f-4537-9c32-9d83aead220c....,document/E445A109-5781-4C02-B203-DDF63F684C6B.pdf,,,,


In [4]:
invalid_docs_id = []

for index, row in docs_df.iterrows():
    if type(row['_source.Blocks']) != str:
        invalid_docs_id.append(row['_id'])

invalid_docs_id

['coba2-upload-pdf', 'undefined', 'conothbaru']

In [5]:
# drop the invalid documents
docs_df = docs_df[~docs_df['_id'].isin(invalid_docs_id)]
docs_df

Unnamed: 0,_index,_id,_score,_source.Status,_source.No,_source.PeraturanGoId,_source.Blocks,_source.TanggalPenetapan,_source.Bidang,_source.Slug,...,_source.Tahun,_source.TanggalPengundangan,_source.Bentuk,_source.PeraturanId,_source.BlocksMinioPath,_source.FileMinioPath,_source.Domain,_source.Tematik,_source.TanggalPembaruan,_source.BlocksMiniPath
0,law_analyzer_new4,221-pmk.010-2015,1.0,Berlaku,221.0,https://peraturan.go.id/files/bn1843-2015.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2015-12-07 00:00:00.000,,221/pmk.010/2015,...,2015.0,2015-12-07 00:00:00.000,Peraturan Menteri Keuangan,CCDD282D-D3A1-43A4-BE32-94C635A50127,,,,,,
1,law_analyzer_new4,pp-21-tahun-2005,1.0,Berlaku,21.0,https://peraturan.go.id/files/pp19-2005.pdf,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas...",2005-05-19 00:00:00.000,,pp-21-tahun-2005,...,2005.0,2005-05-19 00:00:00.000,Peraturan Pemerintah,3990F6C9-792A-43A1-93A3-736B9A11779E,,,,,,
2,law_analyzer_new4,pp-51-tahun-2008,1.0,Berlaku,51.0,,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2008-01-01 00:00:00.000,,pp-51-tahun-2008,...,2008.0,2008-01-01 00:00:00.000,Peraturan Pemerintah,6086A055-44C0-4C6E-B7D7-D7617EFF4C16,,,,,,
3,law_analyzer_new4,62-pmk.04-2018,1.0,Tidak Berlaku,62.0,https://peraturan.go.id/files/bn777-2018.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2018-06-06 00:00:00.000,,62/pmk.04/2018,...,2018.0,2018-06-21 00:21:00.000,Peraturan Menteri Keuangan,BE35AA04-E741-4264-8914-4AB922C47318,,,,,,
4,law_analyzer_new4,42-pmk.05-2017,1.0,Berlaku,42.0,https://peraturan.go.id/files/bn400-2017.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2017-03-10 00:00:00.000,,42/pmk.05/2017,...,2017.0,2017-03-10 00:00:00.000,Peraturan Menteri Keuangan,CD4FDA39-BF00-4C8E-AD91-5A103139976B,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3020,law_analyzer_new4,pp-9-tahun-2014,1.0,Berlaku,9.0,https://peraturan.go.id/files/pp9-2014bt.pdf,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas...",2014-02-12 00:00:00.000,,pp-9-tahun-2014,...,2014.0,2014-02-12 00:00:00.000,Peraturan Pemerintah,BAEBDE95-A779-4D45-9056-247C47CBC502,la_parse/BAEBDE95-A779-4D45-9056-247C47CBC502....,,,,,
3021,law_analyzer_new4,pp-25-tahun-2017,1.0,Berlaku,25.0,https://peraturan.go.id/files/pp25-2017bt.pdf,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2017-06-13 00:00:00.000,,pp-25-tahun-2017,...,2017.0,2017-06-13 00:00:00.000,Peraturan Pemerintah,7BEBBDD3-FFE0-4E45-9458-4B9A641B2839,la_parse/7BEBBDD3-FFE0-4E45-9458-4B9A641B2839....,,,,,
3022,law_analyzer_new4,160-pmk.04-2010,1.0,Tidak Berlaku,160.0,,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa...",2010-09-01 00:00:00.000,Hukum Keuangan Negara,160/pmk.04/2010,...,2010.0,2010-09-01 00:00:00.000,Peraturan Menteri Keuangan,A78C2FD3-F971-4ECA-A03C-71ED1225E466,la_parse/A78C2FD3-F971-4ECA-A03C-71ED1225E466....,,,,,
3023,law_analyzer_new4,146-pmk.05-2019,1.0,Tidak Berlaku,146.0,https://peraturan.go.id/files/BN+1224-2019.pdf,"[{'content': ' '}, {'content': 'BERITA NEGARA ...",2019-10-18 00:00:00.000,,146/pmk.05/2019,...,2019.0,,Peraturan Menteri Keuangan,E445A109-5781-4C02-B203-DDF63F684C6B,parse_la/bb77bd83-675f-4537-9c32-9d83aead220c....,document/E445A109-5781-4C02-B203-DDF63F684C6B.pdf,,,,


In [6]:
# convert all _source.Blocks to list
docs_df['_source.Blocks'] = docs_df['_source.Blocks'].apply(eval)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  docs_df['_source.Blocks'] = docs_df['_source.Blocks'].apply(eval)


In [7]:
content_type_list = []

for index, row in docs_df.iterrows():
    list(row['_source.Blocks'])
    if type(row['_source.Blocks']) == list:
        for k in row['_source.Blocks']:
            if 'type' in k:
                if k['type'] not in content_type_list:
                    content_type_list.append(k['type'])
            else:
                pass

content_type_list

['CONTENT_PASAL', 'DEFINITION', 'konsideran', 'heading_pasal']

In [8]:
# create new dataframe with only the relevant fields (_id, _source.Judul, _source.Blocks)
data_df = docs_df[['_id','_source.Blocks']]
data_df

Unnamed: 0,_id,_source.Blocks
0,221-pmk.010-2015,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
1,pp-21-tahun-2005,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas..."
2,pp-51-tahun-2008,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
3,62-pmk.04-2018,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
4,42-pmk.05-2017,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
...,...,...
3020,pp-9-tahun-2014,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas..."
3021,pp-25-tahun-2017,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
3022,160-pmk.04-2010,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
3023,146-pmk.05-2019,"[{'content': ' '}, {'content': 'BERITA NEGARA ..."


In [9]:
def context_prep(context_text):
    # remove all characters before "-" and including "-".
    # if context_text contains "-", remove it and the characters before it
    if "-" in context_text:
        text = context_text.split("-", 1)[1].strip()
        text = text.translate(str.maketrans('', '', string.punctuation)).strip()
    else:
        text = context_text.translate(str.maketrans('', '', string.punctuation)).strip()
        ""
    return ' '.join(text.split())


def title_prep(context_text):
    # remove all characters after "-" and including "-"
    # if context_text contains "-", remove it and the characters after it
    if "-" in context_text:
        text = context_text.split("-", 1)[0].strip()
        text = text.translate(str.maketrans('', '', string.punctuation)).strip()
    else:
        text = context_text.translate(str.maketrans('', '', string.punctuation)).strip()

    return ' '.join(text.split())


def context_prep_2(context_text):
    context_parts = context_text.split(" - ")
    # join the parts after the first part
    text = ' - '.join(context_parts[1:]).strip()

    return text


def title_prep_2(context_text):
    context_parts = context_text.split(" - ")

    return context_parts[0].strip()

In [10]:
docs_title_list = []

valid_docs_content_id = []
invalid_docs_content_id = []

for index, row in data_df.iterrows():
    for c in row['_source.Blocks']:
        if 'type' in c and c['type'] == 'CONTENT_PASAL':
            title_text = title_prep_2(c['context'])
            docs_title_list.append(title_text)
            valid_docs_content_id.append(row['_id'])
            break
        else:
            if row['_id'] not in invalid_docs_content_id:
                invalid_docs_content_id.append(row['_id'])

# remove id in invalid_docs_title_id if exists in valid_docs_title_id
invalid_docs_content_id = [x for x in invalid_docs_content_id if x not in valid_docs_content_id]
invalid_docs_content_id

['uu-10-tahun-1994', 'uu-6-tahun-1966', 'uu-16-tahun-2000']

In [12]:
docs_title_list

['PENGENAAN BEA MASUK ANTI DUMPING TERHADAP IMPOR PRODUK _BIAXIALLY ORIENTED POLYETHYLENE TEREPHTHALATE_ (BOPET) DARI NEGARA INDIA, REPUBLIK RAKYAT TIONGKOK, DAN THAILAND',
 'STANDAR NASIONAL PENDIDIKAN',
 'PERUBAHAN NAMA KABUPATEN YAPEN WAROPEN MENJADI KABUPATEN KEPULAUAN YAPEN PROVINSI PAPUA',
 'PERUBAHAN KEDUA ATAS PERATURAN MENTERI KEUANGAN NOMOR 160/PMK.04/2010 TENTANG NILAI PABEAN UNTUK PENGHITUNGAN BEA MASUK',
 'PERUBAHAN ATAS PERATURAN MENTERI KEUANGAN NOMOR 220/PMK.05/2016 TENTANG SISTEM AKUNTANSI DAN PELAPORAN KEUANGAN BADAN LAYANAN UMUM',
 'PELAKSANAAN LIKUIDASI ENTITAS AKUNTANSI PADA BAGIAN ANGGARAN BENDAHARA UMUM NEGARA',
 'TATA CARA PEMUNGUTAN DAN PENYETORAN PAJAK ROKOK',
 'DANA PENSIUN',
 'TIDAK DIPUNGUT CUKAI',
 'ALOKASI DEFINITIF DANA BAGI HASIL PAJAK PENGHASILAN PASAL 25 DAN PASAL 29 WAJIB PAJAK ORANG PRIBADI DALAM NEGERI DAN PAJAK PENGHASILAN PASAL 21 TAHUN ANGGARAN 2009',
 'PENAMBAHAN PENYERTAAN MODAL NEGARA REPUBLIK INDONESIA KE DALAM MODAL PERUSAHAAN UMUM (PERUM) 

In [11]:
# delete the invalid documents
data_df = data_df[~data_df['_id'].isin(invalid_docs_content_id)]
data_df

Unnamed: 0,_id,_source.Blocks
0,221-pmk.010-2015,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
1,pp-21-tahun-2005,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas..."
2,pp-51-tahun-2008,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
3,62-pmk.04-2018,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
4,42-pmk.05-2017,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
...,...,...
3020,pp-9-tahun-2014,"[{'ref': 'none', 'bab': 'bab-i', 'pasal': 'pas..."
3021,pp-25-tahun-2017,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
3022,160-pmk.04-2010,"[{'ref': 'none', 'bab': 'none', 'pasal': 'pasa..."
3023,146-pmk.05-2019,"[{'content': ' '}, {'content': 'BERITA NEGARA ..."


In [18]:
docs_content_list = []
docs_id_list = []

for index, row in data_df.iterrows():
    doc_content_list = []
    context_temp = []

    for c in row['_source.Blocks']:
        if 'type' in c and c['type'] == 'CONTENT_PASAL':
            title_text = title_prep_2(c['context'])
            context_text = context_prep_2(c['context'])
            
            if context_text not in context_temp:
                doc_content_list.append(f'{context_text} {c["content"]}')
                context_temp.append(context_text)
            else:
                doc_content_list.append(c['content'])

    doc_content_string = ' '.join(doc_content_list)
    docs_content_list.append(doc_content_string)
    docs_id_list.append(row['_id'])

In [19]:
# create dictionary to map the id to the content
id_to_content = dict(zip(docs_id_list, docs_content_list))
id_to_title = dict(zip(docs_id_list, docs_title_list))

# add the content to the dataframe
data_df['title'] = data_df['_id'].map(id_to_title)
data_df['content'] = data_df['_id'].map(id_to_content)

# delete the _source.Blocks column
data_df = data_df.drop(columns='_source.Blocks')
data_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_df['title'] = data_df['_id'].map(id_to_title)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_df['content'] = data_df['_id'].map(id_to_content)


Unnamed: 0,_id,title,content
0,221-pmk.010-2015,PENGENAAN BEA MASUK ANTI DUMPING TERHADAP IMPO...,Terhadapimporproduk _Biaxially_ _Oriented_ _P...
1,pp-21-tahun-2005,STANDAR NASIONAL PENDIDIKAN,KETENTUAN UMUM Dalam Peraturan Pemerintah ini ...
2,pp-51-tahun-2008,PERUBAHAN NAMA KABUPATEN YAPEN WAROPEN MENJADI...,Nama Kabupaten Yapen Waropen sebagai daerah o...
3,62-pmk.04-2018,PERUBAHAN KEDUA ATAS PERATURAN MENTERI KEUANGA...,(1) Metode pengulangan ( _fallback_) sebagaim...
4,42-pmk.05-2017,PERUBAHAN ATAS PERATURAN MENTERI KEUANGAN NOMO...,(1) Laporan Keuangan BLU sebagaimana dimaksud...
...,...,...,...
3020,pp-9-tahun-2014,PELAKSANAAN UNDANG-UNDANG NOMOR 4 TAHUN 2011 T...,KETENTUAN UMUM Dalam Peraturan Pemerintah ini ...
3021,pp-25-tahun-2017,PEMBERIAN TUNJANGAN HARI RAYA DALAM TAHUN ANGG...,Dalam Peraturan Pemerintah ini yang dimaksud ...
3022,160-pmk.04-2010,PELAKSANAAN PENYALURAN DANA ALOKASI KHUSUS TAH...,(1)PenyaluranDanaAlokasiKhusus(DAK)TahunAngga...
3023,146-pmk.05-2019,TARIF LAYANAN BADAN LAYANAN UMUM POLITEKNIK KE...,Tarif layanan Badan Layanan Umum Politeknik K...


In [35]:
invalid_title_id = []

for index, row in data_df.iterrows():
    if len(row['title']) == 0:
        invalid_title_id.append(row['_id'])

invalid_title_id

['pp-24-tahun-2009',
 'pp-61-tahun-2009',
 'uu-8-tahun-2010',
 'pp-5-tahun-2009',
 'pp-14-tahun-2011',
 'uu-17-tahun-2011',
 'pp-48-tahun-2009',
 '257-pmk.02-2010',
 'perpres-117-tahun-2015',
 'perpres-1-tahun-2007',
 'perpres-105-tahun-2015',
 'pp-22-tahun-2008',
 '233-pmk.07-2010',
 'uu-3-tahun-2011',
 'uu-13tahun2003',
 'pp-72-tahun-2009',
 'uu-9-tahun-2016']

In [36]:
# drop the invalid documents
data_df = data_df[~data_df['_id'].isin(invalid_title_id)]
data_df

Unnamed: 0,_id,title,content
0,221-pmk.010-2015,PENGENAAN BEA MASUK ANTI DUMPING TERHADAP IMPO...,Terhadapimporproduk _Biaxially_ _Oriented_ _P...
1,pp-21-tahun-2005,STANDAR NASIONAL PENDIDIKAN,KETENTUAN UMUM Dalam Peraturan Pemerintah ini ...
2,pp-51-tahun-2008,PERUBAHAN NAMA KABUPATEN YAPEN WAROPEN MENJADI...,Nama Kabupaten Yapen Waropen sebagai daerah o...
3,62-pmk.04-2018,PERUBAHAN KEDUA ATAS PERATURAN MENTERI KEUANGA...,(1) Metode pengulangan ( _fallback_) sebagaim...
4,42-pmk.05-2017,PERUBAHAN ATAS PERATURAN MENTERI KEUANGAN NOMO...,(1) Laporan Keuangan BLU sebagaimana dimaksud...
...,...,...,...
3020,pp-9-tahun-2014,PELAKSANAAN UNDANG-UNDANG NOMOR 4 TAHUN 2011 T...,KETENTUAN UMUM Dalam Peraturan Pemerintah ini ...
3021,pp-25-tahun-2017,PEMBERIAN TUNJANGAN HARI RAYA DALAM TAHUN ANGG...,Dalam Peraturan Pemerintah ini yang dimaksud ...
3022,160-pmk.04-2010,PELAKSANAAN PENYALURAN DANA ALOKASI KHUSUS TAH...,(1)PenyaluranDanaAlokasiKhusus(DAK)TahunAngga...
3023,146-pmk.05-2019,TARIF LAYANAN BADAN LAYANAN UMUM POLITEKNIK KE...,Tarif layanan Badan Layanan Umum Politeknik K...


In [17]:
# save the dataframe to csv
data_df.to_csv('data/law_analyzer_new4_title-content.csv', index=False)