## BDEX Data Extraction

### Prereq:
- Python3.7+
- Run on `{BQNT /I:NOBOX <GO>}` and Trusted Kernel.
- Install the library first using:
    - !pip install aiohttp
    - !pip install minio

In [1]:
print("Please specify how many weeks of data you would like to extract:")
############# To manually change input ###################
number_of_weeks = 5
############# To manually change input ###################
print("You've specified "+str(number_of_weeks)+" weeks.")
print("Then please run all cells and find output in the folder bdex_output.")

Please specify how many weeks of data you would like to extract:
You've specified 5 weeks.
Then please run all cells and find output in the folder bdex_output.


### Main:

In [2]:
!pip install aiohttp

Looking in indexes: https://artifactory.inf.bloomberg.com/artifactory/api/pypi/bloomberg-pypi/simple


In [3]:
!pip install minio

Looking in indexes: https://artifactory.inf.bloomberg.com/artifactory/api/pypi/bloomberg-pypi/simple


In [4]:
import copy
from src.bdex_fe import (
    apply_and_filter,
    bdex_search,
    get_tags,
    GetData,
    PreProcessor,
)
from src.bdex_fe.products import bnef

In [5]:
query_tags = get_tags(product="bnef", number_of_weeks=number_of_weeks)
query_tags

['dskt2.AswoNewsStory.week.202229',
 'dskt2.AswoNewsStory.week.202228',
 'dskt2.AswoNewsStory.week.202227',
 'dskt2.AswoNewsStory.week.202226',
 'dskt2.AswoNewsStory.week.202225']

In [6]:
# from sample_data import all_recs
all_recs = await bdex_search(
    query_tags,
    proxy=None  # not needed on (?:nj|ny)lxdev\d or CORP
)

In [7]:
TAG_FILTERS = {  # example
    "and_tags": [  # contains all
#         "dskt2.AswoNewsStory.language.ENGLISH",
#         "dskt2.AswoNewsStory.jflo.indexed.true"
    ],
    "or_tags": [  # contains any
        
    ],
    "no_tags": [  # contains none
        
    ],
    "any_substr": [  # contains in any
#         "AswoNewsStory"
    ],
}
GET_DATA = {
    "doc_needed": False,
    "source_data_needed": False,
    # if any of the above are True, fill these:
    "bcos_account": None,
    "bcos_secret": None,
    "minio_access": None,
    "minio_secret": None
}

functions = []
preproc = PreProcessor(**TAG_FILTERS)
get_data_client = GetData(**GET_DATA)

product_specific_extraction_functions = [bnef.extract_suid, bnef.extract_resolution]


functions = (preproc.functions +
             product_specific_extraction_functions +
             get_data_client.functions +
             [])  # potential tokenisers

In [8]:
recs = copy.deepcopy(all_recs)
for f in functions:
    apply_and_filter(recs, f)

processing extract_tags                 :  14689 records |███████████| 100%[-00:00,  233035.40 records/s] 00:00
processing extract_suid                 :  14689 records |███████████| 100%[-00:00,  341470.36 records/s] 00:00
processing extract_resolution           :  14689 records |███████████| 100%[-00:00,  507371.58 records/s] 00:00


In [9]:
recs[0]

{'tags': ['dskt2.AswoNewsStory.BNEF-1006859.asset_id.NA',
  'dskt2.AswoNewsStory.BNEF-1006859.resolution.Rejected',
  'dskt2.AswoNewsStory.BNEF-1006859.financing_id.NA',
  'dskt2.AswoNewsStory.jflo.indexed.true',
  'dskt2.AswoNewsStory.jflo.week.202229',
  'dskt2.AswoNewsStory.BNEF-1006859.region.EMEA',
  'dskt2.AswoNewsStory.BNEF-1006859.resolution_time.2022-07-19T13:51:15.618Z',
  'dskt2.AswoNewsStory.BNEF-1006859.reason.Not enough information for update',
  'dskt2.AswoNewsStory.jflo.issue.BCE-27895',
  'dskt2.AswoNewsStory.jflo.issue.BNEF-1006859',
  'dskt2.AswoNewsStory.jflo.created.true',
  'dskt2.AswoNewsStory.arrivalTime.2022-07-18T03:38:00.589Z',
  'dskt2.AswoNewsStory.topicClusterId.RF763CT1UM0W==',
  'dskt2.AswoNewsStory.region_score.100',
  'dskt2.AswoNewsStory.suid.RF763CT1UM0W',
  'dskt2.AswoNewsStory.language.ENGLISH',
  'dskt2.AswoNewsStory.region.EMEA',
  'dskt2.AswoNewsStory.class.50',
  'dskt2.AswoNewsStory.niCodes.GDNEFLNG',
  'dskt2.AswoNewsStory.spam_score.97',
  '

### Exporting as a Table via Reverse Engineering

In [10]:
import pandas as pd
from datetime import date
import re

today = date.today()

def tags_list_to_dict(cell):
    tags_dict = dict()
    issue_info_list = [] # to store info starting with 'dskt2.AswoNewsStory.BNEF-xxxxxx.'
    for i in cell:
        #jflo related tags
        if i.startswith('dskt2.AswoNewsStory.jflo.indexed.'):
            tags_dict['jflo.indexed'] = i.partition('dskt2.AswoNewsStory.jflo.indexed.')[2]
        if i.startswith('dskt2.AswoNewsStory.jflo.created.'):
            tags_dict['jflo.created'] = i.partition('dskt2.AswoNewsStory.jflo.created.')[2]  
        if i.startswith('dskt2.AswoNewsStory.jflo.week.'):
            tags_dict['jflo.week'] = i.partition('dskt2.AswoNewsStory.jflo.week.')[2] 
            
        # non-archived
        if i.startswith('dskt2.AswoNewsStory.jflo.issue.'):
            # there might be multiple issues per source, recording the existence of such
            jflo_id = i.partition('dskt2.AswoNewsStory.jflo.issue.')[2]  
            if 'jflo.issue' in tags_dict.keys():
                if str(tags_dict['jflo.issue']) != str(jflo_id):
                    tags_dict['multiple_issue'] = "Yes"
                    tags_dict['jflo.issue'] = jflo_id # regard the one with 'jflo.issue' prefix as the dominant one
            else:
                tags_dict['jflo.issue'] = jflo_id
        # archived: here we take the first BNEF id in the string as 'jflo.issue'
        if i.startswith('dskt2.AswoNewsStory.BNEF-'):
            m = re.search('dskt2.AswoNewsStory.BNEF-(.+?)\.', i)
            if m:
                jflo_id = 'BNEF-'+m.group(1)            
                # there might be multiple issues per source, recording the existence of such
                if 'jflo.issue' in tags_dict.keys():
                    if str(tags_dict['jflo.issue']) != str(jflo_id):
                        tags_dict['multiple_issue'] = "Yes"
                else:
                    tags_dict['jflo.issue'] = jflo_id  
                    
        #source related tags
        if i.startswith('dskt2.AswoNewsStory.wireId.'):
            tags_dict['wireId'] = i.partition('dskt2.AswoNewsStory.wireId.')[2]
        if i.startswith('dskt2.AswoNewsStory.wireCode.'):
            tags_dict['wireCode'] = i.partition('dskt2.AswoNewsStory.wireCode.')[2]        
        if i.startswith('dskt2.AswoNewsStory.class.'):
            tags_dict['class'] = i.partition('dskt2.AswoNewsStory.class.')[2]
        if i.startswith('dskt2.AswoNewsStory.suid.'):
            tags_dict['tags_suid'] = i.partition('dskt2.AswoNewsStory.suid.')[2]
        if i.startswith('dskt2.AswoNewsStory.headline.'):
            tags_dict['headline'] = i.partition('dskt2.AswoNewsStory.headline.')[2]        
        if i.startswith('dskt2.AswoNewsStory.topicClusterId.'):
            tags_dict['topicClusterId'] = i.partition('dskt2.AswoNewsStory.topicClusterId.')[2]     
        if i.startswith('dskt2.AswoNewsStory.niCodes.'):
            tags_dict['niCodes'] = i.partition('dskt2.AswoNewsStory.niCodes.')[2]          
        if i.startswith('dskt2.AswoNewsStory.language.'):
            tags_dict['language'] = i.partition('dskt2.AswoNewsStory.language.')[2]           
        if i.startswith('dskt2.AswoNewsStory.arrivalTime.'):
            tags_dict['arrivalTime'] = i.partition('dskt2.AswoNewsStory.arrivalTime.')[2]  
        #other info
        if i.startswith('dskt2.AswoNewsStory.region_score.'):
            tags_dict['region_score'] = i.partition('dskt2.AswoNewsStory.region_score.')[2]    
        if i.startswith('dskt2.AswoNewsStory.region.'):
            tags_dict['region'] = i.partition('dskt2.AswoNewsStory.region.')[2]          
        if i.startswith('dskt2.AswoNewsStory.spam_score.'):
            tags_dict['spam_score'] = i.partition('dskt2.AswoNewsStory.spam_score.')[2] 
        if i.startswith('dskt2.AswoNewsStory.week.'):
            tags_dict['week'] = i.partition('dskt2.AswoNewsStory.week.')[2]   
        #currently not including details related to individual jflo issues that were clones
        #individual issue info
        if i.startswith('dskt2.AswoNewsStory.BNEF-'):
            issue_info_list.append(i.partition('dskt2.AswoNewsStory.')[2])    
    #iterate to record key JFLO issue details into dictionary
    if 'jflo.issue' in tags_dict:
        prefix = tags_dict['jflo.issue'] 
        for i in issue_info_list:
            if i.startswith(prefix + '.region.'):
                tags_dict['jflo_region'] = i.partition(prefix + '.region.')[2]   
            if i.startswith(prefix + '.asset_id.'):
                tags_dict['jflo_asset_id'] = i.partition(prefix + '.asset_id.')[2]             
            if i.startswith(prefix + '.resolution_time.'):
                tags_dict['jflo_resolution_time'] = i.partition(prefix + '.resolution_time.')[2]             
            if i.startswith(prefix + '.reason.'):
                tags_dict['jflo_reason'] = i.partition(prefix + '.reason.')[2]               
            if i.startswith(prefix + '.financing_id.'):
                tags_dict['jflo_financing_id'] = i.partition(prefix + '.financing_id.')[2]    
            if i.startswith(prefix + '.resolution.'):
                tags_dict['jflo_resolution'] = i.partition(prefix + '.resolution.')[2]  
    return tags_dict

In [11]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [12]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

comb = pd.DataFrame()
for r in recs:
    temp = pd.DataFrame(list(r.items())).set_index(0).transpose()
    comb = comb.append(temp)
#unstack tags content into multiple columns    
res = comb['tags'].apply(tags_list_to_dict).apply(pd.Series)
res = pd.concat([comb.drop(['tags'], axis=1), res], axis=1)
res.head()

Unnamed: 0,doc_sha,document,sourcedata,mime_type,asof,suid,resolution,jflo.issue,jflo.indexed,jflo.week,multiple_issue,jflo.created,arrivalTime,topicClusterId,region_score,tags_suid,language,region,class,niCodes,spam_score,wireId,headline,wireCode,week,jflo_asset_id,jflo_resolution,jflo_financing_id,jflo_region,jflo_resolution_time,jflo_reason
1,9f1836cc23c55a0506bf297aabebd77b9cc5abf30c9de2...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:38:02.205000+00:00,RF763CT1UM0W,Rejected,BNEF-1006859,True,202229,Yes,True,2022-07-18T03:38:00.589Z,RF763CT1UM0W==,100,RF763CT1UM0W,ENGLISH,EMEA,50,GDNEFLNG,97,1172,Qatar Economy Research & Analysis 2022,OBG,202229,,Rejected,,EMEA,2022-07-19T13:51:15.618Z,Not enough information for update
1,c5c767c02a31e508ce4ebfd7d0838ae0c53a9c93602391...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T11:41:55.121000+00:00,RF9N5UMB2SJL,Rejected,BNEF-1008234,True,202229,Yes,True,2022-07-19T11:41:54.358Z,RF9N5UMB2SJL,100,RF9N5UMB2SJL,ENGLISH,EMEA,224,GDNEFWIND,72,353,Energy Voice: UK government approves 8GW of of...,WE3,202229,,Rejected,,EMEA,2022-07-19T12:00:17.041Z,Record already up-to-date
1,12f3d565785b6433abc804e44c396eb309d5f8fc204269...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:30:02.009000+00:00,RF75PYMB2SJO,Rejected,BNEF-1006847,True,202229,Yes,True,2022-07-18T03:29:58.999Z,RF75PYMB2SJO,false,RF75PYMB2SJO,CHINESE_SIMP,false,50712,GDNEFWIND,false,1811,同花顺财经: 明阳智能：公司的风电制造业对外销售的产品以风电整机及零部件为主,NS6,202229,,Rejected,,APAC,2022-07-18T08:13:36.104Z,Irrelevant Content
1,ada666cdb765178e476c1d7c6cf302516c40d565c74f76...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:16:58.210000+00:00,RF82G9MB2SKA,Rejected,BNEF-1007360,True,202229,Yes,True,2022-07-18T15:16:57.800Z,RF7GSDMB2SJV,false,RF82G9MB2SKA,PORTUGUESE,false,12184,GDNEFWIND,false,1810,Jornal Madeira: Iberdola anuncia mais 1.500 mi...,NS5,202229,,Rejected,,EMEA,2022-07-18T15:49:31.676Z,Record already up-to-date
1,5d88ba52fe08480276fb1dcb2846dfb7f868b6ab4acee9...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T08:47:33.496000+00:00,RF9F39MB2SJK,Rejected,BCE-28586,True,202229,Yes,True,2022-07-19T08:47:33.090Z,RF9CZMMB2SJR,false,RF9F39MB2SJK,SPANISH,false,10341,GDNEFWIND,false,1810,Bolsamania: Economía.- Siemens Gamesa suminist...,NS5,202229,,,,,,


In [13]:
# to avoid Excel's limit of 65,530 URLS per worksheet
writer = pd.ExcelWriter('bdex_output/res-'+str(today)+'.xlsx', engine='xlsxwriter',options={'strings_to_urls': False})
res.to_excel(writer)
writer.close()

In [14]:
#filter jflo_Resolution = "Rejected"
newRes=res[res['jflo_resolution']=='Rejected']
newRes

Unnamed: 0,doc_sha,document,sourcedata,mime_type,asof,suid,resolution,jflo.issue,jflo.indexed,jflo.week,multiple_issue,jflo.created,arrivalTime,topicClusterId,region_score,tags_suid,language,region,class,niCodes,spam_score,wireId,headline,wireCode,week,jflo_asset_id,jflo_resolution,jflo_financing_id,jflo_region,jflo_resolution_time,jflo_reason
1,9f1836cc23c55a0506bf297aabebd77b9cc5abf30c9de2...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:38:02.205000+00:00,RF763CT1UM0W,Rejected,BNEF-1006859,true,202229,Yes,true,2022-07-18T03:38:00.589Z,RF763CT1UM0W==,100,RF763CT1UM0W,ENGLISH,EMEA,50,GDNEFLNG,97,1172,Qatar Economy Research & Analysis 2022,OBG,202229,,Rejected,,EMEA,2022-07-19T13:51:15.618Z,Not enough information for update
1,c5c767c02a31e508ce4ebfd7d0838ae0c53a9c93602391...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T11:41:55.121000+00:00,RF9N5UMB2SJL,Rejected,BNEF-1008234,true,202229,Yes,true,2022-07-19T11:41:54.358Z,RF9N5UMB2SJL,100,RF9N5UMB2SJL,ENGLISH,EMEA,224,GDNEFWIND,72,353,Energy Voice: UK government approves 8GW of of...,WE3,202229,,Rejected,,EMEA,2022-07-19T12:00:17.041Z,Record already up-to-date
1,12f3d565785b6433abc804e44c396eb309d5f8fc204269...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:30:02.009000+00:00,RF75PYMB2SJO,Rejected,BNEF-1006847,true,202229,Yes,true,2022-07-18T03:29:58.999Z,RF75PYMB2SJO,false,RF75PYMB2SJO,CHINESE_SIMP,false,50712,GDNEFWIND,false,1811,同花顺财经: 明阳智能：公司的风电制造业对外销售的产品以风电整机及零部件为主,NS6,202229,,Rejected,,APAC,2022-07-18T08:13:36.104Z,Irrelevant Content
1,ada666cdb765178e476c1d7c6cf302516c40d565c74f76...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:16:58.210000+00:00,RF82G9MB2SKA,Rejected,BNEF-1007360,true,202229,Yes,true,2022-07-18T15:16:57.800Z,RF7GSDMB2SJV,false,RF82G9MB2SKA,PORTUGUESE,false,12184,GDNEFWIND,false,1810,Jornal Madeira: Iberdola anuncia mais 1.500 mi...,NS5,202229,,Rejected,,EMEA,2022-07-18T15:49:31.676Z,Record already up-to-date
1,7b2fbcdd14a04c398653dcc72c59690afdf10658ae9a1f...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:47:35.548000+00:00,RF83VAMB2SJO,Rejected,BNEF-1007384,true,202229,Yes,true,2022-07-18T15:47:34.752Z,WS8i6gWCrvEdQP6Hs+8QLBw==,100,RF83VAMB2SJO,ENGLISH,AMER,16514,GDNEFWIND,15,1806,Energy Central: Vestas Secures 266 MW Order in...,NS1,202229,,Rejected,,AMER,2022-07-19T12:38:26.260Z,Record already up-to-date
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,13dba31da5da0192161f12186368dd5ab1a9c202cd171d...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T07:42:00.291000+00:00,RDTHDXTVI5MP,Rejected,BNEF-987793,true,202226,,true,2022-06-21T07:41:57.192Z,RDSWIRTP3SHU,false,RDTHDXTVI5MP,CHINESE_SIMP,false,3,GDNEFSOLAR,false,3306,媒体滚动: 北京丰台站改扩建历程,SNA,202225,,Rejected,,APAC,2022-06-28T08:32:00.518Z,Duplicate work item
1,5630f29ae1a6458d3a9811d3b6855d041f4a4a9f45a761...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-25T02:45:21.139000+00:00,RE0IBJMB2SJQ,Rejected,BNEF-990913,true,202226,,true,2022-06-25T02:45:19.941Z,RDW06HMB2SK6,100,RE0IBJMB2SJQ,ENGLISH,APAC,50760,GDNEFSOLAR,12,1811,Realty Plus: Delhi Airport Is India’s First Ai...,NS6,202225,,Rejected,,APAC,2022-06-30T09:36:14.067Z,Not enough information for update
1,3cd952f355f6fccfd3be06f866350e90ef0c635fc4d450...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-23T12:19:57.710000+00:00,RDXJL8MB2SK7,Rejected,BNEF-989629,true,202225,,true,2022-06-23T12:19:56.841Z,WspiWKr0qb98xDIEaWyhMXQ==,100,RDXJL8MB2SK7,ENGLISH,AMER,11496,GDNEFSOLAR,8,1808,Energy Live News: CS Energy awarded three util...,NS3,202225,,Rejected,,AMER,2022-06-23T13:38:16.124Z,Record already up-to-date
1,5cd4a7e0ae554a383c9e8d41a6093cb030960164e21291...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T11:48:50.927000+00:00,RDTSTBMB2SKA,Rejected,BNEF-988063,true,202225,Yes,true,2022-06-21T11:48:47.733Z,RDTSTBMB2SKA,false,RDTSTBMB2SKA,CHINESE_SIMP,false,11639,GDNEFSOLAR,false,1811,大楚网: 阳光也能卖钱？《十堰日报》头条聚焦郧西这一产业！,NS6,202225,,Rejected,,APAC,2022-06-22T09:20:34.634Z,Record already up-to-date


In [15]:
#find duplicate of combinations (class+wireId) from rejected 
newRes=newRes[newRes.duplicated(['class','wireId'], keep=False)]
newRes

Unnamed: 0,doc_sha,document,sourcedata,mime_type,asof,suid,resolution,jflo.issue,jflo.indexed,jflo.week,multiple_issue,jflo.created,arrivalTime,topicClusterId,region_score,tags_suid,language,region,class,niCodes,spam_score,wireId,headline,wireCode,week,jflo_asset_id,jflo_resolution,jflo_financing_id,jflo_region,jflo_resolution_time,jflo_reason
1,9f1836cc23c55a0506bf297aabebd77b9cc5abf30c9de2...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:38:02.205000+00:00,RF763CT1UM0W,Rejected,BNEF-1006859,true,202229,Yes,true,2022-07-18T03:38:00.589Z,RF763CT1UM0W==,100,RF763CT1UM0W,ENGLISH,EMEA,50,GDNEFLNG,97,1172,Qatar Economy Research & Analysis 2022,OBG,202229,,Rejected,,EMEA,2022-07-19T13:51:15.618Z,Not enough information for update
1,c5c767c02a31e508ce4ebfd7d0838ae0c53a9c93602391...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T11:41:55.121000+00:00,RF9N5UMB2SJL,Rejected,BNEF-1008234,true,202229,Yes,true,2022-07-19T11:41:54.358Z,RF9N5UMB2SJL,100,RF9N5UMB2SJL,ENGLISH,EMEA,224,GDNEFWIND,72,353,Energy Voice: UK government approves 8GW of of...,WE3,202229,,Rejected,,EMEA,2022-07-19T12:00:17.041Z,Record already up-to-date
1,12f3d565785b6433abc804e44c396eb309d5f8fc204269...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:30:02.009000+00:00,RF75PYMB2SJO,Rejected,BNEF-1006847,true,202229,Yes,true,2022-07-18T03:29:58.999Z,RF75PYMB2SJO,false,RF75PYMB2SJO,CHINESE_SIMP,false,50712,GDNEFWIND,false,1811,同花顺财经: 明阳智能：公司的风电制造业对外销售的产品以风电整机及零部件为主,NS6,202229,,Rejected,,APAC,2022-07-18T08:13:36.104Z,Irrelevant Content
1,7b2fbcdd14a04c398653dcc72c59690afdf10658ae9a1f...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:47:35.548000+00:00,RF83VAMB2SJO,Rejected,BNEF-1007384,true,202229,Yes,true,2022-07-18T15:47:34.752Z,WS8i6gWCrvEdQP6Hs+8QLBw==,100,RF83VAMB2SJO,ENGLISH,AMER,16514,GDNEFWIND,15,1806,Energy Central: Vestas Secures 266 MW Order in...,NS1,202229,,Rejected,,AMER,2022-07-19T12:38:26.260Z,Record already up-to-date
1,002b48515db847c4d511911becfed4fe7fdd2cfc0ed18e...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T05:46:17.588000+00:00,RF96P4MB2SJT,Rejected,BNEF-1007915,true,202229,Yes,true,2022-07-19T05:46:16.679Z,RF7IHEMB2SK1,100,RF96P4MB2SJT,ENGLISH,EMEA,18689,GDNEFSOLAR,5,1806,PV Magazine: Nofar Energy continues to expand ...,NS1,202229,,Rejected,,EMEA,2022-07-19T08:54:05.453Z,Record already up-to-date
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,b038246f94cc53ba8d1741be7aec335e95f201cc1af2d9...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T08:00:26.636000+00:00,RDTI8P0799MO,Rejected,BNEF-987811,true,202225,,true,2022-06-21T08:00:25.417Z,TBkh1VCRSudYIz/6KO7Slgg==,false,RDTI8P0799MP,ENGLISH,false,301,GDNEFSOLAR,67,2546,PRESS RELEASE: hep on track for 2022: around 8...,DJ,202225,,Rejected,,EMEA,2022-06-21T10:07:10.619Z,Duplicate work item
1,13dba31da5da0192161f12186368dd5ab1a9c202cd171d...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T07:42:00.291000+00:00,RDTHDXTVI5MP,Rejected,BNEF-987793,true,202226,,true,2022-06-21T07:41:57.192Z,RDSWIRTP3SHU,false,RDTHDXTVI5MP,CHINESE_SIMP,false,3,GDNEFSOLAR,false,3306,媒体滚动: 北京丰台站改扩建历程,SNA,202225,,Rejected,,APAC,2022-06-28T08:32:00.518Z,Duplicate work item
1,3cd952f355f6fccfd3be06f866350e90ef0c635fc4d450...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-23T12:19:57.710000+00:00,RDXJL8MB2SK7,Rejected,BNEF-989629,true,202225,,true,2022-06-23T12:19:56.841Z,WspiWKr0qb98xDIEaWyhMXQ==,100,RDXJL8MB2SK7,ENGLISH,AMER,11496,GDNEFSOLAR,8,1808,Energy Live News: CS Energy awarded three util...,NS3,202225,,Rejected,,AMER,2022-06-23T13:38:16.124Z,Record already up-to-date
1,5cd4a7e0ae554a383c9e8d41a6093cb030960164e21291...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T11:48:50.927000+00:00,RDTSTBMB2SKA,Rejected,BNEF-988063,true,202225,Yes,true,2022-06-21T11:48:47.733Z,RDTSTBMB2SKA,false,RDTSTBMB2SKA,CHINESE_SIMP,false,11639,GDNEFSOLAR,false,1811,大楚网: 阳光也能卖钱？《十堰日报》头条聚焦郧西这一产业！,NS6,202225,,Rejected,,APAC,2022-06-22T09:20:34.634Z,Record already up-to-date


In [16]:
#newRes=newRes.drop_duplicates(subset=['class', 'wireId'])
#newRes

In [17]:
#export to excel
newRes.to_csv(r'C:\Users\jlee3524\Desktop\newRes.csv', index=False)