## BDEX Data Extraction

### Prereq:
- Python3.7+
- Run on `{BQNT /I:NOBOX <GO>}` and Trusted Kernel.
- Install the library first using:
    - !pip install aiohttp
    - !pip install minio

In [1]:
print("Please specify how many weeks of data you would like to extract:")
############# To manually change input ###################
number_of_weeks = 5
############# To manually change input ###################
print("You've specified "+str(number_of_weeks)+" weeks.")
print("Then please run all cells and find output in the folder bdex_output.")

Please specify how many weeks of data you would like to extract:
You've specified 5 weeks.
Then please run all cells and find output in the folder bdex_output.


### Main:

In [2]:
!pip install aiohttp

Looking in indexes: https://artifactory.inf.bloomberg.com/artifactory/api/pypi/bloomberg-pypi/simple


In [3]:
!pip install minio

Looking in indexes: https://artifactory.inf.bloomberg.com/artifactory/api/pypi/bloomberg-pypi/simple


In [4]:
import copy
from src.bdex_fe import (
    apply_and_filter,
    bdex_search,
    get_tags,
    GetData,
    PreProcessor,
)
from src.bdex_fe.products import bnef

In [5]:
query_tags = get_tags(product="bnef", number_of_weeks=number_of_weeks)
query_tags

['dskt2.AswoNewsStory.week.202229',
 'dskt2.AswoNewsStory.week.202228',
 'dskt2.AswoNewsStory.week.202227',
 'dskt2.AswoNewsStory.week.202226',
 'dskt2.AswoNewsStory.week.202225']

In [6]:
# from sample_data import all_recs
all_recs = await bdex_search(
    query_tags,
    proxy=None  # not needed on (?:nj|ny)lxdev\d or CORP
)

In [7]:
TAG_FILTERS = {  # example
    "and_tags": [  # contains all
#         "dskt2.AswoNewsStory.language.ENGLISH",
#         "dskt2.AswoNewsStory.jflo.indexed.true"
    ],
    "or_tags": [  # contains any
        
    ],
    "no_tags": [  # contains none
        
    ],
    "any_substr": [  # contains in any
#         "AswoNewsStory"
    ],
}
GET_DATA = {
    "doc_needed": False,
    "source_data_needed": False,
    # if any of the above are True, fill these:
    "bcos_account": None,
    "bcos_secret": None,
    "minio_access": None,
    "minio_secret": None
}

functions = []
preproc = PreProcessor(**TAG_FILTERS)
get_data_client = GetData(**GET_DATA)

product_specific_extraction_functions = [bnef.extract_suid, bnef.extract_resolution]


functions = (preproc.functions +
             product_specific_extraction_functions +
             get_data_client.functions +
             [])  # potential tokenisers

In [8]:
recs = copy.deepcopy(all_recs)
for f in functions:
    apply_and_filter(recs, f)

processing extract_tags                 :  14626 records |███████████| 100%[-00:00,  215659.63 records/s] 00:00
processing extract_suid                 :  14626 records |███████████| 100%[-00:00,  379390.28 records/s] 00:00
processing extract_resolution           :  14626 records |███████████| 100%[-00:00,  472837.14 records/s] 00:00


In [9]:
recs[0]

{'tags': ['dskt2.AswoNewsStory.BNEF-1008234.region.EMEA',
  'dskt2.AswoNewsStory.BNEF-1008234.reason.Record already up-to-date',
  'dskt2.AswoNewsStory.BNEF-1008234.resolution_time.2022-07-19T12:00:17.041Z',
  'dskt2.AswoNewsStory.jflo.indexed.true',
  'dskt2.AswoNewsStory.BNEF-1008234.financing_id.NA',
  'dskt2.AswoNewsStory.jflo.week.202229',
  'dskt2.AswoNewsStory.BNEF-1008234.resolution.Rejected',
  'dskt2.AswoNewsStory.BNEF-1008234.asset_id.NA',
  'dskt2.AswoNewsStory.jflo.issue.BCE-28682',
  'dskt2.AswoNewsStory.jflo.issue.BNEF-1008234',
  'dskt2.AswoNewsStory.jflo.created.true',
  'dskt2.AswoNewsStory.headline.Energy Voice: UK government approves 8GW of offshore wind from fourth leasing round',
  'dskt2.AswoNewsStory.wireCode.WE3',
  'dskt2.AswoNewsStory.region_score.100',
  'dskt2.AswoNewsStory.topicClusterId.RF9N5UMB2SJL',
  'dskt2.AswoNewsStory.class.224',
  'dskt2.AswoNewsStory.language.ENGLISH',
  'dskt2.AswoNewsStory.wireId.353',
  'dskt2.AswoNewsStory.arrivalTime.2022-07-

### Exporting as a Table via Reverse Engineering

In [10]:
import pandas as pd
from datetime import date
import re

today = date.today()

def tags_list_to_dict(cell):
    tags_dict = dict()
    issue_info_list = [] # to store info starting with 'dskt2.AswoNewsStory.BNEF-xxxxxx.'
    for i in cell:
        #jflo related tags
        if i.startswith('dskt2.AswoNewsStory.jflo.indexed.'):
            tags_dict['jflo.indexed'] = i.partition('dskt2.AswoNewsStory.jflo.indexed.')[2]
        if i.startswith('dskt2.AswoNewsStory.jflo.created.'):
            tags_dict['jflo.created'] = i.partition('dskt2.AswoNewsStory.jflo.created.')[2]  
        if i.startswith('dskt2.AswoNewsStory.jflo.week.'):
            tags_dict['jflo.week'] = i.partition('dskt2.AswoNewsStory.jflo.week.')[2] 
            
        # non-archived
        if i.startswith('dskt2.AswoNewsStory.jflo.issue.'):
            # there might be multiple issues per source, recording the existence of such
            jflo_id = i.partition('dskt2.AswoNewsStory.jflo.issue.')[2]  
            if 'jflo.issue' in tags_dict.keys():
                if str(tags_dict['jflo.issue']) != str(jflo_id):
                    tags_dict['multiple_issue'] = "Yes"
                    tags_dict['jflo.issue'] = jflo_id # regard the one with 'jflo.issue' prefix as the dominant one
            else:
                tags_dict['jflo.issue'] = jflo_id
        # archived: here we take the first BNEF id in the string as 'jflo.issue'
        if i.startswith('dskt2.AswoNewsStory.BNEF-'):
            m = re.search('dskt2.AswoNewsStory.BNEF-(.+?)\.', i)
            if m:
                jflo_id = 'BNEF-'+m.group(1)            
                # there might be multiple issues per source, recording the existence of such
                if 'jflo.issue' in tags_dict.keys():
                    if str(tags_dict['jflo.issue']) != str(jflo_id):
                        tags_dict['multiple_issue'] = "Yes"
                else:
                    tags_dict['jflo.issue'] = jflo_id  
                    
        #source related tags
        if i.startswith('dskt2.AswoNewsStory.wireId.'):
            tags_dict['wireId'] = i.partition('dskt2.AswoNewsStory.wireId.')[2]
        if i.startswith('dskt2.AswoNewsStory.wireCode.'):
            tags_dict['wireCode'] = i.partition('dskt2.AswoNewsStory.wireCode.')[2]        
        if i.startswith('dskt2.AswoNewsStory.class.'):
            tags_dict['class'] = i.partition('dskt2.AswoNewsStory.class.')[2]
        if i.startswith('dskt2.AswoNewsStory.suid.'):
            tags_dict['tags_suid'] = i.partition('dskt2.AswoNewsStory.suid.')[2]
        if i.startswith('dskt2.AswoNewsStory.headline.'):
            tags_dict['headline'] = i.partition('dskt2.AswoNewsStory.headline.')[2]        
        if i.startswith('dskt2.AswoNewsStory.topicClusterId.'):
            tags_dict['topicClusterId'] = i.partition('dskt2.AswoNewsStory.topicClusterId.')[2]     
        if i.startswith('dskt2.AswoNewsStory.niCodes.'):
            tags_dict['niCodes'] = i.partition('dskt2.AswoNewsStory.niCodes.')[2]          
        if i.startswith('dskt2.AswoNewsStory.language.'):
            tags_dict['language'] = i.partition('dskt2.AswoNewsStory.language.')[2]           
        if i.startswith('dskt2.AswoNewsStory.arrivalTime.'):
            tags_dict['arrivalTime'] = i.partition('dskt2.AswoNewsStory.arrivalTime.')[2]  
        #other info
        if i.startswith('dskt2.AswoNewsStory.region_score.'):
            tags_dict['region_score'] = i.partition('dskt2.AswoNewsStory.region_score.')[2]    
        if i.startswith('dskt2.AswoNewsStory.region.'):
            tags_dict['region'] = i.partition('dskt2.AswoNewsStory.region.')[2]          
        if i.startswith('dskt2.AswoNewsStory.spam_score.'):
            tags_dict['spam_score'] = i.partition('dskt2.AswoNewsStory.spam_score.')[2] 
        if i.startswith('dskt2.AswoNewsStory.week.'):
            tags_dict['week'] = i.partition('dskt2.AswoNewsStory.week.')[2]   
        #currently not including details related to individual jflo issues that were clones
        #individual issue info
        if i.startswith('dskt2.AswoNewsStory.BNEF-'):
            issue_info_list.append(i.partition('dskt2.AswoNewsStory.')[2])    
    #iterate to record key JFLO issue details into dictionary
    if 'jflo.issue' in tags_dict:
        prefix = tags_dict['jflo.issue'] 
        for i in issue_info_list:
            if i.startswith(prefix + '.region.'):
                tags_dict['jflo_region'] = i.partition(prefix + '.region.')[2]   
            if i.startswith(prefix + '.asset_id.'):
                tags_dict['jflo_asset_id'] = i.partition(prefix + '.asset_id.')[2]             
            if i.startswith(prefix + '.resolution_time.'):
                tags_dict['jflo_resolution_time'] = i.partition(prefix + '.resolution_time.')[2]             
            if i.startswith(prefix + '.reason.'):
                tags_dict['jflo_reason'] = i.partition(prefix + '.reason.')[2]               
            if i.startswith(prefix + '.financing_id.'):
                tags_dict['jflo_financing_id'] = i.partition(prefix + '.financing_id.')[2]    
            if i.startswith(prefix + '.resolution.'):
                tags_dict['jflo_resolution'] = i.partition(prefix + '.resolution.')[2]  
    return tags_dict

In [11]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [12]:
comb = pd.DataFrame()
for r in recs:
    temp = pd.DataFrame(list(r.items())).set_index(0).transpose()
    comb = comb.append(temp)
#unstack tags content into multiple columns    
res = comb['tags'].apply(tags_list_to_dict).apply(pd.Series)
res = pd.concat([comb.drop(['tags'], axis=1), res], axis=1)
res.head()

  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
  comb = comb.append(temp)
 

Unnamed: 0,doc_sha,document,sourcedata,mime_type,asof,suid,resolution,jflo.issue,jflo.indexed,jflo.week,multiple_issue,jflo.created,headline,wireCode,region_score,topicClusterId,class,language,wireId,arrivalTime,niCodes,tags_suid,region,spam_score,week,jflo_region,jflo_reason,jflo_resolution_time,jflo_financing_id,jflo_resolution,jflo_asset_id
1,c5c767c02a31e508ce4ebfd7d0838ae0c53a9c93602391...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T11:41:55.121000+00:00,RF9N5UMB2SJL,Rejected,BNEF-1008234,True,202229,Yes,True,Energy Voice: UK government approves 8GW of of...,WE3,100,RF9N5UMB2SJL,224,ENGLISH,353,2022-07-19T11:41:54.358Z,GDNEFWIND,RF9N5UMB2SJL,EMEA,72,202229,EMEA,Record already up-to-date,2022-07-19T12:00:17.041Z,,Rejected,
1,12f3d565785b6433abc804e44c396eb309d5f8fc204269...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:30:02.009000+00:00,RF75PYMB2SJO,Rejected,BNEF-1006847,True,202229,Yes,True,同花顺财经: 明阳智能：公司的风电制造业对外销售的产品以风电整机及零部件为主,NS6,false,RF75PYMB2SJO,50712,CHINESE_SIMP,1811,2022-07-18T03:29:58.999Z,GDNEFWIND,RF75PYMB2SJO,false,false,202229,APAC,Irrelevant Content,2022-07-18T08:13:36.104Z,,Rejected,
1,ada666cdb765178e476c1d7c6cf302516c40d565c74f76...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:16:58.210000+00:00,RF82G9MB2SKA,Rejected,BNEF-1007360,True,202229,Yes,True,Jornal Madeira: Iberdola anuncia mais 1.500 mi...,NS5,false,RF7GSDMB2SJV,12184,PORTUGUESE,1810,2022-07-18T15:16:57.800Z,GDNEFWIND,RF82G9MB2SKA,false,false,202229,EMEA,Record already up-to-date,2022-07-18T15:49:31.676Z,,Rejected,
1,5d88ba52fe08480276fb1dcb2846dfb7f868b6ab4acee9...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T08:47:33.496000+00:00,RF9F39MB2SJK,Rejected,BCE-28586,True,202229,Yes,True,Bolsamania: Economía.- Siemens Gamesa suminist...,NS5,false,RF9CZMMB2SJR,10341,SPANISH,1810,2022-07-19T08:47:33.090Z,GDNEFWIND,RF9F39MB2SJK,false,false,202229,,,,,,
1,7b2fbcdd14a04c398653dcc72c59690afdf10658ae9a1f...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:47:35.548000+00:00,RF83VAMB2SJO,Rejected,BNEF-1007384,True,202229,Yes,True,Energy Central: Vestas Secures 266 MW Order in...,NS1,100,WS8i6gWCrvEdQP6Hs+8QLBw==,16514,ENGLISH,1806,2022-07-18T15:47:34.752Z,GDNEFWIND,RF83VAMB2SJO,AMER,15,202229,AMER,Record already up-to-date,2022-07-19T12:38:26.260Z,,Rejected,


In [13]:
# to avoid Excel's limit of 65,530 URLS per worksheet
writer = pd.ExcelWriter('bdex_output/res-'+str(today)+'.xlsx', engine='xlsxwriter',options={'strings_to_urls': False})
res.to_excel(writer)
writer.close()

  writer = pd.ExcelWriter('bdex_output/res-'+str(today)+'.xlsx', engine='xlsxwriter',options={'strings_to_urls': False})


In [14]:
#filter jflo_Resolution = "Rejected"
newRes=res[res['jflo_resolution']=='Rejected']
newRes

Unnamed: 0,doc_sha,document,sourcedata,mime_type,asof,suid,resolution,jflo.issue,jflo.indexed,jflo.week,multiple_issue,jflo.created,headline,wireCode,region_score,topicClusterId,class,language,wireId,arrivalTime,niCodes,tags_suid,region,spam_score,week,jflo_region,jflo_reason,jflo_resolution_time,jflo_financing_id,jflo_resolution,jflo_asset_id
1,c5c767c02a31e508ce4ebfd7d0838ae0c53a9c93602391...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T11:41:55.121000+00:00,RF9N5UMB2SJL,Rejected,BNEF-1008234,true,202229,Yes,true,Energy Voice: UK government approves 8GW of of...,WE3,100,RF9N5UMB2SJL,224,ENGLISH,353,2022-07-19T11:41:54.358Z,GDNEFWIND,RF9N5UMB2SJL,EMEA,72,202229,EMEA,Record already up-to-date,2022-07-19T12:00:17.041Z,,Rejected,
1,12f3d565785b6433abc804e44c396eb309d5f8fc204269...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:30:02.009000+00:00,RF75PYMB2SJO,Rejected,BNEF-1006847,true,202229,Yes,true,同花顺财经: 明阳智能：公司的风电制造业对外销售的产品以风电整机及零部件为主,NS6,false,RF75PYMB2SJO,50712,CHINESE_SIMP,1811,2022-07-18T03:29:58.999Z,GDNEFWIND,RF75PYMB2SJO,false,false,202229,APAC,Irrelevant Content,2022-07-18T08:13:36.104Z,,Rejected,
1,ada666cdb765178e476c1d7c6cf302516c40d565c74f76...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:16:58.210000+00:00,RF82G9MB2SKA,Rejected,BNEF-1007360,true,202229,Yes,true,Jornal Madeira: Iberdola anuncia mais 1.500 mi...,NS5,false,RF7GSDMB2SJV,12184,PORTUGUESE,1810,2022-07-18T15:16:57.800Z,GDNEFWIND,RF82G9MB2SKA,false,false,202229,EMEA,Record already up-to-date,2022-07-18T15:49:31.676Z,,Rejected,
1,7b2fbcdd14a04c398653dcc72c59690afdf10658ae9a1f...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:47:35.548000+00:00,RF83VAMB2SJO,Rejected,BNEF-1007384,true,202229,Yes,true,Energy Central: Vestas Secures 266 MW Order in...,NS1,100,WS8i6gWCrvEdQP6Hs+8QLBw==,16514,ENGLISH,1806,2022-07-18T15:47:34.752Z,GDNEFWIND,RF83VAMB2SJO,AMER,15,202229,AMER,Record already up-to-date,2022-07-19T12:38:26.260Z,,Rejected,
1,115955f4577cdb8917d8605032d32be114ab9e15e4cc03...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T14:39:14.587000+00:00,RF80PDMB2SJV,Rejected,BNEF-1007339,true,202229,Yes,true,ConstructionDive: Bureau of Land Management OK...,NS1,100,RF3C9ONL9WXS,50800,ENGLISH,1806,2022-07-18T14:39:13.683Z,GDNEFSOLAR,RF80PDMB2SJV,AMER,1,202229,AMER,Record already up-to-date,2022-07-18T14:44:49.216Z,,Rejected,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,13dba31da5da0192161f12186368dd5ab1a9c202cd171d...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T07:42:00.291000+00:00,RDTHDXTVI5MP,Rejected,BNEF-987793,true,202226,,true,媒体滚动: 北京丰台站改扩建历程,SNA,false,RDSWIRTP3SHU,3,CHINESE_SIMP,3306,2022-06-21T07:41:57.192Z,GDNEFSOLAR,RDTHDXTVI5MP,false,false,202225,APAC,Duplicate work item,2022-06-28T08:32:00.518Z,,Rejected,
1,5630f29ae1a6458d3a9811d3b6855d041f4a4a9f45a761...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-25T02:45:21.139000+00:00,RE0IBJMB2SJQ,Rejected,BNEF-990913,true,202226,,true,Realty Plus: Delhi Airport Is India’s First Ai...,NS6,100,RDW06HMB2SK6,50760,ENGLISH,1811,2022-06-25T02:45:19.941Z,GDNEFSOLAR,RE0IBJMB2SJQ,APAC,12,202225,APAC,Not enough information for update,2022-06-30T09:36:14.067Z,,Rejected,
1,3cd952f355f6fccfd3be06f866350e90ef0c635fc4d450...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-23T12:19:57.710000+00:00,RDXJL8MB2SK7,Rejected,BNEF-989629,true,202225,,true,Energy Live News: CS Energy awarded three util...,NS3,100,WspiWKr0qb98xDIEaWyhMXQ==,11496,ENGLISH,1808,2022-06-23T12:19:56.841Z,GDNEFSOLAR,RDXJL8MB2SK7,AMER,8,202225,AMER,Record already up-to-date,2022-06-23T13:38:16.124Z,,Rejected,
1,5cd4a7e0ae554a383c9e8d41a6093cb030960164e21291...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T11:48:50.927000+00:00,RDTSTBMB2SKA,Rejected,BNEF-988063,true,202225,Yes,true,大楚网: 阳光也能卖钱？《十堰日报》头条聚焦郧西这一产业！,NS6,false,RDTSTBMB2SKA,11639,CHINESE_SIMP,1811,2022-06-21T11:48:47.733Z,GDNEFSOLAR,RDTSTBMB2SKA,false,false,202225,APAC,Record already up-to-date,2022-06-22T09:20:34.634Z,,Rejected,


In [15]:
#find duplicate of combinations (class+wireId) from rejected 
newRes[newRes.duplicated(['class', 'wireId'])]

Unnamed: 0,doc_sha,document,sourcedata,mime_type,asof,suid,resolution,jflo.issue,jflo.indexed,jflo.week,multiple_issue,jflo.created,headline,wireCode,region_score,topicClusterId,class,language,wireId,arrivalTime,niCodes,tags_suid,region,spam_score,week,jflo_region,jflo_reason,jflo_resolution_time,jflo_financing_id,jflo_resolution,jflo_asset_id
1,25ebcf8011ae76560ea406ba0414da8403ec35d3e7827f...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T07:09:21.657000+00:00,RF9AJKMB2SJR,Rejected,BNEF-1007992,true,202229,Yes,true,PV Magazine: US authorities approve 500 MW sol...,NS1,100,RF9AJKMB2SJR,18689,ENGLISH,1806,2022-07-19T07:09:20.576Z,GDNEFSOLAR,RF9AJKMB2SJR,AMER,3,202229,AMER,Record already up-to-date,2022-07-19T11:38:11.854Z,,Rejected,
1,b53aaf3c817ec419b5be0ad16c2580ba6c34d738058bf0...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T06:33:01.964000+00:00,RF7E70BSWSG6,Rejected,BNEF-1006946,true,202229,Yes,true,Wind’s share in France’s power generation arri...,AII,100,RF7E70BSWSG6,7,ENGLISH,2691,2022-07-18T06:33:00.952Z,GDNEFWIND,RF7E70BSWSG6,EMEA,99,202229,EMEA,Record already up-to-date,2022-07-18T16:27:47.566Z,,Rejected,
1,8309f1fa5085d27111eec67ff5af353fb387099d52d583...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T02:17:22.432000+00:00,RF8X0XMB2SJQ,Rejected,BNEF-1007789,true,202229,Yes,true,Energy Central: Arise Announces its Largest & ...,NS1,100,RF8X0XMB2SJQ,16514,ENGLISH,1806,2022-07-19T02:17:21.482Z,GDNEFWIND,RF8X0XMB2SJQ,EMEA,0,202229,EMEA,Record already up-to-date,2022-07-19T11:12:39.737Z,,Rejected,
1,404bb1841fb1a97614e6db59156b6d86aa7f63380e4bbd...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T01:06:40.269000+00:00,RF6Z30MB2SK3,Rejected,BNEF-1006747,true,202229,Yes,true,搜狐新闻: 股票大作手：回调是为了更好地上涨吗？,NS6,false,RF6Z30MB2SK3,12252,CHINESE_SIMP,1811,2022-07-18T01:06:36.895Z,GDNEFSOLAR,RF6Z30MB2SK3,false,false,202229,APAC,Record already up-to-date,2022-07-19T06:15:40.172Z,,Rejected,
1,5a716457c9f32a842567f083027383dc6e475ff96eb3af...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T09:35:19.881000+00:00,RF7MMVBSWSG0,Rejected,BNEF-1007137,true,202229,Yes,true,UniCredit inks off-take deal for 25 MW of Ital...,AII,100,RF7MMVBSWSG0,7,ENGLISH,2691,2022-07-18T09:35:19.052Z,GDNEFPPA,RF7MMVBSWSG0,EMEA,29,202229,EMEA,Not enough information for update,2022-07-18T11:32:48.980Z,,Rejected,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,b038246f94cc53ba8d1741be7aec335e95f201cc1af2d9...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T08:00:26.636000+00:00,RDTI8P0799MO,Rejected,BNEF-987811,true,202225,,true,PRESS RELEASE: hep on track for 2022: around 8...,DJ,false,TBkh1VCRSudYIz/6KO7Slgg==,301,ENGLISH,2546,2022-06-21T08:00:25.417Z,GDNEFSOLAR,RDTI8P0799MP,false,67,202225,EMEA,Duplicate work item,2022-06-21T10:07:10.619Z,,Rejected,
1,13dba31da5da0192161f12186368dd5ab1a9c202cd171d...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T07:42:00.291000+00:00,RDTHDXTVI5MP,Rejected,BNEF-987793,true,202226,,true,媒体滚动: 北京丰台站改扩建历程,SNA,false,RDSWIRTP3SHU,3,CHINESE_SIMP,3306,2022-06-21T07:41:57.192Z,GDNEFSOLAR,RDTHDXTVI5MP,false,false,202225,APAC,Duplicate work item,2022-06-28T08:32:00.518Z,,Rejected,
1,3cd952f355f6fccfd3be06f866350e90ef0c635fc4d450...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-23T12:19:57.710000+00:00,RDXJL8MB2SK7,Rejected,BNEF-989629,true,202225,,true,Energy Live News: CS Energy awarded three util...,NS3,100,WspiWKr0qb98xDIEaWyhMXQ==,11496,ENGLISH,1808,2022-06-23T12:19:56.841Z,GDNEFSOLAR,RDXJL8MB2SK7,AMER,8,202225,AMER,Record already up-to-date,2022-06-23T13:38:16.124Z,,Rejected,
1,5cd4a7e0ae554a383c9e8d41a6093cb030960164e21291...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T11:48:50.927000+00:00,RDTSTBMB2SKA,Rejected,BNEF-988063,true,202225,Yes,true,大楚网: 阳光也能卖钱？《十堰日报》头条聚焦郧西这一产业！,NS6,false,RDTSTBMB2SKA,11639,CHINESE_SIMP,1811,2022-06-21T11:48:47.733Z,GDNEFSOLAR,RDTSTBMB2SKA,false,false,202225,APAC,Record already up-to-date,2022-06-22T09:20:34.634Z,,Rejected,


In [16]:
newRes.drop_duplicates(subset=['class', 'wireId'])

Unnamed: 0,doc_sha,document,sourcedata,mime_type,asof,suid,resolution,jflo.issue,jflo.indexed,jflo.week,multiple_issue,jflo.created,headline,wireCode,region_score,topicClusterId,class,language,wireId,arrivalTime,niCodes,tags_suid,region,spam_score,week,jflo_region,jflo_reason,jflo_resolution_time,jflo_financing_id,jflo_resolution,jflo_asset_id
1,c5c767c02a31e508ce4ebfd7d0838ae0c53a9c93602391...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-19T11:41:55.121000+00:00,RF9N5UMB2SJL,Rejected,BNEF-1008234,true,202229,Yes,true,Energy Voice: UK government approves 8GW of of...,WE3,100,RF9N5UMB2SJL,224,ENGLISH,353,2022-07-19T11:41:54.358Z,GDNEFWIND,RF9N5UMB2SJL,EMEA,72,202229,EMEA,Record already up-to-date,2022-07-19T12:00:17.041Z,,Rejected,
1,12f3d565785b6433abc804e44c396eb309d5f8fc204269...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T03:30:02.009000+00:00,RF75PYMB2SJO,Rejected,BNEF-1006847,true,202229,Yes,true,同花顺财经: 明阳智能：公司的风电制造业对外销售的产品以风电整机及零部件为主,NS6,false,RF75PYMB2SJO,50712,CHINESE_SIMP,1811,2022-07-18T03:29:58.999Z,GDNEFWIND,RF75PYMB2SJO,false,false,202229,APAC,Irrelevant Content,2022-07-18T08:13:36.104Z,,Rejected,
1,ada666cdb765178e476c1d7c6cf302516c40d565c74f76...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:16:58.210000+00:00,RF82G9MB2SKA,Rejected,BNEF-1007360,true,202229,Yes,true,Jornal Madeira: Iberdola anuncia mais 1.500 mi...,NS5,false,RF7GSDMB2SJV,12184,PORTUGUESE,1810,2022-07-18T15:16:57.800Z,GDNEFWIND,RF82G9MB2SKA,false,false,202229,EMEA,Record already up-to-date,2022-07-18T15:49:31.676Z,,Rejected,
1,7b2fbcdd14a04c398653dcc72c59690afdf10658ae9a1f...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T15:47:35.548000+00:00,RF83VAMB2SJO,Rejected,BNEF-1007384,true,202229,Yes,true,Energy Central: Vestas Secures 266 MW Order in...,NS1,100,WS8i6gWCrvEdQP6Hs+8QLBw==,16514,ENGLISH,1806,2022-07-18T15:47:34.752Z,GDNEFWIND,RF83VAMB2SJO,AMER,15,202229,AMER,Record already up-to-date,2022-07-19T12:38:26.260Z,,Rejected,
1,115955f4577cdb8917d8605032d32be114ab9e15e4cc03...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-07-18T14:39:14.587000+00:00,RF80PDMB2SJV,Rejected,BNEF-1007339,true,202229,Yes,true,ConstructionDive: Bureau of Land Management OK...,NS1,100,RF3C9ONL9WXS,50800,ENGLISH,1806,2022-07-18T14:39:13.683Z,GDNEFSOLAR,RF80PDMB2SJV,AMER,1,202229,AMER,Record already up-to-date,2022-07-18T14:44:49.216Z,,Rejected,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,3870c105f9135b7e61e2ddbe7d97db60ae7c738c445270...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-23T22:28:06.676000+00:00,RDYBQTMB2SJR,Rejected,BNEF-990045,true,202225,,true,Star-Ledger: Murphy moving ahead with offshore...,SLE,100,WcuLtSMcidInt9RNI7yvayw==,361,ENGLISH,1475,2022-06-23T22:28:05.558Z,GDNEFWIND,RDYBQTMB2SJR,AMER,98,202225,AMER,Record already up-to-date,2022-06-24T17:26:33.272Z,,Rejected,
1,4ac8ad7fc976b52b7189f0a140a102656d13cd815dc024...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-21T14:17:49.152000+00:00,RDTZPNMB2SK8,Rejected,BNEF-988152,true,202225,,true,Barka Water And: ACWA Power-led consortium com...,CO8,100,W+oqmH1kGG8VJbr3qRa4FxQ==,1120,ENGLISH,1926,2022-06-21T14:17:48.033Z,GDNEFWIND,RDTZPNMB2SK8,EMEA,0,202225,EMEA,Record already up-to-date,2022-06-21T14:45:55.005Z,,Rejected,
1,dd92d32aadd263453e21241a73d0bc4f36e5014e2f612b...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-22T14:11:16.500000+00:00,RDVU2RMB2SJZ,Rejected,BNEF-988886,true,202225,,true,Azelis SA: Azelis expands its sustainable R&PA...,CO5,100,WXQToxd0LccKaC48XYN/vhw==,9393,ENGLISH,1814,2022-06-22T14:11:15.661Z,GDNEFBFL,RDVU2RMB2SJZ,EMEA,80,202225,EMEA,Record already up-to-date,2022-06-24T09:37:53.546Z,,Rejected,
1,8323e7af598b121dce99dbf7c6afb990e31a5cb44732b1...,https://bcos.prod.blpprofessional.com/v1/dt.ex...,[],text/html,2022-06-22T10:02:41.223000+00:00,RDVIKGMB2SJL,Rejected,BNEF-988712,true,202225,,true,Tech.eu: Meet Bio-Sep the green tech is turnin...,WE3,100,RDVIKGMB2SJL,365,ENGLISH,353,2022-06-22T10:02:40.148Z,GDNEFBFL,RDVIKGMB2SJL,EMEA,26,202225,EMEA,Not enough information for update,2022-06-24T10:05:25.342Z,,Rejected,


In [21]:
newRes.to_csv(r'C:\Users\jlee3524\Desktop\newRes.csv', index=False)