In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Load the abstracts workbook, using the sheet's first column as the index.
# Later cells read the "t" and "ab" columns of this frame
# (presumably title and abstract text — confirm against the workbook).
abstracts = pd.read_excel("2000_abstract.xlsx", index_col=0)

In [3]:
# Preview the loaded frame (bare last expression -> rich display).
abstracts

Unnamed: 0,t,ab
1,Analysis of Greenhouse Gas Reduction by Using ...,Agriculture contributes approximately 14% of g...
2,Does certified organic farming reduce greenhou...,The increasing prevalence of ecologically sust...
3,USE OF ARTIFICIAL NEURAL NETWORKS TO PREDICT G...,Agriculture has a greater impact on the enviro...
4,SIMULATION OF NITROUS OXIDE EMISSIONS FROM DAI...,Farming practices can have a large impact on t...
5,Modeling shows that alternative soil managemen...,Agricultural management has a significant impa...
...,...,...
1996,Commercial Eucalyptus Plantations with Taungya...,"The increasing demand for wood, fiber, and pul..."
1997,Assessing Seasonal Methane and Nitrous Oxide E...,Improved irrigation management is identified a...
1998,Interactive effect of biochar size and organic...,Biochar (BC) application to agricultural soil ...
1999,Which are the most favourable conditions for r...,No-tillage practices have a recognised benefic...


In [5]:
# This cell sits above the cell that imports torch, so on a fresh kernel
# (Restart & Run All) `torch` would not be defined yet. Importing it here keeps
# the cell runnable regardless of execution order; re-importing is a no-op.
import torch

# The model and every input tensor are moved to the GPU with .cuda() further
# down, so a False here means those cells will fail.
torch.cuda.is_available()

True

In [4]:
# %%capture
# !pip install transformers
# !pip install datasets
# !pip install torch

from transformers import BertTokenizer, BertModel
import torch

In [6]:
from huggingface_hub import hf_hub_url, cached_download

In [1]:
# Fetch the classification head stored next to the fine-tuned encoder in the
# Hub repo "X/<repo_name>" and load it.
repo_name = "bert_ft_binary_chatgpt"
# Resolve the download URL of the serialized head inside the repo.
config_file_url = hf_hub_url("X/"+repo_name, filename="cls_layer.torch")
# NOTE(review): `cached_download` has been deprecated in favour of
# `hf_hub_download` in recent huggingface_hub releases — confirm it still
# exists in the installed version.
value = cached_download(config_file_url)
# SECURITY: torch.load unpickles the file, which can execute arbitrary code;
# only load artefacts from a repo you trust.
# `cls_layer` is later called like a module: cls_layer(cls_rep).
cls_layer = torch.load(value)

In [8]:
# Load the fine-tuned encoder from the same Hub repo and move it to the GPU.
bert_model = BertModel.from_pretrained("X/"+repo_name).cuda()
# Matching tokenizer; do_lower_case=False asks it not to lowercase the input.
bert_tokenizer = BertTokenizer.from_pretrained("X/"+repo_name, do_lower_case=False)
# Switch to evaluation mode. The result is bound to `e`, presumably only to
# keep the cell from echoing the model repr.
e = bert_model.eval()

In [9]:
 def preproccesing(t, ab, q, maxlen_t=0, maxlen_ab=483, maxlen_q=25):
        """Encode a (title, abstract, question) triple as one fixed-length input.

        Each argument is converted with ``str()``, tokenized with the
        module-level ``bert_tokenizer`` and then truncated or right-padded with
        ``[PAD]`` to exactly its ``maxlen_*`` length. The segments are joined as
        ``[CLS] title [SEP] abstract [SEP] question [SEP]``.

        With the default lengths the sequence has
        1 + 0 + 1 + 483 + 1 + 25 + 1 = 512 positions; ``maxlen_t=0`` means the
        title contributes no tokens at all.

        Args:
            t: Title; any object, converted with ``str()``.
            ab: Abstract; any object, converted with ``str()``.
            q: Question; any object, converted with ``str()``.
            maxlen_t: Exact number of token slots reserved for the title.
            maxlen_ab: Exact number of token slots reserved for the abstract.
            maxlen_q: Exact number of token slots reserved for the question.

        Returns:
            ``(token_ids, attention_mask)``: two 1-D integer tensors of equal
            length, both moved to the GPU with ``.cuda()``. The mask is 1 on
            real tokens and 0 on ``[PAD]`` positions.

        Note:
            Because of the ``str()`` conversion, ``None`` becomes the text
            ``"None"`` and a float NaN becomes ``"nan"``; the ``["[UNK]"]``
            fallback is only used for an empty string.
        """
        def _fit(pieces, maxlen):
            # Truncate to maxlen, then right-pad with [PAD] up to maxlen.
            return pieces[:maxlen] + ['[PAD]'] * max(0, maxlen - len(pieces))

        segments = []
        for text, maxlen in ((str(t), maxlen_t), (str(ab), maxlen_ab), (str(q), maxlen_q)):
            pieces = bert_tokenizer.tokenize(text) if len(text) > 0 else ["[UNK]"]
            segments.append(_fit(pieces, maxlen))

        tokens = ["[CLS]"] + segments[0] + ["[SEP]"] + segments[1] + ["[SEP]"] + segments[2] + ["[SEP]"]
        tokens_ids = bert_tokenizer.convert_tokens_to_ids(tokens)
        tokens_ids_tensor = torch.tensor(tokens_ids)

        # Derive the padding id from the tokenizer instead of assuming it is 0,
        # so the mask stays correct whatever id the vocabulary gives '[PAD]'.
        pad_id = bert_tokenizer.convert_tokens_to_ids('[PAD]')
        attn_mask = (tokens_ids_tensor != pad_id).long()  # 1 = real token, 0 = [PAD]

        return tokens_ids_tensor.cuda(), attn_mask.cuda()

In [10]:
def ChatGPTClassifier(t, ab, q):
    """Score one (title, abstract, question) triple with the fine-tuned model.

    The triple is encoded by ``preproccesing``, run through ``bert_model`` as a
    batch of one, and the hidden state at position 0 (the ``[CLS]`` slot) is
    passed to ``cls_layer``. A sigmoid turns the logits into probabilities.

    Args:
        t: Title of the paper.
        ab: Abstract of the paper.
        q: Question to ask about the abstract.

    Returns:
        A NumPy array holding the sigmoid outputs for this single input. The
        example cell in this notebook shows two float32 values; what each
        position stands for is not visible here — confirm against the
        training code.
    """
    tokens_ids_tensor, attn_mask = preproccesing(t, ab, q)
    # Inference only: without no_grad() every call would record an autograd
    # graph, wasting GPU memory and time across thousands of rows.
    with torch.no_grad():
        cont_reps = bert_model(tokens_ids_tensor.unsqueeze(0), attention_mask=attn_mask.unsqueeze(0))
        cls_rep = cont_reps.last_hidden_state[:, 0]
        probs = torch.sigmoid(cls_layer(cls_rep))
    # [0] drops the batch dimension added by unsqueeze(0).
    return probs.detach().cpu().numpy()[0]

In [11]:
# Smoke test on a single hand-picked paper.
# The question is written with "article" and then rewritten to "abstract",
# the same rewrite the batch loops below apply to every question.
q = "Does the article discuss the impact of methane (CH4) emissions?"
q = q.replace("article", "abstract")

# Title and abstract of the example paper.
t = "Proximal sensing for soil carbon accounting"
ab = "Maintaining or increasing soil organic carbon (C) is vital for securing food production and for mitigating greenhouse gas (GHG) emissions, climate change, and land degradation. Some land management practices in cropping, grazing, horticultural, and mixed farming systems can be used to increase organic C in soil, but to assess their effectiveness, we need accurate and cost-efficient methods for measuring and monitoring the change. To determine the stock of organic C in soil, one requires measurements of soil organic C concentration, bulk density, and gravel content, but using conventional laboratory-based analytical methods is expensive. Our aim here is to review the current state of proximal sensing for the development of new soil C accounting methods for emissions reporting and in emissions reduction schemes. We evaluated sensing techniques in terms of their rapidity, cost, accuracy, safety, readiness, and their state of development. The most suitable method for measuring soil organic C concentrations appears to be visible-near-infrared (vis-NIR) spectroscopy and, for bulk density, active gamma-ray attenuation. Sensors for measuring gravel have not been developed, but an interim solution with rapid wet sieving and automated measurement appears useful. Field-deployable, multi-sensor systems are needed for cost-efficient soil C accounting. Proximal sensing can be used for soil organic C accounting, but the methods need to be standardized and procedural guidelines need to be developed to ensure proficient measurement and accurate reporting and verification. These are particularly important if the schemes use financial incentives for landholders to adopt management practices to sequester soil organic C. We list and discuss requirements for developing new soil C accounting methods based on proximal sensing, including requirements for recording, verification, and auditing."

# Bare last expression: displays the array returned by the classifier.
ChatGPTClassifier(t, ab, q)

array([0.5450293 , 0.29016036], dtype=float32)

In [17]:
# Base question set. The labelling loop names each one by its 1-based position
# ("b1" .. "b13"), and later variant dictionaries use those ids as keys, so the
# order of this list must not change.
bsq = [
    "Does the article discuss agroecological practices?",
    "Does the article cover climate change mitigation?",
    "Does the article cover climate change adaptation?",
    "Does the article comprehensively cover climate change and environmental aspects?",
    "Does the article address greenhouse gas emissions?",
    "Does the article assess agroecological practices' impact on climate change?",
    "Does the article provide relevance to stakeholders and farmers in the agricultural sector?",
    "Does the article address limitations, challenges, and potential risks?",
    "Does the article address policy implications?",
    "Does the article have a specific geographic focus?",
    "Does the article discuss the role of agroecology in enhancing climate resilience in agricultural systems?",
    "Does the article assess the impact of agroecology on nitrogen dynamics?",
    "Does the article discuss the impact of methane (CH4) emissions?"
]

In [51]:
%%time
# Score every abstract against each base question and write one workbook per
# question (nllf_b1.xlsx, nllf_b2.xlsx, ...).
for i, qq in enumerate(bsq):
    tag = f"b{i+1}"  # 1-based id, shared by the progress line and the file name
    print("ChatGPT question:", tag)
    # Rewrite the question once per question instead of once per row.
    question = qq.replace("article", "abstract")
    # One model call per row; each result is the array from ChatGPTClassifier.
    y_pred = abstracts.apply(lambda x: ChatGPTClassifier(x["t"], x["ab"], question), axis=1)
    # Work on a copy so the source frame never gains the label column.
    new_A_train = abstracts.copy()
    new_A_train["chatgpt_label"] = y_pred
    new_A_train.to_excel(f"nllf_{tag}.xlsx")

ChatGPT question: b1
ChatGPT question: b2
ChatGPT question: b3
ChatGPT question: b4
ChatGPT question: b5
ChatGPT question: b6
ChatGPT question: b7
ChatGPT question: b8
ChatGPT question: b9
ChatGPT question: b10
ChatGPT question: b11
ChatGPT question: b12
ChatGPT question: b13
CPU times: user 5min 46s, sys: 140 ms, total: 5min 46s
Wall time: 5min 46s


In [18]:
# Rewordings of selected base questions. Each key is the id of the base
# question ("b6" = 6th entry of `bsq`, 1-based); each value lists three
# alternative phrasings, written out as <key>_v1 .. <key>_v3 by the loop that
# consumes this dict.
variant_bsq = {
    "b6": [
        "Does the article evaluate how agroecological practices affect climate change?",
        "Does the article analyze the impact of agroecological practices on climate change?",
        "Does the article investigate whether agroecological practices influence climate change?"
    ],
    "b2": [
        "Does the article discuss measures to mitigate climate change?",
        "Does the article address climate change mitigation efforts?",
        "Does the article touch upon strategies for reducing climate change impacts?",
    ],
    "b12": [
        "Does the article evaluate agroecology's impact on nitrogen dynamics?",
        "Does the article analyze how agroecology affects nitrogen dynamics?",
        "Does the article study the influence of agroecology on nitrogen dynamics?",
    ]
}

In [53]:
%%time
# Score every abstract against each rewording in `variant_bsq` and write one
# workbook per rewording (nllf_<base id>_v<n>.xlsx).
for k, v in variant_bsq.items():
    for i, qq in enumerate(v):
        tag = f"{k}_v{i+1}"  # shared by the progress line and the file name
        print("ChatGPT question:", tag)
        # Rewrite the question once per question instead of once per row.
        question = qq.replace("article", "abstract")
        y_pred = abstracts.apply(lambda x: ChatGPTClassifier(x["t"], x["ab"], question), axis=1)
        # Work on a copy so the source frame never gains the label column.
        new_A_train = abstracts.copy()
        new_A_train["chatgpt_label"] = y_pred
        new_A_train.to_excel(f"nllf_{tag}.xlsx")

ChatGPT question: b6_v1
ChatGPT question: b6_v2
ChatGPT question: b6_v3
ChatGPT question: b2_v1
ChatGPT question: b2_v2
ChatGPT question: b2_v3
ChatGPT question: b12_v1
ChatGPT question: b12_v2
ChatGPT question: b12_v3
CPU times: user 4min 1s, sys: 83.8 ms, total: 4min 1s
Wall time: 4min 1s


In [13]:
# Second question set. The labelling loop names each one by its 1-based
# position ("exp1" .. "exp36"), and later variant dictionaries use those ids as
# keys, so the order of this list must not change.
# NOTE(review): exp20 ("roattion") and exp28 ("greenshouse gasses") contain
# typos. They are left untouched because these strings are fed to the model
# verbatim; confirm whether they must match the prompts used at training time
# before correcting them.
exp = [
    "Does the article assess the impact of agroecological practices or systems on climate change mitigation or adaptation?",
    "Does the article refer to the application of one or more agroecological practices?",
    "Does the article refer to the application of one or more Ecological agriculture practices?",
    "Does the article refer to the application of one or more Regenerative agriculture practices?",
    "Does the article refer to the application of one or more organic agriculture practices?",
    "Does the article refer to the application of one or more Mixed farming system or practices?",
    "Does the article refer to the application of one or more Diversified farming practices?",
    "Does the article refer to the application of one or more Climate-Smart Agriculture practices?",
    "Does the article refer to the application of one or more conservation agriculture practices?",
    "Does the article refer to the application of one or more Sustainable Rice Intensification practices?",
    "Does the article refer to the application of a type of agroforestry system or practice?",
    "Does the article refer to the application of a type of intercropping practice?",
    "Does the article refer to the application of a type of organic fertilisation practice?",
    "Does the article refer to the application of a type of water collection practice?",
    "Does the article refer to the application of a type of ecological or mechanical weed management practice?",
    "Does the article refer to the application of a type of Bio-control practice?",
    "Does the article refer to the substitution of different varieties or cultivar?",
    "Does the article refer to the application of Cropping system diversification?",
    "Does the article refer to the application of cover crops or mulching practices?",
    "Does the article refer to the inclusion of legume in the crop roattion?",
    "Does the article refer to the application of a type of Residues management practice?",
    "Does the article refer to the integration of semi-natural landscape elements?",
    "Does the article refer to planting or managing natural or semi-natural landscape elements?",
    "Does the article refer to soil preservation and/or restoration practices?",
    "Are these practices at the field or farm scale?",
    "Does the article refer to the impact (or effect) of these practices on climate change?",
    "Does the article refer to the impact (or effect) of these practices on climate change mitigation?",
    "Does the article refer to the impact (or effect) of these practices on greenshouse gasses (GHG) emissions?",
    "Does the article refer to the impact (or effect) of these practices on Carbon/CO2/carbon dioxide emissions?",
    "Does the article refer to the impact (or effect) of these practices on Nitrogen/N2O/nitrogen oxide emissions?",
    "Does the article refer to the impact (or effect) of these practices on Carbon/CH4/methane emissions?",
    "Does the article refer to the impact (or effect) of these practices on CO2 or N2O or CH4 soil fluxes?",
    "Does the article refer to the impact (or effect) of these practices on Carbon storage in the soil?",
    "Does the article refer to the impact (or effect) of these practices on the carbon sequestration in the soil?",
    "Does the article refer to the Carbon footprint of these practices?",
    "Does the article refer to the impact (or effect) of these practices on climate change adaptation?",
]

In [13]:
# Look up question "exp22": ids are 1-based, list indices 0-based, hence the -1.
exp[22-1]

'Does the article refer to the integration of semi-natural landscape elements?'

In [55]:
%%time
# Score every abstract against each question in `exp` and write one workbook
# per question (nllf_exp1.xlsx, nllf_exp2.xlsx, ...).
for i, qq in enumerate(exp):
    tag = f"exp{i+1}"  # 1-based id, shared by the progress line and the file name
    print("ChatGPT question:", tag)
    # Rewrite the question once per question instead of once per row.
    question = qq.replace("article", "abstract")
    y_pred = abstracts.apply(lambda x: ChatGPTClassifier(x["t"], x["ab"], question), axis=1)
    # Work on a copy so the source frame never gains the label column.
    new_A_train = abstracts.copy()
    new_A_train["chatgpt_label"] = y_pred
    new_A_train.to_excel(f"nllf_{tag}.xlsx")

ChatGPT question: exp1
ChatGPT question: exp2
ChatGPT question: exp3
ChatGPT question: exp4
ChatGPT question: exp5
ChatGPT question: exp6
ChatGPT question: exp7
ChatGPT question: exp8
ChatGPT question: exp9
ChatGPT question: exp10
ChatGPT question: exp11
ChatGPT question: exp12
ChatGPT question: exp13
ChatGPT question: exp14
ChatGPT question: exp15
ChatGPT question: exp16
ChatGPT question: exp17
ChatGPT question: exp18
ChatGPT question: exp19
ChatGPT question: exp20
ChatGPT question: exp21
ChatGPT question: exp22
ChatGPT question: exp23
ChatGPT question: exp24
ChatGPT question: exp25
ChatGPT question: exp26
ChatGPT question: exp27
ChatGPT question: exp28
ChatGPT question: exp29
ChatGPT question: exp30
ChatGPT question: exp31
ChatGPT question: exp32
ChatGPT question: exp33
ChatGPT question: exp34
ChatGPT question: exp35
ChatGPT question: exp36
CPU times: user 16min 6s, sys: 400 ms, total: 16min 7s
Wall time: 16min 8s


In [20]:
# Rewordings of selected `exp` questions. Each key is the id of the original
# question ("exp17" = 17th entry of `exp`, 1-based); each value lists three
# alternative phrasings, written out as <key>_v1 .. <key>_v3 by the loop that
# consumes this dict.
variant_exp = {
    "exp1": [
        "Does the article evaluate agroecological practices' impact on climate change?",
        "Does the article analyze how agroecological systems affect climate change?",
        "Does the article explore the connection between agroecological practices and climate change?",

    ],
    "exp17": [
        "Does the article mention the replacement of various varieties or cultivars?",
        "Does the article discuss the substitution of different varieties or cultivars?",
        "Does the article cover the use of alternate varieties or cultivars?"
    ],
    "exp5": [
        "Does the article mention any organic agriculture practices being applied?",
        "Does the article discuss the utilization of organic farming techniques?",
        "Does the article refer to the use of one or multiple organic farming methods?",
    ]
}

In [57]:
%%time
# Score every abstract against each rewording in `variant_exp` and write one
# workbook per rewording (nllf_<original id>_v<n>.xlsx).
for k, v in variant_exp.items():
    for i, qq in enumerate(v):
        tag = f"{k}_v{i+1}"  # shared by the progress line and the file name
        print("ChatGPT question:", tag)
        # Rewrite the question once per question instead of once per row.
        question = qq.replace("article", "abstract")
        y_pred = abstracts.apply(lambda x: ChatGPTClassifier(x["t"], x["ab"], question), axis=1)
        # Work on a copy so the source frame never gains the label column.
        new_A_train = abstracts.copy()
        new_A_train["chatgpt_label"] = y_pred
        new_A_train.to_excel(f"nllf_{tag}.xlsx")

ChatGPT question: exp1_v1
ChatGPT question: exp1_v2
ChatGPT question: exp1_v3
ChatGPT question: exp17_v1
ChatGPT question: exp17_v2
ChatGPT question: exp17_v3
ChatGPT question: exp5_v1
ChatGPT question: exp5_v2
ChatGPT question: exp5_v3
CPU times: user 4min 1s, sys: 55.9 ms, total: 4min 2s
Wall time: 4min 2s


In [17]:
# Third question set. The labelling loop names each one by its 1-based
# position ("raw1", "raw2", ...), and later variant dictionaries use those ids
# as keys ("raw21", "raw49", ...), so entries must not be reordered, removed or
# de-duplicated.
# Known quirks, kept on purpose to preserve the numbering and the exact model
# input:
#   * raw21 and raw22 are the same string;
#   * raw41, raw45 and raw70 end with a trailing space.
# NOTE(review): the origin of these questions is not visible in this notebook.
raw = ['Does the article include data?',
 'Does the article provide insights?',
 'Does the article assess CO2 fluxes?',
 'Are the findings supported by data?',
 'Does the article assess NH3 fluxes?',
 'Does the article assess N2O fluxes?',
 'Is the evidence provided verifiable?',
 'Did the study assess implementation?',
 'Does the article measure field plots?',
 'Does the article focus on NH3 fluxes?',
 'Does the article focus on GHG fluxes?',
 'Does the article focus on no-tillage?',
 'Does the article discuss soil health?',
 'Does the article assess GHG emissions?',
 'Does the article provide new insights?',
 'Does the article study rye cover crops?',
 'Does the article mention N2O emissions?',
 'Does the article discuss N2O emissions?',
 'Does the article mention crop residues?',
 'Is there evidence of NH3 and GHG fluxes?',
 'Does the article provide recommendations?',
 'Does the article provide recommendations?',
 'Does the article assess generalizability?',
 'Did the study analyze rubber plantations?',
 'Does the article review previous studies?',
 'Does the article conduct a meta-analysis?',
 'Does the article examine nitrogen dynamics?',
 'Does the article discuss advisory services?',
 'Does the article present empirical evidence?',
 'Does the article focus on the United States?',
 'Does the article discuss biofuel production?',
 'Does the article provide new empirical data?',
 'Does the article focus on rubber plantations?',
 'Does the article provide a balanced analysis?',
 'Does the article include statistical analyses?',
 'Does the article suggest a lack of correlation?',
 'Does the article provide evidence of the impact?',
 'Does the article examine agricultural production?',
 'Do the statistical analyses support the findings?',
 'Does the article discuss nitrogen surplus control?',
 'Does the article discuss knowledge dissemination? ',
 'Does the article discuss biodiversity conservation?',
 'Does the article report a decrease in CO2 emissions?',
 'Is the article based on outdated or unreliable data?',
 'Does the article specifically focus on corn ethanol? ',
 'Does the article report an increase in N2O emissions?',
 'Does the article assess net global warming potential?',
 'Does the article lack new empirical data or insights?',
 'Does the article provide evidence for its conclusions?',
 'Does the article discuss certified organic production?',
 'Does the article discuss social aspects of agroecology?',
 'Does the article examine the impact of these practices?',
 'Did the study focus on rubber-leguminous shrub systems?',
 'Is the study focused on rubber-leguminous shrub systems?',
 'Does the study focus on conventional cultivation methods?',
 'Does the study utilize life cycle assessment methodology?',
 'Does the article discuss cultural aspects of agroecology?',
 'Does the article explore soil organic sequestration rate?',
 'Does the article compare industrial agriculture practices?',
 'Does the article evaluate rubber-leguminous shrub systems?',
 'Does the article only assess the potential for improvement?',
 'Does the article specifically analyze nitrous oxide emissions?',
 'Does the article only consider cradle-to-farm-gate activities?',
 'Does the article discuss climate change impacts on agriculture?',
 'Does the article analyze yield-scaled global warming potential?',
 'Does the article lack empirical evidence or scientific research?',
 'Does the article focus on small-scale or family farming systems?',
 'Does the article discuss their overall impact on climate change?',
 "Does the article discuss agroecology's benefits for biodiversity?",
 'Does the article specifically examine the impact on GHG profiles? ',
 'Is the discussed context not applicable to the broader assessment?',
 'Does the article focus primarily on social aspects of agroecology?',
 'Does the article specifically analyze the impact of these practices?',
 'Does the article focus primarily on economic aspects of agroecology?',
 'Does the article measure NH3, N2O, CH4, and CO2 fluxes in field plots?',
 "Does the article discuss agroecology's benefits for ecosystem services?",
 'Does the article analyze the role of agroecology in carbon sequestration?',
 'Does the article compare organic and conventional arable farming practices?',
 'Does the article primarily address peat emissions and their quantification?',
 'Does the article provide empirical evidence or data to support its findings?',
 'Does the article mention optimized timing of grass-clover ley phase removal?',
]

In [16]:
# Look up question "raw21": ids are 1-based, list indices 0-based, hence the -1.
raw[21-1]

'Does the article provide recommendations?'

In [59]:
%%time
# Score every abstract against each question in `raw` and write one workbook
# per question (nllf_raw1.xlsx, nllf_raw2.xlsx, ...).
for i, qq in enumerate(raw):
    tag = f"raw{i+1}"  # 1-based id, shared by the progress line and the file name
    print("ChatGPT question:", tag)
    # Rewrite the question once per question instead of once per row.
    question = qq.replace("article", "abstract")
    y_pred = abstracts.apply(lambda x: ChatGPTClassifier(x["t"], x["ab"], question), axis=1)
    # Work on a copy so the source frame never gains the label column.
    new_A_train = abstracts.copy()
    new_A_train["chatgpt_label"] = y_pred
    new_A_train.to_excel(f"nllf_{tag}.xlsx")

ChatGPT question: raw1
ChatGPT question: raw2
ChatGPT question: raw3
ChatGPT question: raw4
ChatGPT question: raw5
ChatGPT question: raw6
ChatGPT question: raw7
ChatGPT question: raw8
ChatGPT question: raw9
ChatGPT question: raw10
ChatGPT question: raw11
ChatGPT question: raw12
ChatGPT question: raw13
ChatGPT question: raw14
ChatGPT question: raw15
ChatGPT question: raw16
ChatGPT question: raw17
ChatGPT question: raw18
ChatGPT question: raw19
ChatGPT question: raw20
ChatGPT question: raw21
ChatGPT question: raw22
ChatGPT question: raw23
ChatGPT question: raw24
ChatGPT question: raw25
ChatGPT question: raw26
ChatGPT question: raw27
ChatGPT question: raw28
ChatGPT question: raw29
ChatGPT question: raw30
ChatGPT question: raw31
ChatGPT question: raw32
ChatGPT question: raw33
ChatGPT question: raw34
ChatGPT question: raw35
ChatGPT question: raw36
ChatGPT question: raw37
ChatGPT question: raw38
ChatGPT question: raw39
ChatGPT question: raw40
ChatGPT question: raw41
ChatGPT question: raw42
C

In [22]:
# Further rewordings, keyed by the id of the question they rephrase
# ("raw49" = 49th entry of `raw`, "exp22" = 22nd entry of `exp`, 1-based).
# Each value lists three alternative phrasings, written out as
# <key>_v1 .. <key>_v3 by the loop that consumes this dict.
new_variants = {
    "raw49": [
        "Does the article back its conclusions with evidence?",
        "Does the article support its conclusions with evidence?",
        "Does the article offer proof for its conclusions?",
    ],
    "exp22": [
        "Does the article mention the inclusion of semi-natural landscape elements?",
        "Does the article discuss the integration of semi-natural landscape features?",
        "Does the article cover the incorporation of semi-natural landscape elements?"
        
    ],
    "raw21": [
        "Does the article offer any suggestions?",
        "Does the article give advice?",
        "Does the article include recommended actions?"
    ]
}

In [17]:
%%time
# Score every abstract against each rewording in `new_variants` and write one
# workbook per rewording (nllf_<original id>_v<n>.xlsx).
for k, v in new_variants.items():
    for i, qq in enumerate(v):
        tag = f"{k}_v{i+1}"  # shared by the progress line and the file name
        print("ChatGPT question:", tag)
        # Rewrite the question once per question instead of once per row.
        question = qq.replace("article", "abstract")
        y_pred = abstracts.apply(lambda x: ChatGPTClassifier(x["t"], x["ab"], question), axis=1)
        # Work on a copy so the source frame never gains the label column.
        new_A_train = abstracts.copy()
        new_A_train["chatgpt_label"] = y_pred
        new_A_train.to_excel(f"nllf_{tag}.xlsx")

ChatGPT question: raw49_v1
ChatGPT question: raw49_v2
ChatGPT question: raw49_v3
ChatGPT question: exp22_v1
ChatGPT question: exp22_v2
ChatGPT question: exp22_v3
ChatGPT question: raw21_v1
ChatGPT question: raw21_v2
ChatGPT question: raw21_v3
CPU times: user 3min 52s, sys: 95.4 ms, total: 3min 52s
Wall time: 3min 52s


In [65]:
# Larger batch of rewordings, keyed by the id of the question they rephrase.
# Keys that already end in "_vN" (e.g. "b12_v1", "exp1_v3", "exp22_v2") are
# rewordings of an earlier rewording, so the consuming loop produces
# double-suffixed names such as "b12_v1_v1".
# NOTE(review): two entries repeat an existing question verbatim:
#   * "b12_v1"[1] is identical to variant_bsq["b12"][1];
#   * "raw66"[0] is identical to the original question raw66.
# Confirm whether that is intended.
more_new_variants = {
    "b1": [
        "Does the article cover agroecological practices?",
        "Is agroecological practices discussed in the article?",
        "Does the article mention agroecological practices?"
    ],
    "b12_v1": [
        "Does the article assess nitrogen dynamics influenced by agroecology?",
        "Does the article analyze how agroecology affects nitrogen dynamics?",
        "Does the article examine the impact of agroecology on nitrogen dynamics?"

    ],
    "b13": [
        "Does the article address methane (CH4) emissions' impact?",
        "Does the article cover the effects of methane (CH4) emissions?",
        "Does the article examine the implications of methane (CH4) emissions?"
    ],
    "b4": [
        "Does the article thoroughly address climate change and environmental aspects?",
        "Does the article provide a comprehensive coverage of climate change and environmental aspects?",
        "Does the article include a comprehensive discussion on climate change and environmental aspects?"
    ],
    "b6_v1": [
        "Does the article explore the impact of agroecological practices on climate change?",
        "Does the article examine if agroecological practices affect climate change?",
        "Does the article investigate the relationship between agroecological practices and climate change?"
    ],
    "b8": [
        "Does the article discuss limitations, challenges, and potential risks?",
        "Does the article cover limitations, challenges, and potential risks?",
        "Does the article mention limitations, challenges, and potential risks?"
    ],
    "b9": [
        "Does the article consider policy implications?",
        "Does the article touch upon policy implications?",
        "Does the article discuss policy implications?"
    ],
    "exp10": [
        "Does the article mention the implementation of Sustainable Rice Intensification practices?",
        "Does the article discuss the utilization of any Sustainable Rice Intensification techniques?",
        "Does the article cover the use of one or more Sustainable Rice Intensification methods?"
    ],
    "exp12": [
        "Does the article mention the use of intercropping practices?",
        "Is the article about applying a specific intercropping technique?",
        "Does the article discuss the implementation of intercropping in practice?"
    ],
    "exp16": [
        "Does the article discuss the implementation of Bio-control practices?",
        "Does the article pertain to the use of Bio-control methods?",
        "Does the article address the application of Bio-control techniques?"
    ],
    "exp1_v3": [    
        "Does the article discuss the link between agroecological practices and climate change?",
        "Does the article examine how agroecological practices impact climate change?",
        "Does the article address the relationship between agroecological practices and climate change?"
    ],
    "exp1_v1": [
        "Does the article assess the impact of agroecological practices on climate change?",
        "Does the article analyze how agroecological practices affect climate change?",
        "Does the article study the influence of agroecological practices on climate change?"
    ],
    "exp21": [
        "Does the article discuss a form of Residues management practice?",
        "Is the article about implementing Residues management techniques?",
        "Does the article mention the application of Residues management methods?"
    ],
    "exp22_v2": [
        "Does the article cover semi-natural landscape features integration?",
        "Does the article address the integration of semi-natural landscape features?",
        "Does the article mention the integration of semi-natural landscape features?"
    ],
    "exp27": [
        "Does the article discuss the impact of these practices on climate change mitigation?",
        "Does the article address how these practices affect climate change mitigation?",
        "Does the article mention the effect of these practices on climate change mitigation?"
    ],
    "exp33": [
        "Does the article discuss the impact of these practices on soil carbon storage?",
        "Does the article mention the effect of these practices on soil carbon storage?",
        "Does the article address how these practices affect soil carbon storage?"
    ],
    "exp4": [
        "Does the article mention the use of Regenerative agriculture practices?",
        "Does the article discuss the application of Regenerative agriculture methods?",
        "Does the article refer to implementing Regenerative agriculture techniques?"
    ],
    "raw30": [
        "Does the article center on the United States?",
        "Is the article primarily about the United States?",
        "Does the article emphasize the United States?"
    ],
    "raw33": [
        "Does the article center on rubber plantations?",
        "Is the article primarily about rubber plantations?",
        "Does the article give prominence to rubber plantations?"
    ],
    "raw45": [
        "Does the article specifically cover corn ethanol as its focus?",
        "Is corn ethanol the main subject of the article?",
        "Does the article concentrate on corn ethanol exclusively?"
    ],
    "raw47": [
        "Does the article evaluate net global warming potential?",
        "Does the article analyze net global warming potential?",
        "Does the article measure net global warming potential?"
    ],
    "raw48": [
        "Does the article lack fresh empirical data or insights?",
        "Does the article fail to provide new empirical data or insights?",
        "Does the article lack any new empirical data or fresh insights?"
    ],
    "raw63": [
        "Does the article solely focus on cradle-to-farm-gate activities?",
        "Does the article restrict its scope to cradle-to-farm-gate activities only?",
        "Does the article exclusively address cradle-to-farm-gate activities?"
    ],
    "raw66": [
        "Does the article lack empirical evidence or scientific research?",
        "Does the article fail to present empirical evidence or scientific research?",
        "Does the article provide no empirical evidence or scientific research?"
    ],
    "raw70": [
        "Does the article focus on analyzing the GHG profile impact?",
        "Does the article specifically address GHG profiles in its examination?",
        "Does the article examine the impact on GHG profiles in particular?"
    ],
    "raw74": [
        "Does the article primarily emphasize economic aspects of agroecology?",
        "Is the main focus of the article on the economic aspects of agroecology?",
        "Does the article center around the economic aspects of agroecology?"
    ],
    "raw76": [
        "Does the article cover agroecology's positive impact on ecosystem services?",
        "Does the article explore how agroecology benefits ecosystem services?",
        "Does the article examine the advantages of agroecology for ecosystem services?"
    ]
}

In [66]:
%%time
# Score every abstract against each rewording in `more_new_variants` and write
# one workbook per rewording (nllf_<key>_v<n>.xlsx).
for k, v in more_new_variants.items():
    for i, qq in enumerate(v):
        tag = f"{k}_v{i+1}"  # shared by the progress line and the file name
        print("ChatGPT question:", tag)
        # Rewrite the question once per question instead of once per row.
        question = qq.replace("article", "abstract")
        y_pred = abstracts.apply(lambda x: ChatGPTClassifier(x["t"], x["ab"], question), axis=1)
        # Work on a copy so the source frame never gains the label column.
        new_A_train = abstracts.copy()
        new_A_train["chatgpt_label"] = y_pred
        new_A_train.to_excel(f"nllf_{tag}.xlsx")

ChatGPT question: b1_v1
ChatGPT question: b1_v2
ChatGPT question: b1_v3
ChatGPT question: b12_v1_v1
ChatGPT question: b12_v1_v2
ChatGPT question: b12_v1_v3
ChatGPT question: b13_v1
ChatGPT question: b13_v2
ChatGPT question: b13_v3
ChatGPT question: b4_v1
ChatGPT question: b4_v2
ChatGPT question: b4_v3
ChatGPT question: b6_v1_v1
ChatGPT question: b6_v1_v2
ChatGPT question: b6_v1_v3
ChatGPT question: b8_v1
ChatGPT question: b8_v2
ChatGPT question: b8_v3
ChatGPT question: b9_v1
ChatGPT question: b9_v2
ChatGPT question: b9_v3
ChatGPT question: exp10_v1
ChatGPT question: exp10_v2
ChatGPT question: exp10_v3
ChatGPT question: exp12_v1
ChatGPT question: exp12_v2
ChatGPT question: exp12_v3
ChatGPT question: exp16_v1
ChatGPT question: exp16_v2
ChatGPT question: exp16_v3
ChatGPT question: exp1_v3_v1
ChatGPT question: exp1_v3_v2
ChatGPT question: exp1_v3_v3
ChatGPT question: exp1_v1_v1
ChatGPT question: exp1_v1_v2
ChatGPT question: exp1_v1_v3
ChatGPT question: exp21_v1
ChatGPT question: exp21_v2
C

In [34]:
# Spot-check of selected questions by 1-based id.
# NOTE(review): as defined in this notebook, `variant_bsq` has no "b8" key
# (only "b6", "b2", "b12") and `variant_exp` has no "exp22" key (only "exp1",
# "exp17", "exp5"), so this line would raise KeyError. The "b8" rewordings live
# in `more_new_variants` and the "exp22" ones in `new_variants`. The saved
# output shows a single string, so it appears to come from an earlier version
# of this cell — confirm the intended lookups.
bsq[9-1], variant_bsq["b8"][3-1], exp[4-1], variant_exp["exp22"][2-1], new_variants["exp22"][2-1], raw[76-1]

'Does the article address policy implications?'

In [24]:
# Another batch of rewordings, keyed by the id of the question they rephrase
# ("exp19" = 19th entry of `exp`, "raw26" = 26th entry of `raw`, 1-based).
# Each value lists three alternative phrasings, written out as
# <key>_v1 .. <key>_v3 by the loop that consumes this dict.
other_new_variants = {
    "exp19": [
    "Does the article discuss the use of cover crops or mulching?",
    "Does the article mention the application of cover crops or mulching?",
    "Does the article refer to the use of cover crops or mulching?"
    ],
    "exp14": [
    "Does the article mention the application of a water collection practice?",
    "Does the article discuss implementing a type of water collection practice?",
    "Does the article pertain to the use of a water collection method?"
    ],
    "raw26": [
    "Does the article include a meta-analysis?",
    "Is a meta-analysis conducted in the article?",
    "Has the article performed a meta-analysis?"
    ],
    "raw69": [
    "Does the article cover agroecology's positive impact on biodiversity?",
    "Does the article address the benefits of agroecology for biodiversity?",
    "Does the article examine how agroecology benefits biodiversity?"
    ],
    "raw73": [
    "Does the article focus on analyzing the impact of these practices?",
    "Does the article specifically assess the effects of these practices?",
    "Does the article examine the specific impact of these practices?"
    ],   
    "raw31": [
    "Does the article cover biofuel production?",
    "Does the article address the topic of biofuel production?",
    "Does the article mention anything about biofuel production?"
    ],
    "exp2": [
    "Does the article mention the use of agroecological practices?",
    "Is the article discussing the application of agroecological methods?",
    "Do the contents of the article pertain to agroecological practices?"
    ]
}

In [25]:
%%time
# Run the classifier over every abstract once per question wording and write
# each run to its own workbook: nllf_<question id>_v<wording number>.xlsx.
for k, v in other_new_variants.items():
    for i, qq in enumerate(v):
        run_name = f"{k}_v{i+1}"
        print("ChatGPT question:", run_name)

        # The word "article" in the question is swapped for "abstract" before
        # the question is handed to the classifier.
        abstract_question = qq.replace("article", "abstract")
        y_pred = abstracts.apply(
            lambda row: ChatGPTClassifier(row["t"], row["ab"], abstract_question),
            axis=1,
        )

        # New frame = the abstracts plus one label column; `abstracts` itself
        # is left unchanged.
        new_A_train = abstracts.assign(chatgpt_label=y_pred)
        new_A_train.to_excel(f"nllf_{run_name}.xlsx")

ChatGPT question: exp19_v1
ChatGPT question: exp19_v2
ChatGPT question: exp19_v3
ChatGPT question: exp14_v1
ChatGPT question: exp14_v2
ChatGPT question: exp14_v3
ChatGPT question: raw26_v1
ChatGPT question: raw26_v2
ChatGPT question: raw26_v3
ChatGPT question: raw69_v1
ChatGPT question: raw69_v2
ChatGPT question: raw69_v3
ChatGPT question: raw73_v1
ChatGPT question: raw73_v2
ChatGPT question: raw73_v3
ChatGPT question: raw31_v1
ChatGPT question: raw31_v2
ChatGPT question: raw31_v3
ChatGPT question: exp2_v1
ChatGPT question: exp2_v2
ChatGPT question: exp2_v3
CPU times: user 9min 4s, sys: 232 ms, total: 9min 4s
Wall time: 9min 4s


In [26]:
# Keyword-style yes/no questions, one per labelling run.
#
# This is deliberately a list, not a set: the labelling loop numbers its output
# files by position (lf1, lf2, ...), so the order has to be fixed by the source.
# A set has no defined order, and string hashing differs between interpreter
# sessions by default, so a set would assign the numbers differently on every
# kernel restart and the files could not be traced back to a question.
#
# "Is the term 'conventional' mentioned in the article?" used to be listed
# twice; the second occurrence is dropped so the question is only run once
# (25 questions in total).
lf = [
    "Does the article mention any terms starting with 'agro'?",
    "Is the term 'conventional' mentioned in the article?",
    "Does the article address the topic of GHG emissions?",
    "Does the article delve into the discussion of practices?",
    "Does the article touch upon the concept of climate-smart?",
    "Does the article propose the idea of lower?",
    "Does the article make reference to the concept of cover?",
    "Does the article mention any terms starting with 'bio'?",
    "Is the term 'soil' mentioned in the article?",
    "Does the article discuss any systems?",
    "Does the article mention the term 'rice'?",
    "Is the concept of storage discussed in the article?",
    "Does the article discuss emissions?",
    "Is the concept of intercropping mentioned in the article?",
    "Does the article mention any terms starting with 'ecolog'?",
    "Does the article specifically mention CH4?",
    "Is the term 'crop' mentioned in the article?",
    "Is the concept of agroforestry mentioned in the article?",
    "Is the term 'water' mentioned in the article?",
    "Does the article discuss strategies?",
    "Does the article mention any terms starting with 'nitr'?",
    "Does the article discuss reducing something?",
    "Is the term 'social' mentioned in the article?",
    "Does the article discuss the topic of climate?",
    "Does the article mention any terms starting with 'convent'?",
]

In [27]:
%%time
# Run the classifier over every abstract once per entry of `lf` and write each
# run to nllf_lf<n>.xlsx, where <n> is the 1-based position of the question in
# lf's iteration order.
for i, qq in enumerate(lf):
    run_name = f"lf{i+1}"
    print("ChatGPT question:", run_name)

    # The word "article" in the question is swapped for "abstract" before the
    # question is handed to the classifier.
    abstract_question = qq.replace("article", "abstract")
    y_pred = abstracts.apply(
        lambda row: ChatGPTClassifier(row["t"], row["ab"], abstract_question),
        axis=1,
    )

    # New frame = the abstracts plus one label column; `abstracts` itself is
    # left unchanged.
    new_A_train = abstracts.assign(chatgpt_label=y_pred)
    new_A_train.to_excel(f"nllf_{run_name}.xlsx")

ChatGPT question: lf1
ChatGPT question: lf2
ChatGPT question: lf3
ChatGPT question: lf4
ChatGPT question: lf5
ChatGPT question: lf6
ChatGPT question: lf7
ChatGPT question: lf8
ChatGPT question: lf9
ChatGPT question: lf10
ChatGPT question: lf11
ChatGPT question: lf12
ChatGPT question: lf13
ChatGPT question: lf14
ChatGPT question: lf15
ChatGPT question: lf16
ChatGPT question: lf17
ChatGPT question: lf18
ChatGPT question: lf19
ChatGPT question: lf20
ChatGPT question: lf21
ChatGPT question: lf22
ChatGPT question: lf23
ChatGPT question: lf24
ChatGPT question: lf25
CPU times: user 10min 48s, sys: 328 ms, total: 10min 48s
Wall time: 10min 49s
