<a href="https://colab.research.google.com/github/aaalexlit/omdena_climate_change_challenge_notebooks/blob/main/Applying_trained_GWStance_model_streamlit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
%%capture
# Environment setup for the GWStance Streamlit demo. %%capture silences the
# output of every command in this cell.
# gdown is needed to download big-ish files from Google Drive.
!pip3 install --upgrade gdown
# Packages imported by the Streamlit script written in the next cell.
!pip install nltk transformers streamlit
# Download the trained model from Google Drive
# (presumably the final_model.tar.gz archive extracted below - TODO confirm).
!gdown https://drive.google.com/uc?id=12rVg_bpuDfZbdWRtEN2Jf6SNyMEnax76
# Clone the GWStance repo; its predict.py script and for_transformers/ files are used later.
!git clone https://github.com/yiweiluo/GWStance.git
# Untar the model
!tar -xvzf final_model.tar.gz
# Create a virtualenv named "gwstance" and install the requirements into it.
# Activation and install share one `!` line, joined with `;`.
!pip install virtualenv
!virtualenv gwstance
!source /content/gwstance/bin/activate; pip install transformers scipy pandas matplotlib scikit-learn tqdm tensorboard boto3 torch torchvision
# Copy the repo's for_transformers/ files over the virtualenv's transformers package.
# NOTE(review): the python3.9 path segment is hard-coded - confirm it matches the
# Python version the virtualenv was created with.
!cp -r /content/GWStance/3_stance_detection/2_Stance_model/for_transformers/* /content/gwstance/lib/python3.9/site-packages/transformers

In [5]:
%%writefile GWStance_prediction.py
import streamlit as st
from transformers import AutoTokenizer, pipeline, RobertaForSequenceClassification
import nltk
from nltk.tokenize import sent_tokenize
import torch
import pandas as pd
import subprocess

# Human-readable stance names for the values found in the 'predicted' column
# that predict_gw_stance reads from the predictions file.
label_mapping = {0: 'disagree', 1: 'neutral', 2: 'agree'}

def is_about_climate(texts, model, tokenizer):
    """Classify each text with a ``text-classification`` pipeline.

    Args:
        texts: Sentences to classify (callers in this file pass a list of
            strings).
        model: Model handed to ``transformers.pipeline``.
        tokenizer: Tokenizer handed to ``transformers.pipeline``.

    Returns:
        A ``(labels, probs)`` tuple of two lists aligned with ``texts``,
        holding the ``'label'`` and ``'score'`` entry of every pipeline
        output. Both lists are empty when ``texts`` is empty.
    """
    # Nothing to classify (e.g. the text box was left empty): skip building
    # and running a pipeline altogether.
    if isinstance(texts, (list, tuple)) and not texts:
        return [], []

    # Use the first GPU with large batches when CUDA is available, otherwise
    # run on CPU one text at a time.
    if torch.cuda.is_available():
        device, batch_size = 0, 128
    else:
        device, batch_size = -1, 1

    pipe = pipeline("text-classification", model=model,
                    tokenizer=tokenizer, device=device,
                    truncation=True, padding=True)
    labels = []
    probs = []
    for out in pipe(texts, batch_size=batch_size):
        labels.append(out['label'])
        probs.append(out['score'])
    # Ask PyTorch to release cached GPU memory once inference is done.
    torch.cuda.empty_cache()
    return labels, probs


def filter_climate_related(sentences, model, tokenizer):
    """Return the sentences whose predicted label is ``'Yes'``, in input order."""
    predicted_labels, _scores = is_about_climate(sentences, model, tokenizer)
    climate_sentences = []
    for predicted, sentence in zip(predicted_labels, sentences):
        if predicted == 'Yes':
            climate_sentences.append(sentence)
    return climate_sentences

def predict_climate_relatedness(sentences, model, tokenizer):
  """Build a DataFrame with one row per sentence: sentence, label, score."""
  predicted_labels, scores = is_about_climate(sentences, model, tokenizer)
  rows = zip(sentences, predicted_labels, scores)
  return pd.DataFrame(rows)
    
def predict_gw_stance(input_sentences, model, tokenizer):
  """Predict the global-warming stance of the climate-related sentences.

  The sentences are first filtered with ``filter_climate_related``. The
  remaining ones are written to a TSV file, the GWStance ``predict.py``
  script is run on that file in a subprocess, and its predictions file is
  joined back to the input sentences.

  Args:
    input_sentences: Candidate sentences.
    model: Model used by the climate-relatedness filter.
    tokenizer: Tokenizer used by the climate-relatedness filter.

  Returns:
    A DataFrame with the columns ``text`` and ``predicted`` (a value of
    ``label_mapping``), or ``None`` when no sentence is climate related.

  Raises:
    subprocess.CalledProcessError: If ``predict.py`` exits with a non-zero
      status, so stale predictions from an earlier run are never shown.
  """
  data_dir = "/content/"
  test_file = data_dir + "test.tsv"
  predictions_file = "/content/final_model/no-dev/predictions_test.tsv"

  sentences = filter_climate_related(input_sentences, model, tokenizer)

  if not sentences:
    print("No climate related sentences found in the text")
    return None

  # Besides the text, the file carries a placeholder label and weight column.
  df = pd.DataFrame(sentences)
  df["lab"] = "neutral"
  df["weight"] = 1.0
  # Write to the same absolute path that is read back below and that is passed
  # to predict.py as --data-dir, instead of relying on the working directory.
  df.to_csv(test_file, sep='\t', index=False, header=False)

  # check=True surfaces a failed prediction run instead of silently reading
  # whatever predictions file is left on disk.
  subprocess.run(
      [
          "python",
          "/content/GWStance/3_stance_detection/2_Stance_model/predict.py",
          "/content/final_model/config.json",
          "/content/final_model/no-dev",
          "--data-dir", data_dir,
          "--transformers-dir",
          "/content/gwstance/lib/python3.9/site-packages/transformers",
      ],
      check=True,
  )

  input_df = pd.read_csv(test_file, sep='\t', header=None, names=["text", "fake1", "fake2"])
  preds_df = pd.read_csv(predictions_file, sep='\t')

  # Rows are matched by position (default index join). The copy keeps the
  # column assignment below from touching a slice of the joined frame.
  res_df = input_df.join(preds_df)[["text", "predicted"]].copy()

  res_df['predicted'] = res_df['predicted'].apply(lambda x: label_mapping[x])

  return res_df

@st.cache_resource
def download_models():
    """Download the NLTK 'punkt' data and load the classifier with its tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    nltk.download('punkt')

    # Checkpoints for the pre-trained model and the tokenizer.
    classifier_checkpoint = 'kruthof/climateattention-10k-upscaled'
    tokenizer_checkpoint = "climatebert/distilroberta-base-climate-f"
    return (
        RobertaForSequenceClassification.from_pretrained(classifier_checkpoint, num_labels=2),
        AutoTokenizer.from_pretrained(tokenizer_checkpoint),
    )

# Create the Streamlit app
def main():
    """Render the Streamlit page: sidebar links, header and three tabs.

    The first tab takes free text, splits it into sentences and offers two
    buttons: one runs stance detection on the climate-related sentences, the
    other shows the climate-relatedness classification of every sentence.
    The two remaining tabs only contain placeholder text.
    """
    st.set_page_config(
        page_title="Applying trained Global Warming stance model",
        page_icon=":earth_americas:",
        layout='wide',
    )
    model, tokenizer = download_models()

    # Sidebar: chapter title followed by the project and repository links.
    sidebar = st.sidebar
    sidebar.title("Omdena, Local Chapter, 🇩🇪 Cologne")
    sidebar.markdown(
        '[Project Description](https://omdena.com/chapter-challenges/detecting-bias-in-climate-reporting-in-english-and-german-language-news-media/)',
        unsafe_allow_html=True,
    )
    sidebar.markdown(
        '[Github Repo](https://github.com/OmdenaAI/cologne-germany-reporting-bias/)',
        unsafe_allow_html=True,
    )

    st.header("Applying trained Global Warming stance model to the sentences extracted from climate news articles")

    stance_tab, how_to_tab, faq_tab = st.tabs(["Global Warming Stance Detection", "How-To", "FAQ"])

    with stance_tab:
        st.write("""Enter a Text below and click the Classify Button 
      to extract change related claim sentences from text and classify them
      as agreeing with Global warming, disagreeing with Global Warming or neutral""")

        sentences = sent_tokenize(st.text_area("Enter Text"))

        # Stance detection on the climate-related sentences only.
        if st.button("Detect Global Warming stance in climate related sentences"):
            with st.spinner(text='Performing stance detection'):
                stance_df = predict_gw_stance(sentences, model, tokenizer)
                if stance_df is None:
                    st.warning("None of the extracted sentences are climate related.")
                else:
                    st.dataframe(stance_df, use_container_width=True)

        # Climate-relatedness classification of every sentence.
        if st.button("Classify sentences from text"):
            with st.spinner(text='Classifying sentences as climate-related or not'):
                relatedness_df = predict_climate_relatedness(sentences, model, tokenizer)
                st.dataframe(relatedness_df, use_container_width=True)

    # The remaining tabs only hold placeholder text for now.
    for placeholder_tab in (how_to_tab, faq_tab):
        with placeholder_tab:
            st.write("tbd")


# Entry point: build the page only when this file runs as the main module.
if __name__ == "__main__":
    main()

Overwriting GWStance_prediction.py


In [7]:
# Start the Streamlit app in the background (`&`) and open a localtunnel on
# port 8501, the port shown in the Streamlit URLs printed below.
!streamlit run GWStance_prediction.py & npx localtunnel --port 8501

[#######...........] / extract:localtunnel: verb lock using /root/.npm/_locks/s[0m[K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[K[?25hnpx: installed 22 in 3.587s
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.196.118.197:8501[0m
[0m
your url is: https://odd-pumas-hang-35-196-118-197.loca.lt
2023-03-17 21:42:52.291984: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-17 21:42:53.724661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot 