<a href="https://colab.research.google.com/github/aaalexlit/omdena_climate_change_challenge_notebooks/blob/main/Streamlit_test_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

%%capture
!pip install streamlit
!pip install pandas
!pip install matplotlib
!pip install transformers torch
!pip3 install newspaper3k
!pip install nltk 

In [21]:
# %%writefile CRBC_preMVP.py
import os

import nltk
import streamlit as st
import torch
from newspaper import Article
from nltk.tokenize import sent_tokenize
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Define a function for classifying text
def classify_text(text, model, tokenizer):
    """Return the model's probability for class index 1 on ``text``.

    Args:
        text: Input handed to ``tokenizer`` (a single sentence in this app).
        model: Callable invoked as ``model(**inputs)``; its result must expose
            a ``logits`` tensor with the classes along dimension 1.
        tokenizer: Callable that turns ``text`` into a mapping of PyTorch
            tensors (``return_tensors="pt"``).

    Returns:
        The softmax probability of class index 1 for the first row of the
        batch, as a NumPy scalar. Presumably index 1 is the "claim" label of
        the loaded model -- confirm against the model card.
    """
    # Anything beyond 512 tokens is truncated by the tokenizer.
    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits
    probabilities = torch.softmax(logits, dim=1).detach().numpy()[0]
    return probabilities[1]

def extract_article_text(url):
    """Fetch the article at ``url`` and return its extracted fields.

    Runs ``download()``, ``parse()`` and ``nlp()`` on a newspaper ``Article``
    in that order.

    Returns:
        A six-item list: text, title, authors, publish date, keywords, summary.
    """
    page = Article(url)
    # The three processing stages must run in this order.
    for stage in (page.download, page.parse, page.nlp):
        stage()
    return [
        page.text,
        page.title,
        page.authors,
        page.publish_date,
        page.keywords,
        page.summary,
    ]

def extract_sentences(article_text):
    """Split ``article_text`` into sentences with NLTK's ``sent_tokenize``."""
    return sent_tokenize(article_text)

def print_result(input_text, prob):
    """Render the classification outcome for one piece of text on the page.

    Args:
        input_text: The text that was classified.
        prob: Probability returned by the classifier; values above 0.5 are
            reported as containing a claim.
    """
    if prob > 0.5:
        st.success('This text contains a climate change related claim:')
        st.write(input_text)
        # TODO: display the probability and a real bias score once one exists.
        bias_score = "tbd"
        st.metric(label="**Bias Score**", value=bias_score)
    else:
        st.write("This text does not contain a climate change related claim.")

@st.cache_resource
def download_models():
    """Fetch the NLTK tokenizer data and load the claim-detection model.

    Wrapped in ``st.cache_resource`` so the downloads and model loading are
    not repeated on every Streamlit rerun.

    Returns:
        A ``(model, tokenizer)`` pair for ``climatebert/environmental-claims``.
    """
    # Older NLTK releases read 'punkt'; newer ones (3.8.2+) look up
    # 'punkt_tab' in sent_tokenize, so fetch both to work with either.
    for resource in ("punkt", "punkt_tab"):
        nltk.download(resource)

    # Load the pre-trained model
    model_name = "climatebert/environmental-claims"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

# Create the Streamlit app
def main():
    """Build the Streamlit page and classify user-supplied text or article sentences.

    The page has a sidebar with project links and three tabs. In the first
    tab the user enters free text or an article URL; pressing "Classify"
    splits the input into sentences, scores each one with ``classify_text``
    and reports every sentence whose probability exceeds 0.5.
    """
    st.set_page_config(page_title="Applying trained Global Warming stance model", page_icon=":earth_americas:")
    model, tokenizer = download_models()

    # Add a sidebar with links
    st.sidebar.title("Omdena, Local Chapter, 🇩🇪 Cologne")
    project_link = '[Project Description](https://omdena.com/chapter-challenges/detecting-bias-in-climate-reporting-in-english-and-german-language-news-media/)'
    st.sidebar.markdown(project_link, unsafe_allow_html=True)
    github_link = '[Github Repo](https://github.com/OmdenaAI/cologne-germany-reporting-bias/)'
    st.sidebar.markdown(github_link, unsafe_allow_html=True)

    st.header("Applying trained Global Warming stance model to the sentences extracted from climate news articles")

    tab_bias_detection, tab_how_to, tab_faq = st.tabs(["Global Warming Stance Detection", "How-To", "FAQ"])

    with tab_bias_detection:
        st.write("Enter a Text or URL below and click the Classify Button to see if it contains a climate change related claim.")

        # Choose between text or url input
        input_type = st.radio("Input type:", ("Text", "URL"))

        # Defaults keep every name bound no matter which input path runs.
        raw_text = ""
        sentences = []
        # Turned off when article extraction fails, so the Classify button is
        # hidden while the remaining tabs are still rendered.
        can_classify = True

        if input_type == "Text":
            raw_text = st.text_area("Enter Text")
        else:
            url_input = st.text_input("Enter URL")
            if url_input:
                try:
                    # Parenthesised so all six fields are unpacked in ONE statement.
                    (article_text, article_title, article_authors,
                     article_publish_date, article_keywords,
                     article_summary) = extract_article_text(url_input)
                    sentences = extract_sentences(article_text)
                except Exception:
                    # UI boundary: any download/parse/tokenize failure is
                    # reported to the user instead of crashing the page.
                    st.write("Could not extract text from article.")
                    can_classify = False
                else:
                    with st.expander("Article Info"):
                        st.write('Title:', article_title)
                        st.write('Authors:', article_authors)
                        st.write('Publish Date:', article_publish_date)
                        st.write('Article Keywords:', article_keywords)
                        st.write('Article Summary:', article_summary)

        # Classify text and show result
        if can_classify and st.button("Classify"):
            if input_type == "Text":
                sentences = extract_sentences(raw_text)

            if not sentences:
                st.warning("Please enter some text or a URL to classify.")
            else:
                has_climate_claim = False
                for sentence in sentences:
                    prob = classify_text(sentence, model, tokenizer)
                    if prob > 0.5:
                        has_climate_claim = True
                        print_result(sentence, prob)
                if not has_climate_claim:
                    st.write("None of the sentences contain a climate change related claim.")

    with tab_how_to:
        st.write("tbd")

    with tab_faq:
        st.write("tbd")


# Entry point when the file is executed as a script (e.g. via `streamlit run`).
if __name__ == "__main__":
    main()

Overwriting CRBC_preMVP.py


In [22]:
# Start the Streamlit app in the background (`&`) and expose port 8501
# through localtunnel. Expects CRBC_preMVP.py to exist in the working directory.
!streamlit run CRBC_preMVP.py & npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 7.166s

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.


  You can now view your Streamlit app in your browser.

  Network URL: http://172.28.0.12:8501
  External URL: http://34.105.121.229:8501

your url is: https://dry-areas-stick-34-105-121-229.loca.lt
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
  Stopping...
^C
