In [None]:
!pip -q install langchain huggingface_hub tiktoken
!pip -q install --upgrade together

In [None]:
pip install python-multipart
pip install pydantic==1.10.9


Collecting python-multipart
  Downloading python_multipart-0.0.6-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.7/45.7 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-multipart
Successfully installed python-multipart-0.0.6


## LLaMA 2 on Together API

In [None]:
import os
from getpass import getpass

# Never store the API key in the notebook: anything committed here is public.
# The key that used to be hard-coded in this cell must be treated as leaked
# and revoked in the Together dashboard.
# Reuse TOGETHER_API_KEY when it is already exported; otherwise prompt for it
# without echoing the value into the notebook output.
if not os.environ.get("TOGETHER_API_KEY"):
    os.environ["TOGETHER_API_KEY"] = getpass("Together API key: ")

In [None]:
# Record the installed langchain version and its dependencies for reproducibility.
!pip show langchain

Name: langchain
Version: 0.0.350
Summary: Building applications with LLMs through composability
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: aiohttp, async-timeout, dataclasses-json, jsonpatch, langchain-community, langchain-core, langsmith, numpy, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 


# Setting up Together API


In [None]:
import together

# Authenticate the Together client with the key stored in the environment
# (raises KeyError if TOGETHER_API_KEY has not been set).
together.api_key = os.environ["TOGETHER_API_KEY"]

# List the models available through the Together API.
models = together.Models.list()

In [None]:
# Show every available model name next to its position in the listing.
for position, entry in enumerate(models):
    print(position, entry['name'])

In [None]:
# Start the togethercomputer/llama-2-70b-chat model through the Together Models API.
together.Models.start("togethercomputer/llama-2-70b-chat")

{'success': True,
 'value': 'c6f573620f17a33a2334d0638d040db4210f20245243627bf2b1b26cfddb6f36-2d400eeb70ea94dc0a0d6c04b61ba1669f304ab8201ddf3369471a4c0a66c154'}

In [None]:
import together

import logging
from typing import Any, Dict, List, Mapping, Optional

from pydantic import Extra, Field, root_validator

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain.utils import get_from_dict_or_env

class TogetherLLM(LLM):
    """LangChain LLM wrapper around the Together completion endpoint.

    The API key may be passed explicitly as ``together_api_key``; when it is
    left empty it is resolved from the ``TOGETHER_API_KEY`` environment
    variable at instantiation time (not at class-definition time), so defining
    the class no longer fails when the variable is not exported yet.
    """

    # Model endpoint to use.
    model: str = "togethercomputer/llama-2-70b-chat"

    # Together API key; an empty value means "look it up in TOGETHER_API_KEY".
    together_api_key: str = ""

    # Sampling temperature passed to the completion endpoint.
    temperature: float = 0.7

    # Maximum number of tokens to generate in the completion.
    max_tokens: int = 512

    class Config:
        # Reject unknown constructor arguments instead of silently ignoring them.
        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the API key from the arguments or the environment.

        Raises:
            ValueError: raised by ``get_from_dict_or_env`` when the key is
                neither supplied nor available as ``TOGETHER_API_KEY``.
        """
        api_key = get_from_dict_or_env(
            values, "together_api_key", "TOGETHER_API_KEY"
        )
        values["together_api_key"] = api_key
        return values

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "together"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the Together endpoint and return the generated text.

        Args:
            prompt: Fully formatted prompt string.
            stop: Optional stop sequences; the returned text is cut at the
                first occurrence of any of them.
            run_manager: Callback manager supplied by LangChain; unused here.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            The text of the first completion choice.
        """
        together.api_key = self.together_api_key
        output = together.Complete.create(
            prompt,
            model=self.model,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
        )
        text = output['output']['choices'][0]['text']
        if stop:
            # Previously `stop` was swallowed by **kwargs and silently ignored.
            text = enforce_stop_tokens(text, stop)
        return text


In [None]:
# Instance used for direct, un-templated calls: low temperature (0.1) and a
# 512-token cap on the completion.
test_llm = TogetherLLM(
    model= "togethercomputer/llama-2-70b-chat",
    temperature=0.1,
    max_tokens=512
)

In [None]:
# Smoke test: classify one sample sentence by calling the LLM directly with a plain prompt.
txt = "Members of the group and advocates say they fear they could face discrimination or violence if forced to return to the worldâ€™s largest Muslim-majority country."
test_llm(f"classify this sentence as propaganda or not propaganda, answer just by one word : {txt} ")

'Not propaganda.'

In [None]:
import pandas as pd

# Evaluation file; the loop below reads its 'text' column.
csv_file_path = "test.csv"
df = pd.read_csv(csv_file_path)

# Query the model once per sentence and keep the raw answers in row order.
results = []
for txt in df['text']:
    results.append(
        test_llm(f"classify this sentence as propaganda or not propaganda, answer just by one word : {txt}")
    )

# Dump the collected answers.
print(results)

In [None]:
# Stop the togethercomputer/llama-2-70b-chat model through the Together Models API.
together.Models.stop("togethercomputer/llama-2-70b-chat")

{'success': True, 'wasAlreadyDisabled': True}

## LangChain

In [None]:
import json
import textwrap

# Delimiters used to wrap the instruction and the system prompt.
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System prompt used when the caller does not supply one.
DEFAULT_SYSTEM_PROMPT = "You are expert in propaganda."


def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT ):
    """Wrap ``instruction`` and a system prompt in the instruction delimiters.

    The result is ``B_INST + B_SYS + new_system_prompt + E_SYS + instruction + E_INST``.
    """
    pieces = [B_INST, B_SYS, new_system_prompt, E_SYS, instruction, E_INST]
    return "".join(pieces)

# def cut_off_text(text, prompt):
#     cutoff_phrase = prompt
#     index = text.find(cutoff_phrase)
#     if index != -1:
#         return text[:index]
#     else:
#         return text

# def remove_substring(string, substring):
#     return string.replace(substring, "")


# def parse_text(text):
#         wrapped_text = textwrap.fill(text, width=100)
#         print(wrapped_text +'\n\n')
#         # return assistant_text


In [None]:
from langchain import PromptTemplate,  LLMChain

In [None]:
# llm = HuggingFacePipeline(pipeline = pipe, model_kwargs = {'temperature':0})

# LLM instance that backs the LangChain chain: low temperature (0.1) and a
# 512-token cap on the completion.
llm = TogetherLLM(
    model= "togethercomputer/llama-2-70b-chat",
    temperature=0.1,
    max_tokens=512
)

In [None]:
# Compose the template text; "{text}" is the placeholder filled in per sentence.
system_prompt = "You are expert in propaganda. "
instruction = "classify this sentence as propaganda or not propaganda, answer by one word :\n\n {text}"
template = get_prompt(instruction, new_system_prompt=system_prompt)
print(template)

# Template object with a single input variable, "text".
prompt = PromptTemplate(input_variables=["text"], template=template)

[INST]<<SYS>>
You are expert in propaganda. 
<</SYS>>

classify this sentence as propaganda or not propaganda, answer by one word :

 {text}[/INST]


In [None]:
# Chain that formats the prompt template and sends it to the Together-backed LLM.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
import time

import pandas as pd
from tqdm import tqdm

# Evaluation file; the loop below reads its 'text' column.
csv_file_path = "test.csv"
df = pd.read_csv(csv_file_path)

# Tunables for the long-running loop.
MAX_ATTEMPTS = 3            # tries per sentence before giving up
RETRY_DELAY_SECONDS = 10    # base back-off, multiplied by the attempt number
REQUEST_PAUSE_SECONDS = 2   # pause between consecutive requests


def classify_with_retry(text):
    """Run the chain on ``text``, retrying on failure.

    A single transient API error would otherwise abort the whole run (about
    three hours for the recorded 4042 rows) and discard every answer collected
    so far. The last failure is re-raised once ``MAX_ATTEMPTS`` is exhausted.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            return llm_chain.run(text)
        except Exception:
            if attempt == MAX_ATTEMPTS:
                raise
            time.sleep(RETRY_DELAY_SECONDS * attempt)


# One answer per row, in row order.
results = []

for txt in tqdm(df['text'], total=len(df), desc="Processing Rows"):
    result = classify_with_retry(txt)

    # Keep only the first line: the model sometimes appends an explanation
    # after the one-word answer.
    result_before_newline = result.split('\n', 1)[0]

    results.append(result_before_newline)
    time.sleep(REQUEST_PAUSE_SECONDS)

# Print or use the results list as needed
print(results)


Processing Rows: 100%|██████████| 4042/4042 [3:02:54<00:00,  2.72s/it]

[' Not propaganda.', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Not propaganda.', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Propaganda', ' Propaganda', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Not propaganda.', ' Propaganda', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Propaganda', ' Not propaganda.', ' Propaganda', ' Not propaganda.', ' Propaganda', ' Propaganda', ' Propaganda', ' Propaganda', ' Not propaganda.', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Propaganda.', ' Not propaganda.', ' Not propaganda.', ' Propaganda', ' Propaganda', ' Not propaganda.', ' Propaganda', ' Propaganda', ' Not propaganda.', ' N




In [None]:
# Wrap the predictions in a single-column frame ("prediction") ...
dictcc = dict(prediction=results)
cc = pd.DataFrame(data=dictcc)

# ... and persist it as CSV.
cc.to_csv(path_or_buf='llama2-70b-predictions-Oana-b.csv')


In [None]:
# Peek at the first prediction to see its exact formatting
# (the recorded output shows a leading space and a trailing period).
results[0]

' Not propaganda.'

In [None]:
def _label_from_prediction(prediction):
    """Map a raw model answer to 0 (not propaganda) or 1 (propaganda).

    The answer is normalized (surrounding whitespace removed, lower-cased)
    before the check, so variants such as 'Not propaganda', ' not propaganda.'
    or an answer followed by an explanation are all recognized. The previous
    exact comparison with ' Not propaganda.' counted every such variant as
    propaganda.
    """
    normalized = prediction.strip().lower()
    return 0 if normalized.startswith("not") else 1


updated_predictions = [_label_from_prediction(prediction) for prediction in results]
updated_predictions

In [None]:
# Encode the gold labels as integers: 'O' becomes 0 and 'P' becomes 1.
label_to_int = {'O': 0, 'P': 1}
encoded_labels = df['label'].replace(label_to_int)
y = list(encoded_labels)
y

In [None]:
from sklearn.metrics import f1_score

# F1 score of the predictions against the gold labels (scikit-learn defaults).
f1_score(y_true=y, y_pred=updated_predictions)

0.12101118939079983

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 of the predictions against the gold labels.
report = classification_report(y_true=y, y_pred=updated_predictions)
print(report)

              precision    recall  f1-score   support

           0       0.90      0.42      0.57      3052
           1       0.32      0.85      0.47       990

    accuracy                           0.52      4042
   macro avg       0.61      0.64      0.52      4042
weighted avg       0.76      0.52      0.55      4042



In [None]:
# Print each prediction with a counter that starts at 2.
# NOTE(review): presumably the offset matches spreadsheet row numbers
# (header on row 1) - confirm.
for row_number, prediction in enumerate(results, start=2):
    print(row_number, prediction)

2  Not propaganda.
3  Propaganda
4  Not propaganda.

The sentence is a factual statement made by a credible source, the World Health Organization (WHO) Director-General Tedros Adhanom Ghebreyesus, about a serious issue. It does not contain any emotive language or appeals to bias, and is not intended to influence public opinion or behavior. Therefore, it does not meet the criteria for propaganda.
5  Not propaganda.
6  Propaganda
7  Not propaganda.
8  Not propaganda.
9  Not propaganda.
10  Propaganda
11  Not propaganda.
12  Not propaganda.
13  Not propaganda.
14  Not propaganda.
15  Not propaganda.
16  Propaganda
17  Not propaganda.
18  Not propaganda.
19  Propaganda
20  Propaganda
21  Propaganda
22  Propaganda
23  Not propaganda.

The sentence is a factual report of a statement made by a judge in a court of law. It does not contain any emotive language or appeals to bias, and its purpose is to inform the reader of a factual development in a legal case. Therefore, it does not meet the cr

In [None]:
# Run the templated chain on the same sample sentence used for the direct call
# earlier, to compare the two prompting styles.
text = "Members of the group and advocates say they fear they could face discrimination or violence if forced to return to the worldâ€™s largest Muslim-majority country."
output = llm_chain.run(text)

print(output)

 Propaganda


In [None]:
# Stop the togethercomputer/llama-2-70b-chat model now that the evaluation is finished.
together.Models.stop("togethercomputer/llama-2-70b-chat")

{'success': True, 'wasAlreadyDisabled': True}