In [None]:
# ✅ STEP 1: Install necessary libraries
!pip install transformers torch -q

# ✅ STEP 2: Import required packages
import pandas as pd
from transformers import pipeline
import torch
from tqdm import tqdm

# ✅ STEP 3: Load your file (after uploading it in Colab)
# Replace the filename below with the one you uploaded
df = pd.read_csv("/content/df_sampled.csv")

# ✅ STEP 4: Set up device (GPU if available)
device = 0 if torch.cuda.is_available() else -1

# ✅ STEP 5: Load the models
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
    device=device
)

zero_shot_pipe = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device
)

# ✅ STEP 6: Define labels
emotion_labels = ["joy", "sadness", "anger", "fear", "disgust", "trust", "surprise", "anticipation"]
moral_labels = ["care/harm", "fairness/cheating", "loyalty/betrayal", "authority/subversion", "sanctity/degradation"]

# ✅ STEP 7: Prepare texts
texts = df["Details"].fillna("").astype(str).tolist()

# ✅ STEP 8: Create empty lists for results
sentiments = []
emotions = []
morals = []

# ✅ STEP 9: Run labeling in a loop with progress bar
# Label every article with sentiment, emotion and moral foundation.
# Each model call is best-effort: if it fails, a fallback label is recorded
# so that the three result lists stay aligned with `texts`.
for text in tqdm(texts, desc="Labeling"):
    # Cap the model input at the first 512 characters (a character cap, not a token cap).
    snippet = text[:512]

    # `except Exception` (rather than a bare `except:`) keeps the fallback
    # behaviour but lets KeyboardInterrupt/SystemExit stop the loop.
    try:
        sentiment_result = sentiment_pipe(snippet)[0]["label"]
    except Exception:
        sentiment_result = "neutral"

    # Zero-shot results list the candidate labels best-first; keep the top one.
    try:
        emotion_result = zero_shot_pipe(snippet, emotion_labels)["labels"][0]
    except Exception:
        emotion_result = "unknown"

    try:
        moral_result = zero_shot_pipe(snippet, moral_labels)["labels"][0]
    except Exception:
        moral_result = "unknown"

    sentiments.append(sentiment_result)
    emotions.append(emotion_result)
    morals.append(moral_result)

# ✅ STEP 10: Add to DataFrame
df["Sentiment"] = sentiments
df["Emotion"] = emotions
df["Moral_Foundation"] = morals

# ✅ STEP 11: Save the labeled file
df.to_csv("manually_labeled_news.csv", index=False)

print("✅ Done! File saved as 'manually_labeled_news.csv'")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/841 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cuda:0


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0
Labeling:   0%|          | 5/1000 [00:03<08:27,  1.96it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Labeling: 100%|██████████| 1000/1000 [07:40<00:00,  2.17it/s]

✅ Done! File saved as 'manually_labeled_news.csv'





In [None]:
import pandas as pd
data=pd.read_csv("/content/manually_labeled_news (2).csv")

In [None]:
data

Unnamed: 0,Date,Title,Link,Details,newspaper_name,Sentiment,Emotion,Moral_Foundation
0,2024-05-23,"Bangladesh elect to bowl, Liton dropped",https://en.prothomalo.com/sports/cricket/wc2os...,Bangladesh won the toss and elected to bowl fi...,Prothom Alo,negative,surprise,authority/subversion
1,2024-03-13,Man found dead in the Shitalakkhya,https://www.thedailystar.net/news/bangladesh/n...,Police yesterday found an unidentified body in...,The Daily Star,negative,surprise,care/harm
2,2024-04-20,Bhutan to consider reducing Sustainable Develo...,https://www.thedailystar.net/news/bangladesh/d...,Bhutan has assured positively considering the ...,The Daily Star,neutral,anticipation,care/harm
3,2024-10-12,Govt sugar mills incur Tk 91.75b loss in 18 yrs,https://en.prothomalo.com/business/local/jp6dy...,The Bangladesh Sugar and Food Industries Corpo...,Prothom Alo,negative,surprise,care/harm
4,2024-09-23,"Atishi takes oath as Delhi CM, BJP calls it 'd...",https://en.prothomalo.com/international/india/...,Aam Aadmi Party (AAP) leader Atishi Marlena wa...,Prothom Alo,neutral,surprise,authority/subversion
...,...,...,...,...,...,...,...,...
995,2024-05-18,"‘Communal forces becoming stronger, united eff...",https://en.prothomalo.com/bangladesh/hc7cfs4uum,Communal forces are gathering their strength i...,Prothom Alo,negative,surprise,care/harm
996,2024-09-25,China launches intercontinental ballistic miss...,https://en.prothomalo.com/international/china/...,China publicly acknowledged for the first time...,Prothom Alo,neutral,fear,care/harm
997,2024-10-19,Outsourcers block road at Shahbagh demanding j...,https://en.prothomalo.com/bangladesh/city/cpj5...,Members of Bangladesh Outsourcing Employees We...,Prothom Alo,negative,anger,care/harm
998,2024-08-04,Why did they shoot my father?,https://www.thedailystar.net/news/bangladesh/l...,"Jahangir Hossain Mridha, 51, was the sole earn...",The Daily Star,negative,anticipation,care/harm


In [None]:
data.groupby("Emotion").count()

Unnamed: 0_level_0,Date,Title,Link,Details,newspaper_name,Sentiment,Moral_Foundation
Emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
anger,78,78,78,78,78,78,78
anticipation,113,113,113,113,113,113,113
disgust,19,19,19,19,19,19,19
fear,41,41,41,41,41,41,41
joy,11,11,11,11,11,11,11
sadness,47,47,47,47,47,47,47
surprise,646,646,646,646,646,646,646
trust,45,45,45,45,45,45,45


In [None]:
import pandas as pd
data=pd.read_csv("/content/all_english_newspaper.csv")

In [None]:
data


Unnamed: 0.1,Unnamed: 0,Date,Title,Link,Details,newspaper_name
0,0,2024-09-24,"Touhid, top US diplomat Verma discuss peaceful...",https://www.thedailystar.net/news/bangladesh/d...,Foreign Affairs Adviser Touhid Hossain met wit...,The Daily Star
1,1,2024-09-24,Ex-IGP Mamun on 4-day remand in murder case,https://www.thedailystar.net/news/bangladesh/c...,A Dhaka court today placed former inspector ge...,The Daily Star
2,2,2024-09-24,"Touhid, Jaishankar meet at UNGA, discuss mutua...",https://www.thedailystar.net/news/bangladesh/d...,"Foreign Affairs Adviser, HE Md. Touhid Hossai...",The Daily Star
3,3,2024-09-24,Army chief pledges support for Yunus' interim ...,https://www.thedailystar.net/news/bangladesh/n...,Bangladesh's army chief vowed to back the coun...,The Daily Star
4,4,2024-09-24,Released top criminals at it again,https://www.thedailystar.net/news/bangladesh/c...,"Nasir Biswas, a 26-year-old mason, was walking...",The Daily Star
...,...,...,...,...,...,...
37748,139942,2024-10-16,World Cup qualifierVintage Messi nets hat tric...,https://en.prothomalo.com/sports/football/c0gc...,Lionel Messi struck a vintage hat trick as wor...,Prothom Alo
37749,139943,2024-10-16,Netanyahu vows ‘no ceasefire’ in Lebanon after...,https://en.prothomalo.com/international/middle...,Israeli prime minister Benjamin Netanyahu reje...,Prothom Alo
37750,139944,2024-10-16,Italy transfers migrants including Bangladeshi...,https://en.prothomalo.com/bangladesh/qlek3uucey,Italian prime minister Giorgia Meloni on Tuesd...,Prothom Alo
37751,139945,2024-10-16,OpinionWhy gender-based violence is worth disc...,https://en.prothomalo.com/opinion/op-ed/xkaaeq...,"Growing up, I often witnessed misogynistic nar...",Prothom Alo


In [None]:
# prompt: find all null value

data.isnull()

Unnamed: 0.1,Unnamed: 0,Date,Title,Link,Details,newspaper_name
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
...,...,...,...,...,...,...
37748,False,False,False,False,False,False
37749,False,False,False,False,False,False
37750,False,False,False,False,False,False
37751,False,False,False,False,False,False


In [None]:
# prompt: randomly select 6000 samples and save them

df_sampled = data.sample(n=6000, random_state=42)
df_sampled.to_csv("df_sampled_6000.csv", index=False)

In [None]:
import pandas as pd

# Read the sampled articles
df = pd.read_csv("/content/df_sampled.csv")

# Parse the 'Date' strings; values that cannot be parsed become NaT instead of raising
parsed_dates = pd.to_datetime(df["Date"], errors="coerce")
df["Date"] = parsed_dates

# Order the rows chronologically
df_sorted = df.sort_values("Date")

# Write the sorted rows out without the index column
df_sorted.to_csv("sorted_df_sample.csv", index=False)


In [None]:
df_sorted

Unnamed: 0,Date,Title,Link,Details,newspaper_name
410,2024-03-01,Bailey road fireNone of the 12 injured are out...,https://en.prothomalo.com/bangladesh/city/ivzo...,The death toll from the fire at a multi-storie...,Prothom Alo
305,2024-03-01,A carnival for pets,https://www.thedailystar.net/my-dhaka/news/car...,"A city of surprises, Dhaka is set to bring for...",The Daily Star
147,2024-03-01,US' messaging towards Bangladesh stressing on ...,https://www.thedailystar.net/news/bangladesh/d...,The US has concluded that smooth relations wit...,The Daily Star
755,2024-03-01,Investigation of 789 cases stalled for DNA rep...,https://en.prothomalo.com/bangladesh/crime-and...,A 10-year old child was stabbed to death in Cu...,Prothom Alo
154,2024-03-01,JCD gets partial central and DU committees,https://www.thedailystar.net/news/bangladesh/p...,BNP today announced a new seven-member partial...,The Daily Star
...,...,...,...,...,...
568,2024-10-20,Contractual workers block Shahbagh for 7hrs,https://www.thedailystar.net/news/bangladesh/n...,Contractual employees of government institutio...,The Daily Star
346,2024-10-20,Bringing back laundered money is govt’s priori...,https://en.prothomalo.com/bangladesh/c4v55k2q0r,Adviser to the finance and commerce ministries...,Prothom Alo
285,2024-10-20,Constitute commission to run Palli Bidyut Samity,https://www.thedailystar.net/news/bangladesh/n...,Palli Bidyut Samity (PBS) staffers yesterday d...,The Daily Star
181,2024-10-20,Govt to bar AL from ‘political participation’,https://www.thedailystar.net/news/bangladesh/n...,The interim government will bar the Awami Leag...,The Daily Star


In [None]:
# prompt: search for "Sheikh Hasina vows fair election" in df_sorted

# Case-insensitive search of the article body for a fixed phrase.
search_term = "Sheikh Hasina"

# regex=False matches the term literally, so a term containing regex
# metacharacters (".", "(", "?", ...) cannot mis-match or raise.
# na=False treats rows with a missing 'Details' value as non-matching.
matches_term = df_sorted["Details"].str.contains(
    search_term, case=False, na=False, regex=False
)
df_search_results = df_sorted[matches_term]

print(f"Found {len(df_search_results)} rows containing '{search_term}':")
df_search_results

Found 169 rows containing 'Sheikh Hasina':


Unnamed: 0,Date,Title,Link,Details,newspaper_name
410,2024-03-01,Bailey road fireNone of the 12 injured are out...,https://en.prothomalo.com/bangladesh/city/ivzo...,The death toll from the fire at a multi-storie...,Prothom Alo
147,2024-03-01,US' messaging towards Bangladesh stressing on ...,https://www.thedailystar.net/news/bangladesh/d...,The US has concluded that smooth relations wit...,The Daily Star
21,2024-03-02,Bailey Road Fire: PM deplores absence of fire ...,https://www.thedailystar.net/news/bangladesh/a...,Prime Minister Sheikh Hasina yesterday bemoane...,The Daily Star
129,2024-03-02,Step up efforts to prevent fire incidents: hea...,https://www.thedailystar.net/news/bangladesh/a...,Health Minister Samanta Lal Sen today urged al...,The Daily Star
268,2024-03-03,Padma rail bridge: Shortage in workforce hinde...,https://en.prothomalo.com/bangladesh/12g7p36ont,The railway authorities have failed to increas...,Prothom Alo
...,...,...,...,...,...
521,2024-10-13,Bureaucrats in fear of cases and arrests,https://en.prothomalo.com/bangladesh/8dydokg7z1,After the fall of the Awami League government ...,Prothom Alo
245,2024-10-17,OpinionChanging the culture of public fund emb...,https://en.prothomalo.com/opinion/op-ed/ku2o31...,What are the best and most effective ways and ...,Prothom Alo
372,2024-10-18,US says no excuse for violence in Bangladesh,https://www.thedailystar.net/news/bangladesh/d...,The US has said there is no excuse for violenc...,The Daily Star
172,2024-10-19,"NYT report on Aynaghar details physical, psych...",https://www.thedailystar.net/news/extrajudicia...,The New York Times on Thursday published a rep...,The Daily Star


In [None]:
import pandas as pd

# Load your dataset
df = pd.read_csv("/content/df_sampled_manual_labeled.csv")

# Ensure 'Date' is in datetime format
#df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

# Sort by date
#df_sorted1 = df.sort_values(by="Date")

# Save the sorted data
#df_sorted.to_csv("sorted_df-lebel.csv", index=False)
df




Unnamed: 0,Date,Title,Link,Details,newspaper_name,Sentiment,Emotion,Moral_Foundation
0,2024-05-23,"Bangladesh elect to bowl, Liton dropped",https://en.prothomalo.com/sports/cricket/wc2os...,Bangladesh won the toss and elected to bowl fi...,Prothom Alo,Neutral,Sadness,
1,2024-03-13,Man found dead in the Shitalakkhya,https://www.thedailystar.net/news/bangladesh/n...,Police yesterday found an unidentified body in...,The Daily Star,Negative,Neutral,
2,2024-04-20,Bhutan to consider reducing Sustainable Develo...,https://www.thedailystar.net/news/bangladesh/d...,Bhutan has assured positively considering the ...,The Daily Star,Neutral,Neutral,Care/Harm
3,2024-10-12,Govt sugar mills incur Tk 91.75b loss in 18 yrs,https://en.prothomalo.com/business/local/jp6dy...,The Bangladesh Sugar and Food Industries Corpo...,Prothom Alo,Positive,Joy,Care/Harm
4,2024-09-23,"Atishi takes oath as Delhi CM, BJP calls it 'd...",https://en.prothomalo.com/international/india/...,Aam Aadmi Party (AAP) leader Atishi Marlena wa...,Prothom Alo,Positive,Joy,Care/Harm
...,...,...,...,...,...,...,...,...
995,2024-05-18,"‘Communal forces becoming stronger, united eff...",https://en.prothomalo.com/bangladesh/hc7cfs4uum,Communal forces are gathering their strength i...,Prothom Alo,Negative,Anger,Authority/Subversion
996,2024-09-25,China launches intercontinental ballistic miss...,https://en.prothomalo.com/international/china/...,China publicly acknowledged for the first time...,Prothom Alo,Positive,Neutral,Authority/Subversion
997,2024-10-19,Outsourcers block road at Shahbagh demanding j...,https://en.prothomalo.com/bangladesh/city/cpj5...,Members of Bangladesh Outsourcing Employees We...,Prothom Alo,Neutral,Neutral,Authority/Subversion
998,2024-08-04,Why did they shoot my father?,https://www.thedailystar.net/news/bangladesh/l...,"Jahangir Hossain Mridha, 51, was the sole earn...",The Daily Star,Negative,Sadness,Care/Harm


In [None]:
df.groupby("Emotion").count()


Unnamed: 0_level_0,Date,Title,Link,Details,newspaper_name,Sentiment,Moral_Foundation
Emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Anger,112,112,112,112,112,112,96
Fear,32,32,32,32,32,32,25
Joy,304,304,304,304,304,304,251
Neutral,281,281,281,281,281,281,215
Sadness,271,271,271,271,271,271,251


In [None]:
import pandas as pd

# Load main and labeled data
df_main = pd.read_csv("/content/sorted_df_sample.csv")        # <- your original news data
df_labeled = pd.read_csv("/content/sorted_df-lebel.csv")  # <- labeled with Sentiment, Emotion, Moral_Foundation

# Helper to get first 3 words
def first_three_words(text):
    """Return the first three words of ``text``, lower-cased and single-spaced.

    ``text`` is converted with ``str()`` first, so non-string values
    (e.g. ``None`` or numbers) are keyed on their string form. Fewer than
    three words yields whatever words are present; no words yields ``""``.
    """
    words = str(text).strip().lower().split()
    leading_words = words[:3]
    return " ".join(leading_words)

# Apply to both dataframes
# Copy the labels from df_labeled onto df_main, matching rows by the first
# three words of the title, then save the result.
label_cols = ["Sentiment", "Emotion", "Moral_Foundation"]

# Build the join key on both dataframes
df_main["head_key"] = df_main["Title"].apply(first_three_words)
df_labeled["head_key"] = df_labeled["Title"].apply(first_three_words)

# Keep one label row per key: a left merge repeats a df_main row once for
# every matching right-hand row, so duplicate keys would inflate the output.
labels = df_labeled[["head_key"] + label_cols].drop_duplicates(subset="head_key")

# Left merge keeps every df_main row. If df_main already has a label column,
# the incoming one is suffixed "_labeled"; otherwise it arrives unsuffixed.
df_updated = df_main.merge(
    labels,
    on="head_key",
    how="left",
    suffixes=("", "_labeled")
)

# Where a suffixed column exists, prefer the labeled value and fall back to
# the value already in df_main, then drop only the suffixed helper column.
# (The previous version combined each column with itself and then dropped
# it, which removed every label from the saved file.)
for col in label_cols:
    labeled_col = f"{col}_labeled"
    if labeled_col in df_updated.columns:
        df_updated[col] = df_updated[labeled_col].combine_first(df_updated[col])
        df_updated.drop(columns=[labeled_col], inplace=True)

# Drop helper key
df_updated.drop(columns=["head_key"], inplace=True)

# Save the updated main file
df_updated.to_csv("main_file_updated.csv", index=False)


In [None]:
df=pd.read_csv('/content/manual_labeled_1_to_1000.csv')
df

Unnamed: 0,Date,Title,Details,Sentiment,Emotion,Moral_Foundation,Topic
0,2024-10-01,Interview: Baharul AlamWhy we gave the police ...,Prothom Alo:After the fall of Sheikh Hasina's ...,,,,
1,2024-08-05,"AL headquarters, Dhanmondi-32 torched",Agitators vandalised and torched key establish...,,,,
2,2024-08-15,OpinionUnearthing partition in Bangladesh: A f...,Three weeks before protests against the quota ...,,,,
3,2024-06-06,Rajshahi Shaheed Minar: Activists demand cance...,"Academics, environmentalists, and civil societ...",,,,
4,2024-08-04,One killed in clash between protesters and pol...,At least one person was killed and 20 others i...,,,,
...,...,...,...,...,...,...,...
595,2024-05-18,"‘Communal forces becoming stronger, united eff...",Communal forces are gathering their strength i...,Positive,Respect,Loyalty/Betrayal,Culture
596,2024-09-25,China launches intercontinental ballistic miss...,China publicly acknowledged for the first time...,Negative,Anger,Care/Harm,Crime
597,2024-10-19,Outsourcers block road at Shahbagh demanding j...,Members of Bangladesh Outsourcing Employees We...,Negative,Concern,Fairness/Cheating,Politics
598,2024-08-04,Why did they shoot my father?,"Jahangir Hossain Mridha, 51, was the sole earn...",Negative,Frustration,Fairness/Cheating,Economics


In [None]:
# prompt: combine all CSV files

import glob

# Files this notebook writes into the working directory. They must not be
# read back in: on a re-run "combined_output.csv" would otherwise be
# concatenated with its own inputs and duplicate every row.
generated_files = {"main_file_updated.csv", "combined_output.csv"}

# glob returns names in arbitrary order; sort so the combined row order is reproducible
csv_files = sorted(
    name for name in glob.glob("*.csv") if name not in generated_files
)

# Read each CSV file; a file that cannot be read is reported and skipped
list_of_dfs = []
for filename in csv_files:
    try:
        df_temp = pd.read_csv(filename)
    except Exception as e:
        print(f"Error reading {filename}: {e}")
    else:
        list_of_dfs.append(df_temp)
        print(f"Successfully read {filename}")


# Concatenate all dataframes into one
if list_of_dfs:
    combined_df = pd.concat(list_of_dfs, ignore_index=True)

    # Save the combined dataframe to a new CSV file
    combined_df.to_csv("combined_output.csv", index=False)

    print("\nSuccessfully combined all CSV files into 'combined_output.csv'")
    print(f"Combined dataframe shape: {combined_df.shape}")
else:
    print("No CSV files found or read successfully.")

# Display the head of the combined dataframe
# if 'combined_df' in locals():
#   display(combined_df.head())

Successfully read manual_labeled_601_to_700.csv
Successfully read manual_labeled_301_to_400.csv
Successfully read manual_labeled_801_to_900.csv
Successfully read manual_labeled_101_to_200.csv
Successfully read manual_labeled_701_to_800.csv
Successfully read manual_labeled_201_to_300.csv
Successfully read next20_manual_labeled.csv
Successfully read next20c_manual_labeled.csv
Successfully read manual_labeled_901_to_1000.csv
Successfully read next20d_manual_labeled.csv
Successfully read first20_manual_labeled.csv
Successfully read manual_labeled_501_to_600.csv
Successfully read next20b_manual_labeled.csv
Successfully read manual_labeled_401_to_500.csv

Successfully combined all CSV files into 'combined_output.csv'
Combined dataframe shape: (1000, 7)


In [None]:
combined_df

Unnamed: 0,Date,Title,Details,Sentiment,Emotion,Moral_Foundation,Topic
0,2024-08-05,"Broadband services, 4G restored without social...",Broadband internet service was restored in Ban...,Negative,Suppression,Liberty/Oppression,Technology
1,2024-10-17,8 nat’l days including March 7 cancelled,The interim government has cancelled eight nat...,Negative,Disapproval,Authority/Subversion,Politics
2,2024-10-07,Foreign secy to visit US October 7-14,Foreign Secretary Md Jashim Uddin is scheduled...,Neutral,Anticipation,Authority/Subversion,International
3,2024-06-28,"1 killed, 3 injured in fire at Chattogram",A person was killed and three others were inju...,Negative,Tragedy,Care/Harm,Accident
4,2024-08-09,OpinionWhat decision is India taking about She...,In the backdrop of Sheikh Hasina taking shelte...,Negative,Speculation,Authority/Subversion,Politics
...,...,...,...,...,...,...,...
995,2024-06-16,ISPR release addresses situation around St Mar...,An ISPR release has sought to quell speculatio...,,,,
996,2024-08-21,US electionsUS ready for a Harris presidency: ...,Barack Obama told fellow Democrats in Chicago ...,,,,
997,2024-03-22,Policies alone insufficient without implementa...,Policies alone are insufficient to ensure safe...,,,,
998,2024-07-26,"Arson, vandalism: ‘Play your part to bring th...",Prime Minister Sheikh Hasina yesterday said th...,,,,


In [None]:
combined_df.groupby("Emotion").count()

Unnamed: 0_level_0,Date,Title,Details,Sentiment,Moral_Foundation,Topic
Emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Accident,2,2,2,2,2,2
Accountability,1,1,1,1,1,1
Accusation,4,4,4,4,4,4
Achievement,1,1,1,1,1,1
Administrative,4,4,4,4,4,4
...,...,...,...,...,...,...
Victory,5,5,5,5,3,5
Violence,1,1,1,1,1,1
Warning,3,3,3,3,3,3
Weather,1,1,1,1,0,1


In [None]:
combined_df.groupby("Moral_Foundation").count()

Unnamed: 0_level_0,Date,Title,Details,Sentiment,Emotion,Topic
Moral_Foundation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Authority/Subversion,100,100,100,100,100,100
Care/Harm,221,221,221,221,221,221
Fairness/Cheating,154,154,154,154,154,154
Liberty/Oppression,41,41,41,41,41,41
Loyalty/Betrayal,53,53,53,53,53,53


In [None]:
combined_df.groupby("Sentiment").count()

Unnamed: 0_level_0,Date,Title,Details,Emotion,Moral_Foundation,Topic
Sentiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Mixed,2,2,2,2,1,2
Negative,360,360,360,360,360,360
Neutral,56,56,56,56,45,56
Positive,182,182,182,182,163,182


In [None]:
combined_df.isnull().sum()

Unnamed: 0,0
Date,0
Title,0
Details,0
Sentiment,400
Emotion,400
Moral_Foundation,431
Topic,400


In [None]:
# prompt: find out how many null values each file contains

import pandas as pd
import glob

# Get list of CSV files in the current directory
# Report, for every CSV file in the current directory, which columns contain
# null values and how many. Sorted so the report order is reproducible.
csv_files = sorted(glob.glob("*.csv"))

print("Null value counts per file:")
print("-" * 30)

for filename in csv_files:
    try:
        df_temp = pd.read_csv(filename)
    except Exception as e:
        # An unreadable file is reported and skipped
        print(f"Error reading {filename}: {e}")
        continue

    null_counts = df_temp.isnull().sum()
    columns_with_nulls = null_counts[null_counts > 0]

    print(f"\nFile: {filename}")
    if columns_with_nulls.empty:
        # Print the message alone instead of an empty "Series([], ...)" dump
        print("  No null values found.")
    else:
        print(columns_with_nulls)  # Only columns that contain nulls

print("-" * 30)


Null value counts per file:
------------------------------

File: manual_labeled_601_to_700.csv
Moral_Foundation    6
dtype: int64

File: manual_labeled_301_to_400.csv
Series([], dtype: int64)
  No null values found.

File: manual_labeled_801_to_900.csv
Moral_Foundation    3
dtype: int64

File: manual_labeled_101_to_200.csv
Series([], dtype: int64)
  No null values found.

File: manual_labeled_701_to_800.csv
Moral_Foundation    2
dtype: int64

File: manual_labeled_201_to_300.csv
Series([], dtype: int64)
  No null values found.

File: next20_manual_labeled.csv
Moral_Foundation    2
dtype: int64

File: next20c_manual_labeled.csv
Moral_Foundation    2
dtype: int64

File: manual_labeled_901_to_1000.csv
Series([], dtype: int64)
  No null values found.

File: combined_output.csv
Sentiment           400
Emotion             400
Moral_Foundation    431
Topic               400
dtype: int64

File: next20d_manual_labeled.csv
Moral_Foundation    1
dtype: int64

File: first20_manual_labeled.csv
Mora

In [None]:
# prompt: /content/manual_labeled_1_to_1000.csv value count
df=pd.read_csv("/content/manual_labeled_1_to_1000.csv")
df.groupby("Emotion").count()


Unnamed: 0_level_0,Date,Title,Details,Sentiment,Moral_Foundation,Topic
Emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Accident,2,2,2,2,2,2
Accountability,1,1,1,1,1,1
Accusation,4,4,4,4,4,4
Achievement,1,1,1,1,1,1
Administrative,4,4,4,4,4,4
...,...,...,...,...,...,...
Victory,5,5,5,5,3,5
Violence,1,1,1,1,1,1
Warning,3,3,3,3,3,3
Weather,1,1,1,1,0,1


In [None]:
df.isna().sum()

Unnamed: 0,0
Date,0
Title,0
Details,0
Sentiment,100
Emotion,100
Moral_Foundation,119
Topic,100


In [None]:
import pandas as pd

In [None]:
df=pd.read_csv("/content/moral_behavior_analysis.csv")
df

Unnamed: 0,Date,Title,Link,Details,newspaper_name,text,sentiment_score,sentiment,topic,Care/Harm,Fairness/Cheating,Loyalty/Betrayal,Authority/Subversion,Sanctity/Degradation
0,2024-09-24,"Touhid, top US diplomat Verma discuss peaceful...",https://www.thedailystar.net/news/bangladesh/d...,Foreign Affairs Adviser Touhid Hossain met wit...,The Daily Star,"Touhid, top US diplomat Verma discuss peaceful...",-0.014870,Negative,0,0,6,2,0,0
1,2024-09-24,Ex-IGP Mamun on 4-day remand in murder case,https://www.thedailystar.net/news/bangladesh/c...,A Dhaka court today placed former inspector ge...,The Daily Star,Ex-IGP Mamun on 4-day remand in murder case. A...,-0.003623,Negative,4,0,0,0,1,0
2,2024-09-24,"Touhid, Jaishankar meet at UNGA, discuss mutua...",https://www.thedailystar.net/news/bangladesh/d...,"Foreign Affairs Adviser, HE Md. Touhid Hossai...",The Daily Star,"Touhid, Jaishankar meet at UNGA, discuss mutua...",0.000000,Neutral,0,0,2,0,0,0
3,2024-09-24,Army chief pledges support for Yunus' interim ...,https://www.thedailystar.net/news/bangladesh/n...,Bangladesh's army chief vowed to back the coun...,The Daily Star,Army chief pledges support for Yunus' interim ...,0.001253,Positive,0,3,1,0,2,0
4,2024-09-24,Released top criminals at it again,https://www.thedailystar.net/news/bangladesh/c...,"Nasir Biswas, a 26-year-old mason, was walking...",The Daily Star,Released top criminals at it again. Nasir Bisw...,-0.001387,Negative,1,0,0,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17692,2024-10-16,World Cup qualifierVintage Messi nets hat tric...,https://en.prothomalo.com/sports/football/c0gc...,Lionel Messi struck a vintage hat trick as wor...,Prothom Alo,World Cup qualifierVintage Messi nets hat tric...,0.005618,Positive,0,0,1,0,0,0
17693,2024-10-16,Netanyahu vows ‘no ceasefire’ in Lebanon after...,https://en.prothomalo.com/international/middle...,Israeli prime minister Benjamin Netanyahu reje...,Prothom Alo,Netanyahu vows ‘no ceasefire’ in Lebanon after...,0.000000,Neutral,5,0,0,0,3,0
17694,2024-10-16,Italy transfers migrants including Bangladeshi...,https://en.prothomalo.com/bangladesh/qlek3uucey,Italian prime minister Giorgia Meloni on Tuesd...,Prothom Alo,Italy transfers migrants including Bangladeshi...,0.000000,Neutral,0,1,0,0,1,0
17695,2024-10-16,OpinionWhy gender-based violence is worth disc...,https://en.prothomalo.com/opinion/op-ed/xkaaeq...,"Growing up, I often witnessed misogynistic nar...",Prothom Alo,OpinionWhy gender-based violence is worth disc...,-0.007470,Negative,0,3,10,2,3,0


In [None]:
# prompt: Using dataframe df: show all the analysis with this df

# Display the first 5 rows of the dataframe
# Display the first 5 rows of the dataframe
print(df.head())

# Display the data types of each column.
# info() writes its report itself and returns None, so it is called directly;
# wrapping it in print() would append a stray "None" line.
df.info()

# Display descriptive statistics for numerical columns
print(df.describe())

# Display the number of distinct values in every column
print(df.nunique())

# Display the distribution of the 'newspaper_name' column
print(df['newspaper_name'].value_counts())

# Display the distribution of the 'sentiment' column
print(df['sentiment'].value_counts())

# Display the distribution of the 'topic' column
print(df['topic'].value_counts())

# Group by 'newspaper_name' and calculate the average sentiment score
print(df.groupby('newspaper_name')['sentiment_score'].mean())

# Group by 'topic' and calculate the average sentiment score
print(df.groupby('topic')['sentiment_score'].mean())

# Group by 'newspaper_name' and 'sentiment' and count the occurrences
# (one row per newspaper, one column per sentiment, 0 where a pair never occurs)
print(df.groupby(['newspaper_name', 'sentiment']).size().unstack(fill_value=0))

         Date                                              Title  \
0  2024-09-24  Touhid, top US diplomat Verma discuss peaceful...   
1  2024-09-24        Ex-IGP Mamun on 4-day remand in murder case   
2  2024-09-24  Touhid, Jaishankar meet at UNGA, discuss mutua...   
3  2024-09-24  Army chief pledges support for Yunus' interim ...   
4  2024-09-24                 Released top criminals at it again   

                                                Link  \
0  https://www.thedailystar.net/news/bangladesh/d...   
1  https://www.thedailystar.net/news/bangladesh/c...   
2  https://www.thedailystar.net/news/bangladesh/d...   
3  https://www.thedailystar.net/news/bangladesh/n...   
4  https://www.thedailystar.net/news/bangladesh/c...   

                                             Details  newspaper_name  \
0  Foreign Affairs Adviser Touhid Hossain met wit...  The Daily Star   
1  A Dhaka court today placed former inspector ge...  The Daily Star   
2   Foreign Affairs Adviser, HE Md. To

In [None]:
!pip install transformers sentencepiece langdetect pandas torch
!pip install nltk spacy banglanltk bertopic
!python -m nltk.downloader punkt
!python -m spacy download en_core_web_sm

# -------------------- IMPORTS --------------------
import pandas as pd
import torch
import re
import nltk
from langdetect import detect
from nltk.tokenize import sent_tokenize
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from bertopic import BERTopic
import spacy

# Download NLTK models
nltk.download("punkt")



Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# ✅ Universal News Analyzer: Multilingual & Multi-dimensional (English + Bangla)

# -------------------- SETUP --------------------


# Load English NLP model
# spaCy pipeline used by extract_entities() for PERSON / ORG / GPE entities.
nlp_en = spacy.load("en_core_web_sm")

# -------------------- MODELS --------------------
# All pipelines below are built without a `device` argument; the recorded
# output of this cell shows "Device set to use cpu" for each of them.

# Summarizer
# Used by extract_summary(), which feeds it the first 1024 characters of an article.
summarizer = pipeline("summarization", model="google/pegasus-xsum", tokenizer="google/pegasus-xsum")

# Sentiment (English)
# NOTE(review): the label strings come straight from the checkpoint's config;
# this checkpoint presumably emits LABEL_0/LABEL_1/LABEL_2 rather than
# human-readable names - confirm against the model card.
sentiment_pipeline_en = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")

# Emotion (English)
# return_all_scores=True makes the pipeline return a score for every label so
# detect_emotion() can pick the highest one itself.
emotion_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_pipeline = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer, return_all_scores=True)

# Sentence model for topic modeling
# NOTE: topic_model is created here but no function in this cell calls it.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
topic_model = BERTopic(embedding_model=sentence_model, verbose=False)

# -------------------- MORAL KEYWORDS --------------------
# Maps each moral-foundation label to the lowercase English keywords that count
# as evidence for it; detect_moral_foundations() counts whole-word hits.
moral_keywords = {
    "Care/Harm": ["protect", "hurt", "aid", "injury", "rescue", "suffer", "heal", "compassion"],
    "Fairness/Cheating": ["justice", "equality", "discrimination", "bias", "corruption", "transparency"],
    "Loyalty/Betrayal": ["patriotism", "allegiance", "nation", "betrayal", "unity", "treason"],
    "Authority/Subversion": ["law", "order", "authority", "respect", "disobedience", "revolt", "government"],
    "Purity/Degradation": ["clean", "pollution", "sacred", "sin", "immoral", "filth", "deviant"],
}


# -------------------- CORE FUNCTIONS --------------------
def detect_language(text):
    """Return the language code that ``langdetect.detect`` reports for ``text``.

    Detection is best-effort: if the detector raises for any reason the
    function returns ``"unknown"`` instead of propagating the error.
    """
    try:
        return detect(text)
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still stop a long run.
        return "unknown"

def extract_summary(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Only the first 1024 characters are passed to the model. Returns the
    generated summary, or the literal ``"Summary generation failed."`` when
    the pipeline raises.
    """
    try:
        return summarizer(text[:1024])[0]['summary_text']
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "Summary generation failed."

def detect_sentiment(text, lang):
    """Classify the sentiment of an English ``text``.

    Returns a ``(label, score)`` tuple:
    * ``("UNSUPPORTED", 0.0)`` when ``lang`` is not ``"en"``;
    * ``("UNKNOWN", 0.0)`` when the pipeline raises;
    * otherwise the pipeline's top label and its score rounded to 3 decimals.
    Only the first 512 characters of ``text`` are classified.
    """
    if lang != "en":
        return "UNSUPPORTED", 0.0
    try:
        top = sentiment_pipeline_en(text[:512])[0]
        return top['label'], round(top['score'], 3)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "UNKNOWN", 0.0

def detect_emotion(text, lang):
    """Return the dominant emotion of an English ``text`` as ``(label, score)``.

    Non-English input yields ``("UNSUPPORTED", 0.0)``; a pipeline failure
    yields ``("ERROR", 0.0)``. Only the first 512 characters are classified
    and the score is rounded to 3 decimals.
    """
    if lang != "en":
        return "UNSUPPORTED", 0.0
    try:
        # The pipeline returns one list of {label, score} dicts per input.
        label_scores = emotion_pipeline(text[:512])[0]
        best = max(label_scores, key=lambda item: item['score'])
        return best['label'], round(best['score'], 3)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "ERROR", 0.0

def detect_moral_foundations(text, keywords=None):
    """Return up to two moral-foundation labels whose keywords occur in ``text``.

    Args:
        text: Article text; matching is case-insensitive.
        keywords: Optional ``{label: [keyword, ...]}`` mapping. Defaults to the
            module-level ``moral_keywords``.

    Returns:
        The labels with the highest whole-word hit counts (at most two, highest
        first; ties keep the mapping's order), or ``["Non-moral"]`` when no
        keyword matches.
    """
    if keywords is None:
        keywords = moral_keywords
    text_lower = text.lower()
    scores = {}
    for moral, words in keywords.items():
        # The original pattern was rf"\\b...\\b": in a raw string that is a literal
        # backslash followed by "b", so nothing ever matched. Lookarounds give
        # whole-word matching without that pitfall.
        hits = sum(
            len(re.findall(rf"(?<!\w){re.escape(word.lower())}(?!\w)", text_lower))
            for word in words
        )
        if hits > 0:
            scores[moral] = hits
    if scores:
        return sorted(scores, key=scores.get, reverse=True)[:2]
    return ["Non-moral"]

def extract_entities(text, lang):
    """Return the key people, organisations and places mentioned in ``text``.

    For English (``lang == "en"``) the spaCy pipeline ``nlp_en`` is used and the
    unique PERSON / ORG / GPE entity strings are returned in sorted order (the
    original ``list(set(...))`` order was arbitrary). For any other language the
    function returns the entries of the optional module-level ``bangla_entities``
    list that occur in ``text``; if that list is not defined the result is ``[]``
    instead of a NameError.
    """
    if lang == "en":
        doc = nlp_en(text)
        return sorted({ent.text for ent in doc.ents if ent.label_ in ("PERSON", "ORG", "GPE")})
    try:
        known_names = bangla_entities
    except NameError:
        # No Bangla gazetteer is defined in this cell; degrade to "no entities".
        return []
    return [name for name in known_names if name in text]

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# FLAN-T5 (base) is loaded once at cell level and shared by every call below.
flan_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
flan_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

def extract_main_narrative(text):
    """Ask FLAN-T5 for a 2-3 sentence summary of the article's main issue.

    The prompt is truncated to 1024 tokens and generation is capped at
    max_length=256. Returns the decoded output with special tokens removed.
    """
    instruction = "Summarize this news article in 2-3 sentences and explain the main issue discussed:"
    encoded = flan_tokenizer(
        f"{instruction}\n\n{text}",
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    )
    generated_ids = flan_model.generate(**encoded, max_length=256)
    return flan_tokenizer.decode(generated_ids[0], skip_special_tokens=True)


# -------------------- WRAPPER FUNCTION --------------------
def analyze_article(text):
    """Run every analysis step on ``text`` and collect the results in one dict.

    Steps run in this order: language detection, summary, sentiment, emotion,
    moral foundations, entities, main narrative. The returned keys are
    "Language", "Summary", "Sentiment", "Sentiment Score", "Emotion",
    "Emotion Score", "Moral Foundations", "Key Figures" and "Main Theme "
    (the last key carries a trailing space, kept as-is for compatibility).
    """
    report = {"Language": detect_language(text)}
    lang = report["Language"]
    report["Summary"] = extract_summary(text)
    report["Sentiment"], report["Sentiment Score"] = detect_sentiment(text, lang)
    report["Emotion"], report["Emotion Score"] = detect_emotion(text, lang)
    report["Moral Foundations"] = detect_moral_foundations(text)
    report["Key Figures"] = extract_entities(text, lang)
    report["Main Theme "] = extract_main_narrative(text)
    return report

# -------------------- EXAMPLE USAGE --------------------
english_text = """Gary Shteyngart liked the stick. It was a handsome, polished staff called a shillelagh, used in Ireland for walking and the occasional cudgeling. This one was on sale at the Armoury, a high-end men’s clothing shop in TriBeCa that could double as an Ivy League library.

“I’m in love with this thing,” he said of the shillelagh, which was made by Fox Umbrellas of London. President John F. Kennedy, who came to embody Ivy cool, had been a Fox enthusiast. Now, so was Mr. Shteyngart, the bespectacled 53-year-old Russian American novelist. “This might be my new way of living,” he said.

Having recently turned into an unlikely men’s style icon with a penchant for crisp martinis, tailored suits and vintage watches, Mr. Shteyngart could credibly entertain the purchase of a $250 stick, even if doing so might make him look like one of the insecure, status-obsessed Manhattanites who populate his novels. The most recent of those, “Vera, or Faith,” about a precocious Korean American girl growing up in a privileged Manhattan household while the nation descends into an all-too-familiar mix of extremism and indifference, is out now.

Mr. Shteyngart had been working on another novel — long and complex, involving spies — when David Ebershoff, Mr. Shteyngart’s longtime editor at Random House, invited him to lunch at the restaurant Blue Ribbon in Midtown Manhattan in the fall of 2023. Mr. Ebershoff broke some bad news: Mr. Shteyngart’s epic was not working.

Mr. Shteyngart, who had been having his own doubts, sat silently for a few moments. “And then he put his finger up in the air and said, ‘I have another idea,’” Mr. Ebershoff recalled. That idea — his new novel, coalesced into a manuscript in just 51 days. “I’ve never seen anything like it,” the editor said, praising the author’s “new level of emotional openness.”

Mr. Shteyngart’s sartorial tastes have also deepened. “I used to be so against dressing up,” he said, as Daniel Greenwood, the Armoury’s director for U.S. sales, outfitted him in an ocean blue City Hunter jacket, made in Hong Kong from Irish linen and selling for $1,000. Born and raised in chilly Leningrad (now St. Petersburg), Mr. Shteyngart had transformed into a Mediterranean flâneur, ready to face a New York City afternoon in late spring."""



# Analyse the sample article and print one "field: value" line per result.
print("\n🔍 English Article Analysis:")
english_report = analyze_article(english_text)
for field in english_report:
    print(f"{field}: {english_report[field]}")


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Device set to use cpu


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cpu


tokenizer_config.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/768 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Device set to use cpu


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]


🔍 English Article Analysis:


In [None]:
# --- Install required packages ---
!pip install transformers sentencepiece langdetect pandas torch --quiet
!pip install nltk spacy banglanltk bertopic --quiet
# Install required libraries
!pip install transformers sentencepiece --quiet



# Download models and data
import nltk
nltk.download("punkt")
import spacy
spacy.cli.download("en_core_web_sm")

# --- Imports ---
import re
from langdetect import detect
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from bertopic import BERTopic
import torch
import spacy



# --- Initialize ---
# `Translator` is never imported in this cell, so the original `Translator()`
# call raised "NameError: name 'Translator' is not defined" (see the recorded
# output) and aborted the cell. This stand-in keeps the
# `translator.translate(text, src=..., dest=...).text` call shape used by
# translate_text() / reverse_translate_text() and delegates to the MarianMT
# helpers defined further down in this cell.
class _MarianTranslator:
    """Minimal translator exposing ``translate(text, src, dest)`` with a ``.text`` result."""

    class _Result:
        """Holds the translated string under ``.text``."""

        def __init__(self, text):
            self.text = text

    def translate(self, text, src="bn", dest="en"):
        """Translate ``text`` between Bangla ("bn") and English ("en").

        Raises:
            ValueError: if the ``src``/``dest`` pair is not bn->en or en->bn.
        """
        # The helpers are resolved at call time because they are defined later in the cell.
        if (src, dest) == ("bn", "en"):
            return self._Result(translate_bn_to_en(text))
        if (src, dest) == ("en", "bn"):
            return self._Result(translate_en_to_bn(text))
        raise ValueError(f"Unsupported translation direction: {src!r} -> {dest!r}")


translator = _MarianTranslator()

# English NLP
# spaCy pipeline used by extract_entities() for PERSON / ORG / GPE entities.
nlp_en = spacy.load("en_core_web_sm")

# Summarizer - Pegasus XSum (English)
summarizer = pipeline("summarization", model="google/pegasus-xsum", tokenizer="google/pegasus-xsum")

# Sentiment - English RoBERTa Twitter
# NOTE(review): label strings come from the checkpoint config; this checkpoint
# presumably emits LABEL_0/LABEL_1/LABEL_2 - confirm against the model card.
sentiment_pipeline_en = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")

# Emotion - English DistilBERT emotion classifier
# NOTE(review): the original comment called this a "GoEmotions" model; the
# checkpoint name does not say so - confirm the training data on the model card.
# return_all_scores=True returns a score per label so detect_emotion() can pick the top one.
emotion_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_pipeline = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer, return_all_scores=True)

# Sentence transformer + BERTopic for themes
# detect_main_theme() refits this topic model on each article it is given.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
topic_model = BERTopic(embedding_model=sentence_model, verbose=False)

# Moral foundations keywords English + Bangla (expanded)
# Maps each foundation label to the keywords counted by detect_moral_foundations().
# analyze_article() in this cell runs that detector on English text (Bangla
# articles are translated first), so the Bangla entries only matter if the
# detector is called directly on Bangla text.
moral_keywords = {
    "Care/Harm": ["protect", "hurt", "aid", "injury", "rescue", "suffer", "heal", "compassion", "রক্ষা", "আঘাত", "সহায়তা", "ব্যথা"],
    "Fairness/Cheating": ["justice", "equality", "discrimination", "bias", "corruption", "transparency", "বিচার", "সমতা", "অনিয়ম", "দুর্নীতি"],
    "Loyalty/Betrayal": ["patriotism", "allegiance", "nation", "betrayal", "unity", "treason", "দেশপ্রেম", "বিশ্বাসঘাতকতা", "একতা"],
    "Authority/Subversion": ["law", "order", "authority", "respect", "disobedience", "revolt", "government", "আইন", "সরকার", "অবাধ্যতা"],
    "Purity/Degradation": ["clean", "pollution", "sacred", "sin", "immoral", "filth", "deviant", "পবিত্র", "অপবিত্র", "পাপ"],
}



from transformers import MarianMTModel, MarianTokenizer

# Load Bangla → English model
bn2en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-bn-en")
bn2en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-bn-en")

# Load English → Bangla model.
# The original checkpoint name "Helsinki-NLP/opus-mt-en-bn" is replaced by the
# multilingual "opus-mt-en-mul" checkpoint, which the later revision of this
# notebook also switches to ("Corrected model name"). That model picks its
# target language from a leading ">>id<<" token, added in translate_en_to_bn().
en2bn_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-mul")
en2bn_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-mul")

def translate_bn_to_en(text):
    """Translate Bangla ``text`` to English with the opus-mt-bn-en model.

    The input is tokenized with truncation enabled, so text beyond the
    tokenizer's maximum length is dropped; output is capped at max_length=512.
    """
    inputs = bn2en_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    translated = bn2en_model.generate(**inputs, max_length=512)
    return bn2en_tokenizer.decode(translated[0], skip_special_tokens=True)

def translate_en_to_bn(text):
    """Translate English ``text`` to Bangla with the multilingual en-mul model.

    A ">>ben<<" target-language token is prepended unless the text already
    starts with a ">>" token. Truncation and the 512 output cap apply as in
    translate_bn_to_en().
    """
    if not text.lstrip().startswith(">>"):
        text = f">>ben<< {text}"
    inputs = en2bn_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    translated = en2bn_model.generate(**inputs, max_length=512)
    return en2bn_tokenizer.decode(translated[0], skip_special_tokens=True)


# --- Core Functions ---

def detect_language(text):
    """Classify ``text`` as ``'bn'``, ``'en'``, ``'other'`` or ``'unknown'``.

    The langdetect code is reduced to ``'bn'`` / ``'en'`` by prefix; any other
    detected language becomes ``'other'``. ``'unknown'`` is returned when the
    detector raises.
    """
    try:
        lang = detect(text)
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still stop a long run.
        return 'unknown'
    if lang.startswith('bn'):
        return 'bn'
    if lang.startswith('en'):
        return 'en'
    return 'other'

def translate_text(text, src="bn", dest="en"):
    """Translate ``text`` from ``src`` to ``dest`` via the module-level ``translator``.

    Falls back to returning ``text`` unchanged if the translation raises.
    """
    try:
        return translator.translate(text, src=src, dest=dest).text
    except Exception:
        # fallback: hand the original text back untranslated
        return text

def reverse_translate_text(text, src="en", dest="bn"):
    """Translate analysis output back from ``src`` (default English) to ``dest`` (default Bangla).

    Uses the module-level ``translator``; on any error the input ``text`` is
    returned as-is.
    """
    try:
        outcome = translator.translate(text, src=src, dest=dest)
        back_translated = outcome.text
    except Exception:
        # fallback
        back_translated = text
    return back_translated

def extract_summary(text):
    """Summarize the first 1024 characters of ``text``.

    Generation is deterministic (do_sample=False) with min_length=30 and
    max_length=130. Returns "Summary generation failed." if anything raises.
    """
    generation_args = {"max_length": 130, "min_length": 30, "do_sample": False}
    try:
        candidates = summarizer(text[:1024], **generation_args)
        first = candidates[0]
        return first['summary_text']
    except Exception:
        return "Summary generation failed."

def detect_sentiment(text, lang='en'):
    """Classify the sentiment of an English ``text`` as ``(label, score)``.

    Returns ``("UNSUPPORTED", 0.0)`` for non-English ``lang`` (the original
    fell through and returned ``None``, which breaks callers that unpack two
    values), ``("UNKNOWN", 0.0)`` if the pipeline raises, otherwise the top
    label and its score rounded to 3 decimals. Only the first 512 characters
    are classified.
    """
    if lang != 'en':
        return "UNSUPPORTED", 0.0
    try:
        out = sentiment_pipeline_en(text[:512])[0]
        return out['label'], round(out['score'], 3)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "UNKNOWN", 0.0

def detect_emotion(text, lang='en'):
    """Return the dominant emotion of an English ``text`` as ``(label, score)``.

    Non-English input yields ``("UNSUPPORTED", 0.0)``; a pipeline failure
    yields ``("ERROR", 0.0)``. Only the first 512 characters are classified
    and the score is rounded to 3 decimals.
    """
    if lang != 'en':
        return "UNSUPPORTED", 0.0  # Emotion models for Bangla are limited
    try:
        # The pipeline returns one list of {label, score} dicts per input.
        scores = emotion_pipeline(text[:512])[0]
        best = max(scores, key=lambda item: item['score'])
        return best['label'], round(best['score'], 3)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "ERROR", 0.0

def detect_moral_foundations(text, keywords=None):
    """Return up to two moral-foundation labels whose keywords occur in ``text``.

    Args:
        text: Article text; matching is case-insensitive.
        keywords: Optional ``{label: [keyword, ...]}`` mapping. Defaults to the
            module-level ``moral_keywords``.

    Returns:
        The labels with the highest whole-word hit counts (at most two, highest
        first; ties keep the mapping's order), or ``["Non-moral"]`` when no
        keyword matches.
    """
    if keywords is None:
        keywords = moral_keywords
    text_lower = text.lower()
    scores = {}
    for moral, words in keywords.items():
        # `\b` needs a \w character on one side, but Bangla vowel signs and the
        # virama are combining marks that `\w` does not match, so a keyword
        # ending in one (e.g. "রক্ষা") could never match before a space.
        # Lookarounds express "not glued to a word character" and behave the
        # same as `\b` for the English keywords.
        hits = sum(
            len(re.findall(rf"(?<!\w){re.escape(word.lower())}(?!\w)", text_lower))
            for word in words
        )
        if hits > 0:
            scores[moral] = hits
    if scores:
        return sorted(scores, key=scores.get, reverse=True)[:2]
    return ["Non-moral"]

def extract_entities(text, lang='en'):
    """Return the key people, organisations and places mentioned in ``text``.

    For English the spaCy pipeline ``nlp_en`` is used and the unique
    PERSON / ORG / GPE entity strings are returned in sorted order (the
    original ``list(set(...))`` order was arbitrary). For any other language
    the function returns the entries of the optional module-level
    ``bangla_entities`` list that occur in ``text``; if that list is not
    defined the result is ``[]`` instead of a NameError.
    """
    if lang == 'en':
        doc = nlp_en(text)
        return sorted({ent.text for ent in doc.ents if ent.label_ in ("PERSON", "ORG", "GPE")})
    # Bangla heuristic: substring lookup against a known-names list, if one exists.
    try:
        known_names = bangla_entities
    except NameError:
        return []
    return [name for name in known_names if name in text]

def detect_main_theme(text):
    """Describe the article's theme as a comma-separated list of topic keywords.

    BERTopic is fitted on the single document ``[text]`` and up to seven
    keywords of the assigned topic are joined with ", ". Returns
    "Theme detection failed." when no topic words are available or when
    fitting raises.
    NOTE(review): fitting a topic model on one document may fail routinely -
    verify that this ever returns real keywords.
    """
    failure = "Theme detection failed."
    try:
        assigned, _ = topic_model.fit_transform([text])
        keywords = topic_model.get_topic(assigned[0])
        if keywords is None:
            return failure
        # Each entry is a (word, weight) pair; keep only the words.
        return ", ".join([term for term, _ in keywords[:7]])
    except Exception:
        return failure

# Cache for the FLAN-T5 tokenizer/model so they are loaded once, not on every call.
_FLAN_T5 = {}


def _load_flan_t5():
    """Return ``(tokenizer, model)`` for google/flan-t5-base, loading them on first use."""
    if "model" not in _FLAN_T5:
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
        # Store both only after both loads succeeded, so a failed load is retried cleanly.
        _FLAN_T5["tokenizer"] = tokenizer
        _FLAN_T5["model"] = model
    return _FLAN_T5["tokenizer"], _FLAN_T5["model"]


def extract_main_narrative(text):
    """Use FLAN-T5 to generate a 2-3 sentence main narrative summary of ``text``.

    The prompt is truncated to 1024 tokens and generation is capped at
    max_length=256. Returns "Narrative extraction failed." if loading or
    generation raises.
    """
    try:
        flan_tokenizer, flan_model = _load_flan_t5()
        prompt = f"Summarize this news article in 2-3 sentences explaining the main issue:\n\n{text}"
        inputs = flan_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        output = flan_model.generate(**inputs, max_length=256)
        return flan_tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception:
        return "Narrative extraction failed."

# --- Unified Analysis Function ---

def analyze_article(text):
    """Analyse a Bangla or English news article and return the results as a dict.

    Bangla input is translated to English first, every analysis step runs on
    the English text, and the summary, theme and narrative are translated back
    to Bangla. Sentiment and emotion labels are mapped to Bangla through fixed
    lookup tables.

    Returns:
        A dict with the keys "Language", "Summary", "Sentiment",
        "Sentiment Score", "Emotion", "Emotion Score", "Moral Foundations",
        "Key Figures", "Main Theme" and "Main Narrative"; or
        ``{"Error": ...}`` when the language is neither Bangla nor English.
    """
    lang = detect_language(text)
    print(f"Detected language: {lang}")

    if lang not in ('bn', 'en'):
        return {"Error": "Unsupported language or detection failed."}

    is_bangla = lang == 'bn'
    # Translate Bangla to English for analysis; English text is used directly.
    english = translate_text(text, src="bn", dest="en") if is_bangla else text

    summary = extract_summary(english)
    sentiment, sent_score = detect_sentiment(english, 'en')
    emotion, emo_score = detect_emotion(english, 'en')
    morals = detect_moral_foundations(english)
    entities = extract_entities(english, 'en')
    theme = detect_main_theme(english)
    narrative = extract_main_narrative(english)

    language_name = "English"
    if is_bangla:
        language_name = "বাংলা"

        # Translate back outputs to Bangla
        summary = reverse_translate_text(summary, src="en", dest="bn")
        theme = reverse_translate_text(theme, src="en", dest="bn")
        narrative = reverse_translate_text(narrative, src="en", dest="bn")

        # Sentiment and emotion labels are translated through fixed tables.
        # The sentiment checkpoint reports LABEL_0/1/2 (negative/neutral/positive),
        # so those raw labels are mapped as well as the readable names.
        sentiment_map = {
            "LABEL_0": "ঋণাত্মক", "LABEL_1": "নিরপেক্ষ", "LABEL_2": "ধনাত্মক",
            "POSITIVE": "ধনাত্মক", "NEGATIVE": "ঋণাত্মক", "NEUTRAL": "নিরপেক্ষ", "UNKNOWN": "অজানা",
        }
        # NOTE(review): "অবিশ্বাস" for "surprise" reads closer to "disbelief" - confirm wording.
        emotion_map = {
            "joy": "আনন্দ", "love": "ভালোবাসা", "anger": "রাগ", "sadness": "দুঃখ", "fear": "ভয়",
            "surprise": "অবিশ্বাস", "disgust": "ঘৃণা", "neutral": "নিরপেক্ষ"
        }

        sentiment = sentiment_map.get(sentiment.upper(), sentiment)
        emotion = emotion_map.get(emotion.lower(), emotion)

    return {
        "Language": language_name,
        "Summary": summary,
        "Sentiment": sentiment,
        "Sentiment Score": sent_score,
        "Emotion": emotion,
        "Emotion Score": emo_score,
        "Moral Foundations": morals,
        "Key Figures": entities,
        "Main Theme": theme,
        "Main Narrative": narrative,
    }

# --- Example Usage ---
english_text = """President Biden addressed the ongoing conflict and emphasized unity with NATO allies.
He pledged continued military support and humanitarian aid. The Kremlin responded with sharp criticism."""

text = """‘শাপলা’কে নির্বাচনী প্রতীক হিসেবে বিধিমালার তফসিলভুক্ত না করার নীতিগত সিদ্ধান্ত নিয়েছে নির্বাচন কমিশন (ইসি)। এর ফলে কোনো রাজনৈতিক দল তাদের দলীয় প্রতীক হিসেবে ‘শাপলা’ পাবে না।গণ–অভ্যুত্থানে নেতৃত্বদানকারী তরুণদের গড়া রাজনৈতিক দল জাতীয় নাগরিক পার্টি (এনসিপি) সম্প্রতি দল হিসেবে নিবন্ধনের জন্য ইসিতে আবেদন করে। তারা প্রতীক হিসেবে ‘শাপলা’ চেয়েছে। এনসিপির আবেদনে পছন্দের প্রতীকের তালিকায় শাপলা ছাড়াও ‘কলম’ ও ‘মোবাইল ফোন’ রাখা হয়েছে।অন্যদিকে নাগরিক ঐক্যও দলীয় প্রতীক শাপলা চেয়েছিল। তবে তারা পেয়েছে ‘কেটলি’। এখন নাগরিক ঐক্যও কেটলির পরিবর্তে শাপলা প্রতীক দাবি করেছে। গত ২ জুলাই প্রধান নির্বাচন কমিশনারের (সিইসি) সঙ্গে দেখা করার পর নাগরিক ঐক্যের সাংগঠনিক সম্পাদক সাকিব আনোয়ার সাংবাদিকদের বলেন, ‘আমরা যখন নিবন্ধন পাই, তখন আর আমরা পছন্দ অনুযায়ী প্রতীক পাইনি। এ বিবেচনায় আমরা গত ১৭ জুন প্রতীক পরিবর্তনের জন্য আবেদন করি এবং পছন্দের ক্রমে “শাপল” ও “দোয়েল” চাই।’নির্বাচন কমিশনার আব্দুর রহমানেল মাছউদ আজ বুধবার প্রথম আলোকে বলেন, শাপলাকে নির্বাচনী প্রতীক হিসেবে তফসিলভুক্ত করা হবে না মর্মে নীতিগত সিদ্ধান্ত হয়েছে। তিনি বলেন, অতীতেও কোনো কোনো দল শাপলা প্রতীক চেয়েছিল, কিন্তু দেওয়া হয়নি। জাতীয় প্রতীক ও জাতীয় পতাকার সম্মান রক্ষার্থে আইন আছে। তবে জাতীয় ফুল বা ফলের বিষয়ে আইন করা হয়নি। এসব বিষয় বিবেচনায় নিয়ে শাপলাকে নির্বাচনী প্রতীক হিসেবে তফসিলভুক্ত না করার নীতিগত সিদ্ধান্ত নেওয়া হয়েছে।নির্বাচন পরিচালনা বিধিমালার তফসিলে এখন ৬৯টি নির্বাচনী প্রতীক আছে। আগামী সংসদ নির্বাচন সামনে রেখে প্রতীকের সংখ্যা বাড়ানোর উদ্যোগ নিয়েছে নির্বাচন কমিশন। এবার দল ও স্বতন্ত্র প্রার্থীদের জন্য মোট প্রতীক ১০০–এর বেশি করার চিন্তা করছে ইসি। সংশ্লিষ্ট সূত্র জানায়, প্রতীক তালিকার তফসিল সংশোধনের জন্য ইসির সিদ্ধান্ত শিগগির ভেটিংয়ের জন্য আইন মন্ত্রণালয়ে পাঠানো হবে।"""

# The original printed the "English" header but analysed the Bangla `text`,
# leaving `english_text` unused. Each sample now runs under its own header.
print("\n--- English Article Analysis ---")
result_en = analyze_article(english_text)
for k, v in result_en.items():
    print(f"{k}: {v}")

print("\n--- Bangla Article Analysis ---")
result_bn = analyze_article(text)
for k, v in result_bn.items():
    print(f"{k}: {v}")





[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


NameError: name 'Translator' is not defined

In [None]:
!pip install --upgrade --force-reinstall transformers sentencepiece langdetect pandas torch --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.9/16.9 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-genai 1.24.0 requires httpx<1.0.0,>=0.28.1, which is not installed.
openai 1.93.0 requires httpx<1,>=0.23.0, which is not installed.
ipython 7.34.0 requires jedi>=0.16, which is not installed.
langsmith 0.4.4 requires httpx<1,>=0.23.0, which is not installed.
firebase-admin 6.9.0 requires httpx[http2]==0.28.1, which is not installed.
gradio-client 1.10.1 requires httpx>=0.24.1, which is not installed.
gradio 5.31.0 requires httpx>=0.24.1, which is not installed.
numba 0.61.2 requires numpy<2.3,>=1.24, but you have numpy 2.3.1 which is incompatible.
google-colab 1.0

In [None]:
### ✅ Ready-to-Run Universal News Analyzer (Bangla + English)
# Works fully in Google Colab

# --- Install Dependencies (Run in Colab) ---
!pip uninstall -y bertopic httpx --quiet
!pip install bertopic --quiet
!pip install nltk spacy banglanltk --quiet
!pip install transformers


# --- Download Models & Resources ---
import nltk
nltk.download("punkt")
import spacy
spacy.cli.download("en_core_web_sm")

# --- Imports ---
import re
from langdetect import detect
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from bertopic import BERTopic
import torch
import spacy


# --- Load Models ---
# Translation models (Bangla <--> English)
# bn2en_* back translate_bn_to_en(); en2bn_* back translate_en_to_bn().
from transformers import MarianMTModel, MarianTokenizer
bn2en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-bn-en")
bn2en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-bn-en")
# NOTE(review): "opus-mt-en-mul" is a one-to-many model; per its model card the
# input presumably needs a leading ">>id<<" target-language token (">>ben<<"
# for Bengali) - confirm that callers supply it.
en2bn_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-mul") # Corrected model name
en2bn_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-mul")     # Corrected model name


def translate_bn_to_en(text):
    """Translate Bangla ``text`` to English with the opus-mt-bn-en model.

    Tokenization uses truncation, so input beyond the tokenizer's maximum
    length is dropped; generation is capped at max_length=512. Returns the
    first decoded sequence without special tokens.
    """
    encoded = bn2en_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    output_ids = bn2en_model.generate(**encoded, max_length=512)
    first_sequence = output_ids[0]
    return bn2en_tokenizer.decode(first_sequence, skip_special_tokens=True)

def translate_en_to_bn(text):
    """Translate English ``text`` to Bangla with the multilingual opus-mt-en-mul model.

    The en-mul checkpoint chooses its output language from a leading ">>id<<"
    token; without it the target language is not pinned to Bengali. ">>ben<<"
    is prepended unless the text already starts with a ">>" token.
    Tokenization uses truncation and generation is capped at max_length=512.
    """
    if not text.lstrip().startswith(">>"):
        text = f">>ben<< {text}"
    inputs = en2bn_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    translated = en2bn_model.generate(**inputs, max_length=512)
    return en2bn_tokenizer.decode(translated[0], skip_special_tokens=True)

# NLP
# spaCy pipeline used by extract_entities() for PERSON / ORG / GPE entities.
nlp_en = spacy.load("en_core_web_sm")

# Summary (English)
summarizer = pipeline("summarization", model="google/pegasus-xsum")

# Sentiment (English)
# NOTE(review): label strings come from the checkpoint config; this checkpoint
# presumably emits LABEL_0/LABEL_1/LABEL_2 - confirm against the model card.
sentiment_pipeline_en = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")

# Emotion (English)
# return_all_scores=True returns a score per label so detect_emotion() can pick the top one.
emotion_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_pipeline = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer, return_all_scores=True)

# Main theme detection (BERTopic)
# detect_main_theme() refits this topic model on each article it is given.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
topic_model = BERTopic(embedding_model=sentence_model, verbose=False)

# Moral Foundation Keywords
# Maps each foundation label to English and Bangla keywords counted by
# detect_moral_foundations(). analyze_article() in this cell translates Bangla
# articles to English before calling it, so the Bangla entries only matter
# when the detector is called directly on Bangla text.
moral_keywords = {
    "Care/Harm": ["protect", "hurt", "aid", "injury", "rescue", "suffer", "heal", "compassion", "রক্ষা", "আঘাত", "সহায়তা", "ব্যথা"],
    "Fairness/Cheating": ["justice", "equality", "discrimination", "bias", "corruption", "transparency", "বিচার", "সমতা", "অনিয়ম", "দুর্নীতি"],
    "Loyalty/Betrayal": ["patriotism", "allegiance", "nation", "betrayal", "unity", "treason", "দেশপ্রেম", "বিশ্বাসঘাতকতা", "একতা"],
    "Authority/Subversion": ["law", "order", "authority", "respect", "disobedience", "revolt", "government", "আইন", "সরকার", "অবাধ্যতা"],
    "Purity/Degradation": ["clean", "pollution", "sacred", "sin", "immoral", "filth", "deviant", "পবিত্র", "অপবিত্র", "পাপ"]
}

# --- Core Functions ---
def detect_language(text):
    """Classify ``text`` as ``'bn'`` (Bangla) or ``'en'``.

    Every detected language other than Bangla is reported as ``'en'``.
    Returns ``'unknown'`` when the detector raises.
    """
    try:
        lang = detect(text)
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still stop a long run.
        return 'unknown'
    return 'bn' if lang.startswith('bn') else 'en'

def extract_summary(text):
    """Summarize the first 1024 characters of ``text``.

    Generation is deterministic (do_sample=False) with min_length=30 and
    max_length=130. Returns "Summary generation failed." if the pipeline raises.
    """
    try:
        out = summarizer(text[:1024], max_length=130, min_length=30, do_sample=False)
        return out[0]['summary_text']
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "Summary generation failed."

def detect_sentiment(text):
    """Classify the sentiment of English ``text`` as ``(label, score)``.

    Only the first 512 characters are classified; the score is rounded to
    3 decimals. Returns ``("UNKNOWN", 0.0)`` if the pipeline raises.
    """
    try:
        out = sentiment_pipeline_en(text[:512])[0]
        return out['label'], round(out['score'], 3)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "UNKNOWN", 0.0

def detect_emotion(text):
    """Return the dominant emotion of English ``text`` as ``(label, score)``.

    Only the first 512 characters are classified; the score is rounded to
    3 decimals. Returns ``("ERROR", 0.0)`` if the pipeline raises.
    """
    try:
        # The pipeline returns one list of {label, score} dicts per input.
        scores = emotion_pipeline(text[:512])[0]
        best = max(scores, key=lambda item: item['score'])
        return best['label'], round(best['score'], 3)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "ERROR", 0.0

def detect_moral_foundations(text, keywords=None):
    """Return up to two moral-foundation labels whose keywords occur in ``text``.

    Args:
        text: Article text; matching is case-insensitive.
        keywords: Optional ``{label: [keyword, ...]}`` mapping. Defaults to the
            module-level ``moral_keywords``.

    Returns:
        The labels with the highest whole-word hit counts (at most two, highest
        first; ties keep the mapping's order), or ``["Non-moral"]`` when no
        keyword matches.
    """
    if keywords is None:
        keywords = moral_keywords
    text_lower = text.lower()

    def count_hits(words):
        # The original rf"\\b...\\b" is a literal backslash followed by "b" in a
        # raw string, so it never matched. Lookarounds give whole-word matching
        # and also work for Bangla keywords ending in a combining vowel sign,
        # where `\b` would fail.
        return sum(
            len(re.findall(rf"(?<!\w){re.escape(word.lower())}(?!\w)", text_lower))
            for word in words
        )

    scores = {moral: count_hits(words) for moral, words in keywords.items()}
    scores = {moral: hits for moral, hits in scores.items() if hits > 0}
    return sorted(scores, key=scores.get, reverse=True)[:2] if scores else ["Non-moral"]

def extract_entities(text):
    """Return the unique PERSON / ORG / GPE entity strings spaCy finds in ``text``.

    The result is a list built from a set, so its order is not meaningful.
    """
    wanted_labels = ["PERSON", "ORG", "GPE"]
    unique_names = set()
    for ent in nlp_en(text).ents:
        if ent.label_ in wanted_labels:
            unique_names.add(ent.text)
    return list(unique_names)

def detect_main_theme(text):
    """Describe the article's theme as a comma-separated list of topic keywords.

    BERTopic is fitted on the single document ``[text]`` and up to five
    keywords of the assigned topic are joined with ", ". Returns
    "Theme detection failed." when no topic words are available or when
    fitting raises.
    NOTE(review): fitting a topic model on one document may fail routinely -
    verify that this ever returns real keywords.
    """
    try:
        topics, _ = topic_model.fit_transform([text])
        topic_words = topic_model.get_topic(topics[0])
        # Each entry is a (word, weight) pair; keep only the words.
        return ", ".join([w for w, _ in topic_words[:5]]) if topic_words else "Theme detection failed."
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "Theme detection failed."

# Cache for the FLAN-T5 tokenizer/model so they are loaded once, not on every call.
_FLAN_T5 = {}


def _load_flan_t5():
    """Return ``(tokenizer, model)`` for google/flan-t5-base, loading them on first use."""
    if "model" not in _FLAN_T5:
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
        # Store both only after both loads succeeded, so a failed load is retried cleanly.
        _FLAN_T5["tokenizer"] = tokenizer
        _FLAN_T5["model"] = model
    return _FLAN_T5["tokenizer"], _FLAN_T5["model"]


def extract_main_narrative(text):
    """Use FLAN-T5 to generate a 2-3 sentence summary of the article's main issue.

    The prompt is truncated to 1024 tokens and generation is capped at
    max_length=256. Returns "Narrative extraction failed." if loading or
    generation raises.
    """
    try:
        tokenizer, model = _load_flan_t5()
        prompt = f"Summarize this news article in 2-3 sentences explaining the main issue:\n{text}"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        output = model.generate(**inputs, max_length=256)
        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit are no longer swallowed.
        return "Narrative extraction failed."

# --- Main Analysis Pipeline ---
def analyze_article(text):
    """Run the full analysis pipeline on one article and return a result dict.

    Bangla input (``detect_language`` returns ``'bn'``) is translated to English
    before analysis; afterwards the summary, theme and narrative are translated
    back to Bangla and the sentiment/emotion labels are mapped to Bangla words
    where a mapping exists.  Moral foundations and entities are returned as
    produced by the English analysis.

    Returns:
        A dict with the keys ``Language``, ``Summary``, ``Sentiment``,
        ``Sentiment Score``, ``Emotion``, ``Emotion Score``,
        ``Moral Foundations``, ``Key Figures``, ``Main Theme`` and
        ``Main Narrative``.
    """
    is_bangla = detect_language(text) == 'bn'

    if is_bangla:
        text = translate_bn_to_en(text)

    summary = extract_summary(text)
    sentiment, sent_score = detect_sentiment(text)
    emotion, emo_score = detect_emotion(text)
    morals = detect_moral_foundations(text)
    entities = extract_entities(text)
    theme = detect_main_theme(text)
    narrative = extract_main_narrative(text)

    language_label = "English"
    if is_bangla:
        language_label = "বাংলা"
        summary = translate_en_to_bn(summary)
        theme = translate_en_to_bn(theme)
        narrative = translate_en_to_bn(narrative)

        sentiment_map = {"POSITIVE": "ধনাত্মক", "NEGATIVE": "ঋণাত্মক", "NEUTRAL": "নিরপেক্ষ", "UNKNOWN": "অজানা"}
        emotion_map = {
            "joy": "আনন্দ", "love": "ভালোবাসা", "anger": "রাগ", "sadness": "দুঃখ", "fear": "ভয়",
            "surprise": "অবিশ্বাস", "disgust": "ঘৃণা", "neutral": "নিরপেক্ষ"
        }

        # Labels without a Bangla mapping are passed through unchanged.
        sentiment = sentiment_map.get(sentiment.upper(), sentiment)
        emotion = emotion_map.get(emotion.lower(), emotion)

    # Single result literal for both languages so the two shapes cannot drift apart.
    return {
        "Language": language_label,
        "Summary": summary,
        "Sentiment": sentiment,
        "Sentiment Score": sent_score,
        "Emotion": emotion,
        "Emotion Score": emo_score,
        "Moral Foundations": morals,
        "Key Figures": entities,
        "Main Theme": theme,
        "Main Narrative": narrative
    }


# --- Example Run (Paste your article here) ---
text = """‘শাপলা’কে নির্বাচনী প্রতীক হিসেবে বিধিমালার তফসিলভুক্ত না করার নীতিগত সিদ্ধান্ত নিয়েছে নির্বাচন কমিশন (ইসি)। এর ফলে কোনো রাজনৈতিক দল তাদের দলীয় প্রতীক হিসেবে ‘শাপলা’ পাবে না।গণ–অভ্যুত্থানে নেতৃত্বদানকারী তরুণদের গড়া রাজনৈতিক দল জাতীয় নাগরিক পার্টি (এনসিপি) সম্প্রতি দল হিসেবে নিবন্ধনের জন্য ইসিতে আবেদন করে। তারা প্রতীক হিসেবে ‘শাপলা’ চেয়েছে। এনসিপির আবেদনে পছন্দের প্রতীকের তালিকায় শাপলা ছাড়াও ‘কলম’ ও ‘মোবাইল ফোন’ রাখা হয়েছে।অন্যদিকে নাগরিক ঐক্যও দলীয় প্রতীক শাপলা চেয়েছিল। তবে তারা পেয়েছে ‘কেটলি’। এখন নাগরিক ঐক্যও কেটলির পরিবর্তে শাপলা প্রতীক দাবি করেছে। গত ২ জুলাই প্রধান নির্বাচন কমিশনারের (সিইসি) সঙ্গে দেখা করার পর নাগরিক ঐক্যের সাংগঠনিক সম্পাদক সাকিব আনোয়ার সাংবাদিকদের বলেন, ‘আমরা যখন নিবন্ধন পাই, তখন আর আমরা পছন্দ অনুযায়ী প্রতীক পাইনি। এ বিবেচনায় আমরা গত ১৭ জুন প্রতীক পরিবর্তনের জন্য আবেদন করি এবং পছন্দের ক্রমে “শাপল” ও “দোয়েল” চাই।’নির্বাচন কমিশনার আব্দুর রহমানেল মাছউদ আজ বুধবার প্রথম আলোকে বলেন, শাপলাকে নির্বাচনী প্রতীক হিসেবে তফসিলভুক্ত করা হবে না মর্মে নীতিগত সিদ্ধান্ত হয়েছে। তিনি বলেন, অতীতেও কোনো কোনো দল শাপলা প্রতীক চেয়েছিল, কিন্তু দেওয়া হয়নি। জাতীয় প্রতীক ও জাতীয় পতাকার সম্মান রক্ষার্থে আইন আছে। তবে জাতীয় ফুল বা ফলের বিষয়ে আইন করা হয়নি। এসব বিষয় বিবেচনায় নিয়ে শাপলাকে নির্বাচনী প্রতীক হিসেবে তফসিলভুক্ত না করার নীতিগত সিদ্ধান্ত নেওয়া হয়েছে।নির্বাচন পরিচালনা বিধিমালার তফসিলে এখন ৬৯টি নির্বাচনী প্রতীক আছে। আগামী সংসদ নির্বাচন সামনে রেখে প্রতীকের সংখ্যা বাড়ানোর উদ্যোগ নিয়েছে নির্বাচন কমিশন। এবার দল ও স্বতন্ত্র প্রার্থীদের জন্য মোট প্রতীক ১০০–এর বেশি করার চিন্তা করছে ইসি। সংশ্লিষ্ট সূত্র জানায়, প্রতীক তালিকার তফসিল সংশোধনের জন্য ইসির সিদ্ধান্ত শিগগির ভেটিংয়ের জন্য আইন মন্ত্রণালয়ে পাঠানো হবে।"""
# Analyze the sample article above and print one "field: value" line per result entry.
result = analyze_article(text)
for field, value in result.items():
    print(f"{field}: {value}")

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gradio 5.31.0 requires httpx>=0.24.1, which is not installed.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.1 which is incompatible.
google-colab 1.0.0 requires requests==2.32.3, but you have requests 2.32.4 which is incompatible.
fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.1 which is incompatible.
cudf-cu12 25.2.1 requires numba<0.61.0a0,>=0.59.1, but you have numba 0.61.2 which is incompatible.
cudf-cu12 25.2.1 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.
dask-cudf-cu12 25.2.2 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.
distributed-ucxx-cu12 0.42.0 requires numba<0.61.0a0,>=0.59.1, but you have numba 0.61.2 which is incompatible.
plotnine 0.14.6 requires scipy<1.16.0,>=1.8.0, but you have 

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


ImportError: cannot import name 'pipeline' from 'transformers' (/usr/local/lib/python3.11/dist-packages/transformers/__init__.py)

In [None]:
# Remove the installed httpx and reinstall it pinned to 0.23.3.
# NOTE(review): pip's output for this cell reports that the 0.23.3 pin conflicts
# with gradio / gradio-client (>=0.24.1), google-genai (>=0.28.1),
# firebase-admin (==0.28.1) and googletrans (==0.13.3) — confirm which of these
# packages the notebook actually needs before keeping this pin.
!pip uninstall -y httpx
!pip install httpx==0.23.3

Found existing installation: httpx 0.23.3
Uninstalling httpx-0.23.3:
  Successfully uninstalled httpx-0.23.3
Collecting httpx==0.23.3
  Using cached httpx-0.23.3-py3-none-any.whl.metadata (7.1 kB)
Using cached httpx-0.23.3-py3-none-any.whl (71 kB)
Installing collected packages: httpx
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
googletrans 4.0.0rc1 requires httpx==0.13.3, but you have httpx 0.23.3 which is incompatible.
google-genai 1.24.0 requires httpx<1.0.0,>=0.28.1, but you have httpx 0.23.3 which is incompatible.
firebase-admin 6.9.0 requires httpx[http2]==0.28.1, but you have httpx 0.23.3 which is incompatible.
gradio-client 1.10.1 requires httpx>=0.24.1, but you have httpx 0.23.3 which is incompatible.
gradio 5.31.0 requires httpx>=0.24.1, but you have httpx 0.23.3 which is incompatible.[0m[31m
[0mSuccessfully installed httpx-0.23.3


In [None]:

# Force-reinstall BERTopic together with its dependencies.
# NOTE(review): the output shows this also re-collects numpy, pandas and
# scikit-learn; presumably that is what produces the version conflicts pip
# reports against the preinstalled Colab packages — confirm.
!pip install --upgrade --force-reinstall bertopic


Collecting bertopic
  Using cached bertopic-0.17.3-py3-none-any.whl.metadata (24 kB)
Collecting hdbscan>=0.8.29 (from bertopic)
  Using cached hdbscan-0.8.40-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (15 kB)
Collecting umap-learn>=0.5.0 (from bertopic)
  Using cached umap_learn-0.5.9.post2-py3-none-any.whl.metadata (25 kB)
Collecting numpy>=1.20.0 (from bertopic)
  Using cached numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting pandas>=1.1.5 (from bertopic)
  Using cached pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting plotly>=4.7.0 (from bertopic)
  Using cached plotly-6.2.0-py3-none-any.whl.metadata (8.5 kB)
Collecting scikit-learn>=1.0 (from bertopic)
  Using cached scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)
Collecting sentence-transformers>=0.4.1 (from bertopic)
  Using cached sentence_transformers-5.0.0-py3-none-any.whl.metada

In [None]:
### ✅ FastAPI Backend for Universal News Analyzer (Bangla + English)

# 1️⃣ Install required dependencies first (on your server/local):
# pip install fastapi uvicorn transformers sentencepiece torch langdetect spacy bertopic sentence-transformers nltk
# python -m nltk.downloader punkt
# python -m spacy download en_core_web_sm

# 2️⃣ Save this code as `main.py`

from fastapi import FastAPI, Request
from pydantic import BaseModel
from langdetect import detect
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from bertopic import BERTopic
import torch
import spacy
import re

# FastAPI application instance; the /analyze route further down is registered on it.
app = FastAPI()

# --- Models & Pipelines ---
# Everything in this section is created at import time, so every model is
# loaded before the first request is served.
# spaCy English pipeline used by extract_entities.
nlp_en = spacy.load("en_core_web_sm")
# Summarization pipeline used by extract_summary.
summarizer = pipeline("summarization", model="google/pegasus-xsum")
# Sentiment pipeline used by detect_sentiment.
sentiment_pipeline_en = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
# Emotion classifier; return_all_scores=True is requested because detect_emotion
# picks the top-scoring entry itself from the per-label scores.
emotion_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
emotion_pipeline = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer, return_all_scores=True)
# Sentence embedder handed to BERTopic; topic_model is used by detect_theme.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
topic_model = BERTopic(embedding_model=sentence_model, verbose=False)

# Translation Models
# Marian tokenizer/model pairs for both directions, used by translate_bn_to_en
# and translate_en_to_bn.
from transformers import MarianMTModel, MarianTokenizer
bn2en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-bn-en")
bn2en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-bn-en")
en2bn_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-bn")
en2bn_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-bn")

def translate_bn_to_en(text):
    """Translate Bangla *text* to English with the opus-mt-bn-en Marian model.

    Tokenization runs with truncation enabled, so input beyond the tokenizer's
    limit is dropped before translation; generation uses ``max_length=512``.
    Only the first generated sequence is decoded and returned.
    """
    encoded = bn2en_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    generated_ids = bn2en_model.generate(**encoded, max_length=512)
    first_sequence = generated_ids[0]
    return bn2en_tokenizer.decode(first_sequence, skip_special_tokens=True)

def translate_en_to_bn(text):
    """Translate English *text* to Bangla with the opus-mt-en-bn Marian model.

    Tokenization runs with truncation enabled, so input beyond the tokenizer's
    limit is dropped before translation; generation uses ``max_length=512``.
    Only the first generated sequence is decoded and returned.
    """
    encoded = en2bn_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    generated_ids = en2bn_model.generate(**encoded, max_length=512)
    first_sequence = generated_ids[0]
    return en2bn_tokenizer.decode(first_sequence, skip_special_tokens=True)

# Keyword lexicon per moral foundation, counted by detect_morals against the
# lower-cased article text.  Each list holds English keywords followed by
# Bangla ones.
# NOTE: analyze() translates Bangla articles to English before calling
# detect_morals, so on that path it is mainly the English keywords that can match.
moral_keywords = {
    "Care/Harm": ["protect", "hurt", "aid", "injury", "rescue", "suffer", "heal", "compassion", "রক্ষা", "আঘাত", "সহায়তা", "ব্যথা"],
    "Fairness/Cheating": ["justice", "equality", "discrimination", "bias", "corruption", "transparency", "বিচার", "সমতা", "অনিয়ম", "দুর্নীতি"],
    "Loyalty/Betrayal": ["patriotism", "allegiance", "nation", "betrayal", "unity", "treason", "দেশপ্রেম", "বিশ্বাসঘাতকতা", "একতা"],
    "Authority/Subversion": ["law", "order", "authority", "respect", "disobedience", "revolt", "government", "আইন", "সরকার", "অবাধ্যতা"],
    "Purity/Degradation": ["clean", "pollution", "sacred", "sin", "immoral", "filth", "deviant", "পবিত্র", "অপবিত্র", "পাপ"]
}

# --- Utility Functions ---
def detect_language(text):
    """Return a coarse language code for *text*.

    Returns:
        ``'bn'`` when the detected language code starts with ``"bn"``, ``'en'``
        for every other detected language, and ``'unknown'`` when detection
        raises.
    """
    try:
        return 'bn' if detect(text).startswith('bn') else 'en'
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not swallowed; detection failures still degrade to 'unknown'.
        return 'unknown'

def extract_summary(text):
    """Summarize *text* with the ``summarizer`` pipeline.

    Only the first 1024 characters are summarized; the summary length is
    bounded by ``min_length=30`` / ``max_length=130`` and sampling is disabled.

    Returns:
        The summary text, or ``"Summary failed."`` when the pipeline raises.
    """
    try:
        out = summarizer(text[:1024], max_length=130, min_length=30, do_sample=False)
        return out[0]['summary_text']
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not swallowed.
        return "Summary failed."

def detect_sentiment(text):
    """Classify the sentiment of *text*.

    Only the first 512 characters are passed to ``sentiment_pipeline_en``.

    Returns:
        A ``(label, score)`` tuple with the score rounded to 3 decimals, or
        ``("UNKNOWN", 0.0)`` when the pipeline call raises.
    """
    # NOTE(review): the label is returned exactly as the checkpoint names it;
    # if that turns out to be generic names such as LABEL_0/1/2, map them to
    # readable names here — confirm against the model card.
    try:
        top = sentiment_pipeline_en(text[:512])[0]
        return top['label'], round(top['score'], 3)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not swallowed; model failures still degrade to the sentinel.
        return "UNKNOWN", 0.0

def detect_emotion(text):
    """Return the highest-scoring emotion for *text*.

    Only the first 512 characters are passed to ``emotion_pipeline``; the first
    element of its result is treated as a list of ``{'label', 'score'}`` entries.

    Returns:
        A ``(label, score)`` tuple with the score rounded to 3 decimals, or
        ``("ERROR", 0.0)`` when the pipeline call or the score lookup raises.
    """
    try:
        per_label = emotion_pipeline(text[:512])[0]
        # max() keeps the first of equally-scored entries, matching what a
        # stable descending sort would place first.
        best = max(per_label, key=lambda entry: entry['score'])
        return best['label'], round(best['score'], 3)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not swallowed.
        return "ERROR", 0.0

def detect_morals(text, keywords=None):
    """Return up to two moral foundations whose keywords occur most often in *text*.

    Args:
        text: Article text; it is lower-cased before matching.
        keywords: Optional mapping ``foundation -> list of keywords``.  Defaults
            to the module-level ``moral_keywords`` lexicon.

    Returns:
        The (at most two) foundation names with the highest whole-word keyword
        counts, most frequent first, or ``["Non-moral"]`` when nothing matches.
    """
    lexicon = moral_keywords if keywords is None else keywords
    text_lower = text.lower()

    scores = {}
    for foundation, kws in lexicon.items():
        # The previous pattern used ``\\b`` inside a raw string, i.e. a literal
        # backslash followed by "b" instead of a word boundary, so nothing ever
        # matched.  ``\w`` lookarounds replace ``\b`` because ``\b`` needs a
        # ``\w`` character on one side, which keywords ending in a combining
        # vowel sign (common in Bangla) do not provide.
        hits = sum(
            len(re.findall(rf"(?<!\w){re.escape(w)}(?!\w)", text_lower))
            for w in kws
        )
        if hits > 0:
            scores[foundation] = hits

    if not scores:
        return ["Non-moral"]
    return sorted(scores, key=scores.get, reverse=True)[:2]

def extract_entities(text):
    """Return the distinct PERSON / ORG / GPE entity strings found in *text*.

    Entities come from the ``nlp_en`` spaCy pipeline.  Duplicates are removed
    via a set, so the order of the returned list carries no meaning.
    """
    kept_labels = ["PERSON", "ORG", "GPE"]
    parsed = nlp_en(text)
    distinct = {span.text for span in parsed.ents if span.label_ in kept_labels}
    return list(distinct)

def detect_theme(text):
    """Return the top five topic words for *text* as a comma-separated string.

    The shared ``topic_model`` is re-fit on just this one document on every call.

    Returns:
        Up to five topic words joined by ``", "``, or ``"Failed"`` when fitting
        raises or no topic words are returned.
    """
    # NOTE(review): fitting a topic model on a single document may never yield
    # usable topics — confirm this does not always end in "Failed".
    try:
        topics, _ = topic_model.fit_transform([text])
        words = topic_model.get_topic(topics[0])
        if not words:
            return "Failed"
        return ", ".join(w for w, _ in words[:5])
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not swallowed.
        return "Failed"

# Holds the loaded (tokenizer, model) pair so flan-t5-base is loaded only once
# per process instead of once per request.
_FLAN_T5_CACHE = {}


def _load_flan_t5():
    """Return the cached flan-t5-base ``(tokenizer, model)`` pair, loading it on first use."""
    if "pair" not in _FLAN_T5_CACHE:
        _FLAN_T5_CACHE["pair"] = (
            AutoTokenizer.from_pretrained("google/flan-t5-base"),
            AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base"),
        )
    return _FLAN_T5_CACHE["pair"]


def extract_narrative(text):
    """Generate a short narrative summary of *text* with flan-t5-base.

    The prompt (instruction + article) is truncated to 1024 tokens and the
    generated output is capped at ``max_length=256``.

    Returns:
        The decoded model output, or ``"Narrative failed."`` when loading the
        model or generating raises.
    """
    try:
        # Previously the tokenizer and model were re-loaded on every call.
        tokenizer, model = _load_flan_t5()
        prompt = f"Summarize this article in 2-3 lines showing key issue:\n{text}"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        output = model.generate(**inputs, max_length=256)
        return tokenizer.decode(output[0], skip_special_tokens=True)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # not swallowed.
        return "Narrative failed."

# --- API Schema ---
# Request body model for the /analyze endpoint.
class Article(BaseModel):
    # Raw article text; analyze() detects whether it is Bangla or English.
    text: str

@app.post("/analyze")
async def analyze(article: Article):
    """Analyze one article and return the result as a JSON-serializable dict.

    Bangla input (``detect_language`` returns ``'bn'``) is translated to English
    before analysis, and the summary, theme and narrative are translated back
    to Bangla afterwards.  Sentiment, emotion, moral foundations and entities
    are returned as produced by the English analysis.

    Returns:
        A dict with the keys ``Language``, ``Summary``, ``Sentiment``,
        ``Sentiment Score``, ``Emotion``, ``Emotion Score``,
        ``Moral Foundations``, ``Key Figures``, ``Main Theme`` and
        ``Main Narrative``.  ``Language`` is ``"Bangla"``, ``"English"`` or
        ``"Unknown"`` (when language detection failed).
    """
    # NOTE(review): every helper below is a plain synchronous call, so the
    # model inference runs inside this coroutine without yielding — consider
    # whether a non-async endpoint would suit this workload better.
    text = article.text
    lang = detect_language(text)
    is_bangla = lang == 'bn'

    if is_bangla:
        text = translate_bn_to_en(text)

    summary = extract_summary(text)
    sentiment, sent_score = detect_sentiment(text)
    emotion, emo_score = detect_emotion(text)
    morals = detect_morals(text)
    entities = extract_entities(text)
    theme = detect_theme(text)
    narrative = extract_narrative(text)

    if is_bangla:
        summary = translate_en_to_bn(summary)
        theme = translate_en_to_bn(theme)
        narrative = translate_en_to_bn(narrative)

    # detect_language() returns 'unknown' when detection fails; report that
    # honestly instead of labelling the article as English.
    language_names = {"bn": "Bangla", "en": "English"}

    return {
        "Language": language_names.get(lang, "Unknown"),
        "Summary": summary,
        "Sentiment": sentiment,
        "Sentiment Score": sent_score,
        "Emotion": emotion,
        "Emotion Score": emo_score,
        "Moral Foundations": morals,
        "Key Figures": entities,
        "Main Theme": theme,
        "Main Narrative": narrative
    }

# Run with: uvicorn main:app --reload



ModuleNotFoundError: Could not import module 'pipeline'. Are this object's requirements defined correctly?