In [1]:
# Mount Google Drive at /content/drive; the model directory, label map and
# district spreadsheet used by later cells are read from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os



In [14]:
!pip install -q groq


[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/137.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m137.3/137.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
from groq import Groq

# Build the Groq client from the GROQ_API_KEY environment variable.
# os.environ[...] raises KeyError when the variable is unset, and `os` must
# already have been imported (see the `import os` cell) before this cell runs.
client = Groq(api_key=os.environ["GROQ_API_KEY"])


In [8]:
import torch
import json
from transformers import XLMRobertaTokenizerFast, XLMRobertaForTokenClassification
# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_PATH = "/content/drive/MyDrive/NER_RETRAIN/xlmr_location_ner"

# Load the tokenizer and token-classification model saved under MODEL_PATH.
tokenizer = XLMRobertaTokenizerFast.from_pretrained(MODEL_PATH)
model = XLMRobertaForTokenClassification.from_pretrained(MODEL_PATH)
model.eval()
model.to(DEVICE)  # Move the model to the same device the inputs are sent to

# JSON object keys are always strings, so convert them back to int class ids.
# The explicit encoding keeps the read independent of the runtime's locale.
with open("/content/drive/MyDrive/NER_RETRAIN/id2tag.json", encoding="utf-8") as f:
    id2tag = {int(k): v for k, v in json.load(f).items()}

def extract_locations(text):
    """Return the unique location names the NER model tags in ``text``.

    The text is tokenized, run through the token-classification model, and
    each run of ``B-LOC`` / ``I-LOC`` predictions is merged back into one
    surface string.

    Args:
        text: Raw text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        list[str]: De-duplicated location strings in order of first
        appearance. Input longer than 512 tokens is truncated, so only the
        beginning of a very long text is scanned.
    """
    if not text:
        return []

    toks = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    toks = {k: v.to(DEVICE) for k, v in toks.items()}  # inputs must be on the model's device
    with torch.no_grad():
        logits = model(**toks).logits[0]

    preds = torch.argmax(logits, dim=-1).tolist()
    tokens = tokenizer.convert_ids_to_tokens(toks["input_ids"][0])

    # SentencePiece prefixes the first piece of every word with U+2581.
    # Written as an escape so the marker cannot be damaged by a re-encoding
    # of the notebook file.
    word_start = "\u2581"

    locations = []
    buf = ""

    for tok, p in zip(tokens, preds):
        label = id2tag[p]
        starts_word = tok.startswith(word_start)
        piece = tok.replace(word_start, "")

        # A B-LOC on a mid-word piece right after an open entity cannot begin
        # a new name, so it is treated as a continuation of the current one.
        continues_entity = label == "I-LOC" or (
            label == "B-LOC" and bool(buf) and not starts_word
        )

        if continues_entity:
            # Only a piece that starts a new word is separated by a space;
            # sub-word pieces are glued back onto the word they belong to.
            if buf and starts_word and piece:
                buf += " "
            buf += piece
        elif label == "B-LOC":
            if buf:
                locations.append(buf)
            buf = piece
        else:
            if buf:
                locations.append(buf)
                buf = ""

    if buf:
        locations.append(buf)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(locations))

In [9]:
def analyze_incident_with_llm(text, locations):
    """Ask the Groq chat model to describe the incident reported in ``text``.

    Args:
        text: The news text to analyse.
        locations: Detected locations, interpolated into the prompt as-is.

    Returns:
        The raw message content of the first completion choice. The prompt
        requests JSON only, but the reply is returned unparsed.
    """
    prompt = f"""
You are an AI incident-analysis engine for Indian news.

Given a news text and a list of detected locations, extract:

1. classification: "GOOD", "BAD", or "NEUTRAL"
2. event_type: (accident, crime, political, natural disaster, achievement, other)
3. severity: low / medium / high
4. deaths: number of deaths if mentioned
5. injured: number injured
6. main_incident_location: which location is the event-site
7. other_locations: everything else
8. summary: one sentence summary

TEXT: {text}
LOCATIONS: {locations}

Return ONLY valid JSON.
"""

    # Single-turn conversation: the whole task lives in one user message.
    chat_messages = [{"role": "user", "content": prompt}]

    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=chat_messages,
        temperature=0.1,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [10]:
import pandas as pd
# District/state reference table. lookup_location_info reads its
# "District Name", "State Name", "District Code" and "State Code" columns.
india_df = pd.read_excel("/content/drive/MyDrive/NER_RETRAIN/India District (1).xlsx")

def lookup_location_info(location, df=None):
    """Resolve a place name to district/state codes from the reference table.

    Matching ignores case and surrounding whitespace. District names are
    tried first, then state names.

    Args:
        location: Place name to look up. Anything that is not a non-blank
            string (``None``, a list, ``""`` ...) yields ``None``.
        df: Table with "District Name", "State Name", "District Code" and
            "State Code" columns. Defaults to the module-level ``india_df``.

    Returns:
        dict | None: ``district``, ``state``, ``district_code`` and
        ``state_code`` of the first matching row. For a state-level match
        ``district`` and ``district_code`` are ``None``. ``None`` when
        nothing matches.
    """
    if df is None:
        df = india_df

    # LLM output is not guaranteed to be a string; treat anything else as "no match".
    if not isinstance(location, str):
        return None
    key = location.strip().casefold()
    if not key:
        return None

    def _normalised(column):
        # Same normalisation as applied to `key`, so the comparison is symmetric.
        return df[column].str.strip().str.casefold()

    # Try district match
    row = df[_normalised("District Name") == key]
    if not row.empty:
        r = row.iloc[0]
        return {
            "district": r["District Name"],
            "state": r["State Name"],
            "district_code": int(r["District Code"]),
            "state_code": int(r["State Code"]),
        }

    # Try state match
    row = df[_normalised("State Name") == key]
    if not row.empty:
        r = row.iloc[0]
        return {
            "district": None,
            "state": r["State Name"],
            "district_code": None,
            "state_code": int(r["State Code"]),
        }

    return None


In [11]:
import re
import json

def extract_json(text):
    """Extract the first valid JSON value from arbitrary LLM output.

    Every ``{`` in the text is tried as the start of a JSON object, so prose,
    markdown fences or stray braces before or after the payload do not break
    parsing. If no object can be decoded, the text with markdown fences
    removed is parsed as a whole, which also accepts a bare array or scalar.

    Args:
        text: Raw model reply.

    Returns:
        The decoded JSON value (normally a dict).

    Raises:
        ValueError: If ``text`` is not a string or contains no parseable JSON.
    """
    if not isinstance(text, str):
        raise ValueError(
            "\u274c Could not parse JSON from LLM output:\n" + repr(text)
        )

    decoder = json.JSONDecoder()

    # raw_decode parses one complete value starting at the given index and
    # ignores whatever follows it, unlike a greedy "{...}" regex which would
    # stretch to the last closing brace in the reply.
    for brace in re.finditer(r"\{", text):
        try:
            value, _end = decoder.raw_decode(text, brace.start())
        except json.JSONDecodeError:
            continue
        return value

    # If still failing, try fixing common issues:
    text = text.strip()

    # Remove markdown fences
    text = text.replace("```json", "").replace("```", "")

    # Attempt direct load
    try:
        return json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(
            "\u274c Could not parse JSON from LLM output:\n" + text
        ) from exc


def extract_full_incident(text):
    """Run the full pipeline on one news text: NER, LLM analysis, district lookup.

    Args:
        text: News text to analyse.

    Returns:
        dict: The fields parsed from the LLM reply plus a ``location_details``
        key. It holds the result of ``lookup_location_info`` for
        ``main_incident_location``, or ``None`` when that field is missing,
        blank or not a string.

    Raises:
        ValueError: If the LLM reply cannot be parsed, or parses to something
            other than a JSON object.
    """
    # Step 1: Location detection
    locations = extract_locations(text)

    # Step 2: LLM incident analysis
    llm_json = analyze_incident_with_llm(text, locations)

    # Step 3: Robust JSON extraction
    data = extract_json(llm_json)
    if not isinstance(data, dict):
        # A JSON array or scalar has no fields to read; fail with a clear message.
        raise ValueError(
            "LLM reply is not a JSON object: " + type(data).__name__
        )

    # Step 4: Excel mapping for main incident location
    main_loc = data.get("main_incident_location")
    if isinstance(main_loc, str) and main_loc.strip():
        data["location_details"] = lookup_location_info(main_loc.strip())
    else:
        # Always emit the key so every result has the same shape.
        data["location_details"] = None

    return data

In [16]:
# Smoke test: run extract_full_incident (location detection, LLM analysis,
# district lookup) on a single hand-written sentence and display the result.
text = "Chennai and Bangalore have a water war and 3 people died in Dindigul."

result = extract_full_incident(text)
result


{'classification': 'BAD',
 'event_type': 'accident',
 'severity': 'high',
 'deaths': 3,
 'injured': None,
 'main_incident_location': 'Dindigul',
 'other_locations': ['Chennai', 'Bangalore'],
 'summary': 'A water war in Chennai and Bangalore resulted in 3 deaths in Dindigul.',
 'location_details': {'district': 'Dindigul',
  'state': 'TAMIL NADU',
  'district_code': 612,
  'state_code': 33}}

In [15]:
!pip install newspaper3k


Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting cssselect>=0.9.2 (from newspaper3k)
  Downloading cssselect-1.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting feedparser>=5.2.1 (from newspaper3k)
  Downloading feedparser-6.0.12-py3-none-any.whl.metadata (2.7 kB)
Collecting tldextract>=2.0.1 (from newspaper3k)
  Downloading tldextract-5.3.0-py3-none-any.whl.metadata (11 kB)
Collecting feedfinder2>=0.0.4 (from newspaper3k)
  Downloading feedfinder2-0.0.4.tar.gz (3.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jieba3k>=0.35.1 (from newspaper3k)
  Downloading jieba3k-0.35.1.zip (7.4 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m7.4/7.4 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tinysegmenter==0.3 (from newspaper3k)
  Downloading tinysegmente

In [3]:
!pip install feedparser


Collecting feedparser
  Downloading feedparser-6.0.12-py3-none-any.whl.metadata (2.7 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading feedparser-6.0.12-py3-none-any.whl (81 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m81.5/81.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6046 sha256=04413f9813db9b2bb973cb7a941e177976faddf13e0cb6e67f5891fda8cd21ef
  Stored in directory: /root/.cache/pip/wheels/03/f5/1a/23761066dac1d0e8e683e5fdb27e12de53209d05a4a37e6246
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser
Successfully installed feedparser-6.0.12 sgmllib3k-1.0.0


In [17]:
import feedparser

def fetch_rss_articles(feed_url, limit=10):
    """Fetch up to ``limit`` entries from an RSS feed.

    Args:
        feed_url: URL of the feed to parse.
        limit: Maximum number of feed entries to read.

    Returns:
        list[dict]: One dict per usable entry with ``title``, ``link`` and
        ``text`` (title and summary joined by a blank line). Missing fields
        become empty strings; entries with neither title nor summary are
        skipped, so fewer than ``limit`` items may be returned.
    """
    # NOTE(review): feedparser appears to report unreachable or malformed
    # feeds via `feed.bozo` rather than raising - confirm whether callers
    # should inspect it.
    feed = feedparser.parse(feed_url)
    articles = []

    for entry in feed.entries[:limit]:
        # Feed items are not required to carry every field; attribute access
        # would raise AttributeError on a missing one.
        title = entry.get("title", "")
        summary = entry.get("summary", "")
        link = entry.get("link", "")

        if not (title or summary):
            continue  # nothing to analyse

        text = title + "\n\n" + summary
        articles.append({"title": title, "link": link, "text": text})

    return articles


In [18]:
# RSS feeds to poll.
rss_feeds = [
    "https://feeds.feedburner.com/ndtvnews-india-news",
    "https://www.hindustantimes.com/feeds/rss/india-news/rssfeed.xml",
    "https://timesofindia.indiatimes.com/rssfeeds/296589292.cms",
    "https://www.indiatoday.in/rss/1206614",
    "https://indianexpress.com/section/india/feed/"
]

all_articles = []

# The status emoji are written as escapes so the messages cannot be garbled
# by a re-encoding of the notebook file.
for feed in rss_feeds:
    try:
        print("\U0001F4E1 Fetching:", feed)
        articles = fetch_rss_articles(feed, limit=5)
        all_articles.extend(articles)
    except Exception as e:
        # A failing feed is reported and skipped; the others are still fetched.
        print("\u274C RSS Error:", e)

len(all_articles)




üì° Fetching: https://feeds.feedburner.com/ndtvnews-india-news
üì° Fetching: https://www.hindustantimes.com/feeds/rss/india-news/rssfeed.xml
üì° Fetching: https://timesofindia.indiatimes.com/rssfeeds/296589292.cms
üì° Fetching: https://www.indiatoday.in/rss/1206614
üì° Fetching: https://indianexpress.com/section/india/feed/


25

In [19]:
results = []

for article in all_articles:
    print("\n\U0001F4F0 Analyzing:", article["title"])
    try:
        incident = extract_full_incident(article["text"])
    except Exception as exc:
        # A single bad LLM reply (extract_json raises ValueError) or a failed
        # API call must not discard the articles that were already analysed.
        print("\u274C Skipped:", exc)
        continue

    results.append({
        "title": article["title"],
        "link": article["link"],
        "incident": incident
    })



üì∞ Analyzing: Jaipur Girl Begged Teacher For Help "For 45 Minutes" Before Suicide: Report

üì∞ Analyzing: 'Very Intense Earthquake, Building Shook For 30 Seconds': Kolkata Residents On Tremors

üì∞ Analyzing: 5.7-Magnitude Earthquake Hits Bangladesh, Tremors Felt In Kolkata

üì∞ Analyzing: "Stand And Greet": New SOP For Maharashtra Officials To Welcome Politicians

üì∞ Analyzing: Over 200 Al Falah University Doctors, Staff Under Scanner After Delhi Blast

üì∞ Analyzing: ‚ÄòEmotionally exhausting‚Äô: Al Falah University students' kin unsure of next steps amid terror probe

üì∞ Analyzing: Himachal man who lost his memory reunited with family after 45 years

üì∞ Analyzing: Haryana man kills sister for marrying against family's wishes, held before attacking her husband

üì∞ Analyzing: Kolkata earthquake: Chandelier, light, water bottle shake as tremors jolt city

üì∞ Analyzing: ‚ÄòBollywood-style beating‚Äô: Assam CM after police seize drug contraband

üì∞ Analyzing: 'He‚Äôs f

In [20]:
import json
# ensure_ascii=False keeps non-ASCII text (Hindi summaries, curly quotes)
# readable instead of printing \uXXXX escapes.
print(json.dumps(results, indent=2, ensure_ascii=False))


[
  {
    "title": "Jaipur Girl Begged Teacher For Help \"For 45 Minutes\" Before Suicide: Report",
    "link": "https://www.ndtv.com/india-news/jaipur-neerja-modi-school-suicide-class-4-student-probe-report-cbse-latest-news-rajasthan-9674428#publisher=newsstand",
    "incident": {
      "classification": "BAD",
      "event_type": "achievement",
      "severity": "high",
      "deaths": 1,
      "injured": 0,
      "main_incident_location": "Jaipur",
      "other_locations": [],
      "summary": "A student in Jaipur begged her teacher for help for 45 minutes before taking her own life.",
      "location_details": {
        "district": "Jaipur",
        "state": "RAJASTHAN",
        "district_code": 110,
        "state_code": 8
      }
    }
  },
  {
    "title": "'Very Intense Earthquake, Building Shook For 30 Seconds': Kolkata Residents On Tremors",
    "link": "https://www.ndtv.com/india-news/earthquake-kolkata-today-latest-news-most-intense-earthquake-of-my-life-residents-on-kolkat

In [21]:
import requests
from bs4 import BeautifulSoup

def scrape_article_hindi(url):
    """Download an article page and return its headline and paragraph text.

    Args:
        url: Address of the article page.

    Returns:
        str | None: The first ``<h1>`` text, a blank line, then the text of
        every non-empty ``<p>`` element joined by newlines. ``None`` when the
        response status is not 200, when any exception occurs, or when the
        assembled text is 50 characters or shorter.
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "hi-IN,hi;q=0.9,en;q=0.8",
        "Referer": "https://www.google.com/",
    }
    try:
        r = requests.get(url, headers=headers, timeout=10)
        if r.status_code != 200:
            print("\u274C Request blocked:", r.status_code, url)
            return None

        # Parse the raw bytes rather than r.text: when the server sends no
        # charset, r.text falls back to ISO-8859-1 and garbles Devanagari.
        # A charset the server did declare is still passed on as a hint.
        content_type = r.headers.get("Content-Type", "")
        declared_encoding = r.encoding if "charset" in content_type.lower() else None
        soup = BeautifulSoup(r.content, "lxml", from_encoding=declared_encoding)

        # headline
        h1 = soup.find("h1")
        title = h1.get_text(strip=True) if h1 else ""

        # body - pick up article text paragraphs, dropping empty ones
        paragraphs = (p.get_text(strip=True) for p in soup.find_all("p"))
        body = "\n".join(par for par in paragraphs if par)

        full_text = title + "\n\n" + body
        return full_text if len(full_text) > 50 else None

    except Exception as e:
        # Best effort: report the failure and let the caller handle None.
        print("Error scraping:", e, url)
        return None


In [23]:
def chunk_text_for_ner(text, max_tokens=450, tokens_per_word=1.3):
    """Split ``text`` into whitespace-delimited chunks sized for the NER model.

    Token counts are estimated as ``words * tokens_per_word``, so each chunk
    holds at most ``int(max_tokens / tokens_per_word)`` words (never fewer
    than one). This is only an estimate; the real token count depends on the
    tokenizer and the language of the text.

    Args:
        text: Text to split. Runs of whitespace are collapsed.
        max_tokens: Estimated token budget per chunk.
        tokens_per_word: Assumed average number of tokens per word.

    Returns:
        list[str]: Chunks in original order; ``[]`` for empty text.

    Raises:
        ValueError: If ``tokens_per_word`` is not positive.
    """
    if tokens_per_word <= 0:
        raise ValueError("tokens_per_word must be positive")

    words = text.split()

    # Convert the token budget into a word budget using the estimate above.
    words_per_chunk = max(1, int(max_tokens / tokens_per_word))

    return [
        " ".join(words[start:start + words_per_chunk])
        for start in range(0, len(words), words_per_chunk)
    ]


In [24]:
def extract_locations(text):
    """Return the unique location names the NER model tags in ``text``.

    The text is split with ``chunk_text_for_ner`` and each chunk is tokenized
    and run through the token-classification model. Runs of ``B-LOC`` /
    ``I-LOC`` predictions are merged back into surface strings.

    Args:
        text: Raw text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        list[str]: De-duplicated location strings in order of first
        appearance. A chunk that still exceeds 512 tokens is truncated.
    """
    if not text:
        return []

    chunks = chunk_text_for_ner(text, max_tokens=400)

    # SentencePiece prefixes the first piece of every word with U+2581.
    # Written as an escape so the marker cannot be damaged by a re-encoding
    # of the notebook file.
    word_start = "\u2581"

    all_locations = []

    for chunk in chunks:
        toks = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
        toks = {k: v.to(DEVICE) for k, v in toks.items()}  # inputs must be on the model's device
        with torch.no_grad():
            logits = model(**toks).logits[0]

        preds = torch.argmax(logits, dim=-1).tolist()
        tokens = tokenizer.convert_ids_to_tokens(toks["input_ids"][0])

        locations = []
        buf = ""

        for tok, p in zip(tokens, preds):
            label = id2tag[p]
            starts_word = tok.startswith(word_start)
            piece = tok.replace(word_start, "")

            # A B-LOC on a mid-word piece right after an open entity cannot
            # begin a new name, so it continues the current one.
            continues_entity = label == "I-LOC" or (
                label == "B-LOC" and bool(buf) and not starts_word
            )

            if continues_entity:
                # Only a piece that starts a new word is separated by a space;
                # sub-word pieces are glued back onto their word.
                if buf and starts_word and piece:
                    buf += " "
                buf += piece
            elif label == "B-LOC":
                if buf:
                    locations.append(buf)
                buf = piece
            else:
                if buf:
                    locations.append(buf)
                    buf = ""

        if buf:
            locations.append(buf)

        all_locations.extend(locations)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(all_locations))


In [25]:
# `url` is defined here so this cell runs on a fresh kernel; previously it was
# only assigned in a later cell, which raises NameError on Restart & Run All.
url = "https://www.amarujala.com/photo-gallery/delhi-ncr/delhi-blast-200-al-falah-doctors-on-radar-many-suddenly-absconded-on-day-of-bombing-2025-11-20?src=trending"

text = scrape_article_hindi(url)
locations = extract_locations(text)
incident = extract_full_incident(text)
incident


{'classification': 'BAD',
 'event_type': 'crime',
 'severity': 'high',
 'deaths': None,
 'injured': None,
 'main_incident_location': 'Delhi',
 'other_locations': ['‡§´‡§∞‡•Ä‡§¶‡§æ‡§¨‡§æ‡§¶',
  '‡§Ü‡§ú‡§Æ‡§ó‡§¢‡§º',
  '‡§ó‡•Å‡§ú‡§∞‡§æ‡§§',
  '‡§â‡§°‡•Å‡§™‡•Ä',
  '‡§ú‡§Ø‡§™‡•Å‡§∞',
  '‡§ï‡§∞‡•ç‡§®‡§æ‡§ü‡§ï',
  '‡§ß‡•å‡§ú',
  '‡§Ö‡§´‡§ó‡§æ‡§®‡§ø‡§∏‡•ç‡§§‡§æ‡§®',
  '‡§Ö‡§π‡§Æ‡§¶‡§æ‡§¨‡§æ‡§¶'],
 'summary': '‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§¨‡•ç‡§≤‡§æ‡§∏‡•ç‡§ü ‡§ï‡•á ‡§¨‡§æ‡§¶ ‡§Ö‡§≤-‡§´‡§≤‡§æ‡§π ‡§µ‡§ø‡§∂‡•ç‡§µ‡§µ‡§ø‡§¶‡•ç‡§Ø‡§æ‡§≤‡§Ø ‡§∏‡•Å‡§∞‡•ç‡§ñ‡§ø‡§Ø‡•ã‡§Ç ‡§Æ‡•á‡§Ç ‡§π‡•à, ‡§î‡§∞ ‡§è‡§ú‡•á‡§Ç‡§∏‡§ø‡§Ø‡•ã‡§Ç ‡§ï‡•á ‡§∞‡§°‡§æ‡§∞ ‡§™‡§∞ ‡§Ö‡§¨ ‡§Ö‡§≤-‡§´‡§≤‡§æ‡§π ‡§Ø‡•Ç‡§®‡§ø‡§µ‡§∞‡•ç‡§∏‡§ø‡§ü‡•Ä ‡§ï‡•á 200 ‡§∏‡•á ‡§ú‡•ç‡§Ø‡§æ‡§¶‡§æ ‡§°‡•â‡§ï‡•ç‡§ü‡§∞ ‡§π‡•à‡§Ç‡•§',
 'location_details': None}

In [26]:
url = "https://www.amarujala.com/photo-gallery/delhi-ncr/delhi-blast-200-al-falah-doctors-on-radar-many-suddenly-absconded-on-day-of-bombing-2025-11-20?src=trending"

text = scrape_article_hindi(url)
if text is None:
    # scrape_article_hindi returns None for non-200 responses, exceptions and
    # pages with too little text; stop here instead of failing on text[:400].
    raise RuntimeError(f"Could not scrape article text from {url}")
print("SCRAPED TEXT PREVIEW:\n", text[:400], "\n")

locations = extract_locations(text)
print("DETECTED LOCATIONS:", locations)

incident = extract_full_incident(text)
incident


SCRAPED TEXT PREVIEW:
 Delhi Blast: ‡§Ö‡§≤ ‡§´‡§≤‡§æ‡§π ‡§ï‡•á 200 ‡§°‡•â‡§ï‡•ç‡§ü‡§∞ ‡§∞‡§°‡§æ‡§∞ ‡§™‡§∞, ‡§ß‡§Æ‡§æ‡§ï‡•á ‡§µ‡§æ‡§≤‡•á ‡§¶‡§ø‡§® ‡§ï‡§à ‡§è‡§ï‡§æ‡§è‡§ï ‡§π‡•Å‡§Ü ‡§´‡§∞‡§æ‡§∞; ‡§°‡•â. ‡§â‡§Æ‡§∞ ‡§ï‡•ã ‡§≤‡•á‡§ï‡§∞ ‡§è‡§ï ‡§î‡§∞ ‡§ñ‡•Å‡§≤‡§æ‡§∏‡§æ

‡§Æ‡•á‡§∞‡§æ ‡§∂‡§π‡§∞

Link Copied
‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§¨‡§Æ ‡§ß‡§Æ‡§æ‡§ï‡•á ‡§ï‡•á ‡§¨‡§æ‡§¶ ‡§Ö‡§≤-‡§´‡§≤‡§æ‡§π ‡§µ‡§ø‡§∂‡•ç‡§µ‡§µ‡§ø‡§¶‡•ç‡§Ø‡§æ‡§≤‡§Ø ‡§∏‡•Å‡§∞‡•ç‡§ñ‡§ø‡§Ø‡•ã‡§Ç ‡§Æ‡•á‡§Ç ‡§π‡•à‡•§ ‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§Æ‡•á‡§Ç 10 ‡§®‡§µ‡§Ç‡§¨‡§∞ ‡§ï‡•ã ‡§¨‡•ç‡§≤‡§æ‡§∏‡•ç‡§ü ‡§ï‡§∞‡§®‡•á ‡§µ‡§æ‡§≤‡§æ ‡§Ü‡§§‡•ç‡§Æ‡§ò‡§æ‡§§‡•Ä ‡§Ü‡§§‡§Ç‡§ï‡•Ä ‡§°‡•â. ‡§â‡§Æ‡§∞ ‡§â‡§® ‡§®‡§¨‡•Ä ‡§Ö‡§≤ ‡§´‡§≤‡§æ‡§π ‡§∏‡•á ‡§ú‡•Å‡§°‡§º‡§æ ‡§™‡§π‡§≤‡§æ ‡§Ü‡§§‡§Ç‡§ï‡•Ä ‡§®‡§π‡•Ä‡§Ç ‡§•‡§æ‡•§ ‡§µ‡§∞‡•ç‡§∑ 2008 ‡§Æ‡•á‡§Ç ‡§Ö‡§π‡§Æ‡§¶‡§æ‡§¨‡§æ‡§¶, ‡§ó‡•Å‡§ú‡§∞‡§æ‡§§ ‡§Æ‡•á‡§Ç ‡§π‡•Å‡§è ‡§∏‡•Ä‡§∞‡§ø‡§Ø‡§≤ ‡§¨‡§Æ ‡§ß‡§Æ‡§æ‡§ï‡•ã‡§Ç ‡§ï‡•á ‡§∏‡§Æ‡§Ø ‡§≠‡•Ä ‡§Ö‡§≤ ‡§´‡§≤‡§æ‡§π ‡§µ‡§ø‡§µ‡§ø ‡§ï‡§æ ‡§®‡§æ‡§Æ ‡§Ü‡§Ø‡§æ ‡§•‡§æ‡•§ ‡§Ö‡§π‡§Æ‡§¶‡§æ 

DETECTED

{'classification': 'BAD',
 'event_type': 'crime',
 'severity': 'high',
 'deaths': None,
 'injured': None,
 'main_incident_location': 'Delhi',
 'other_locations': ['‡§´‡§∞‡•Ä‡§¶‡§æ‡§¨‡§æ‡§¶',
  '‡§Ü‡§ú‡§Æ‡§ó‡§¢‡§º',
  '‡§ó‡•Å‡§ú‡§∞‡§æ‡§§',
  '‡§â‡§°‡•Å‡§™‡•Ä',
  '‡§ú‡§Ø‡§™‡•Å‡§∞',
  '‡§ï‡§∞‡•ç‡§®‡§æ‡§ü‡§ï',
  '‡§ß‡•å‡§ú',
  '‡§Ö‡§´‡§ó‡§æ‡§®‡§ø‡§∏‡•ç‡§§‡§æ‡§®',
  '‡§Ö‡§π‡§Æ‡§¶‡§æ‡§¨‡§æ‡§¶'],
 'summary': '‡§¶‡§ø‡§≤‡•ç‡§≤‡•Ä ‡§¨‡•ç‡§≤‡§æ‡§∏‡•ç‡§ü ‡§ï‡•á ‡§¨‡§æ‡§¶ ‡§Ö‡§≤-‡§´‡§≤‡§æ‡§π ‡§µ‡§ø‡§∂‡•ç‡§µ‡§µ‡§ø‡§¶‡•ç‡§Ø‡§æ‡§≤‡§Ø ‡§∏‡•Å‡§∞‡•ç‡§ñ‡§ø‡§Ø‡•ã‡§Ç ‡§Æ‡•á‡§Ç ‡§π‡•à, ‡§î‡§∞ ‡§è‡§ú‡•á‡§Ç‡§∏‡§ø‡§Ø‡•ã‡§Ç ‡§ï‡•á ‡§∞‡§°‡§æ‡§∞ ‡§™‡§∞ 200 ‡§∏‡•á ‡§ú‡•ç‡§Ø‡§æ‡§¶‡§æ ‡§°‡•â‡§ï‡•ç‡§ü‡§∞ ‡§π‡•à‡§Ç‡•§',
 'location_details': None}

In [2]:
# Archive the NER_RETRAIN Drive folder into /content, excluding everything
# inside its xlmr_location_ner sub-directory.
# NOTE(review): the archive is named after the directory it excludes - confirm the name is intended.
!zip -r /content/xlmr_location_ner.zip /content/drive/MyDrive/NER_RETRAIN -x "/content/drive/MyDrive/NER_RETRAIN/xlmr_location_ner/*"


  adding: content/drive/MyDrive/NER_RETRAIN/ (stored 0%)
  adding: content/drive/MyDrive/NER_RETRAIN/Inference.ipynb (deflated 78%)
  adding: content/drive/MyDrive/NER_RETRAIN/eng.testb (deflated 81%)
  adding: content/drive/MyDrive/NER_RETRAIN/eng.testa (deflated 80%)
  adding: content/drive/MyDrive/NER_RETRAIN/eng.train (deflated 80%)
  adding: content/drive/MyDrive/NER_RETRAIN/ner.csv (deflated 79%)
  adding: content/drive/MyDrive/NER_RETRAIN/India District (1).xlsx (deflated 16%)
  adding: content/drive/MyDrive/NER_RETRAIN/comprehensive_indian_ner_dataset.csv (deflated 77%)
  adding: content/drive/MyDrive/NER_RETRAIN/.ipynb_checkpoints/ (stored 0%)
  adding: content/drive/MyDrive/NER_RETRAIN/synthetic_sample_head.csv (deflated 56%)
  adding: content/drive/MyDrive/NER_RETRAIN/id2tag.json (deflated 51%)
  adding: content/drive/MyDrive/NER_RETRAIN/tag2id.json (deflated 47%)
  adding: content/drive/MyDrive/NER_RETRAIN/NER_RETRAIN.ipynb (deflated 81%)


In [6]:
from google.colab import files

# files.download sends a single file to the browser, so download the archive
# built by the zip cell rather than the Drive directory itself.
files.download('/content/xlmr_location_ner.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>