Installing EDA Requirements

In [None]:
# ==== 0) Install deps ====
!pip -q install pandas numpy matplotlib chromadb sentence-transformers xgboost scikit-learn orjson

# Rabindra Dhant MMA Fighter Data EDA and Mind Map Structuring in one script
# This code loads, prepares, explores and organizes insights relevant to Rabindra Dhant's MMA career,
# supporting a mind map style representation of his journey using Python libraries.
# Suitable for running in Google Colab or any Python environment.

import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

Installations

In [None]:
import gradio as gr
import pandas as pd
import json
import re
import openai
import tempfile
import soundfile as sf
import os
import chromadb
from chromadb.config import Settings
import openai
from google.colab import userdata

Load Data Safely

In [None]:
# ==== 1) Imports & helpers ====
import os, json, re, ast, math, orjson
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from google.colab import files

# Robust JSON loader (handles "", None, malformed)
def safe_json_load(x):
    if pd.isna(x) or x=="":
        return {}
    # handle double-quoted JSON that might already be a dict-like string
    try:
        return orjson.loads(x)
    except:
        try:
            return json.loads(x)
        except:
            try:
                return ast.literal_eval(x)
            except:
                return {}

def to_seconds_mmss(s):
    if pd.isna(s) or s=="":
        return np.nan
    if isinstance(s, (int,float)):
        return float(s)
    m = re.match(r"^(\d{1,2}):(\d{2})$", str(s).strip())
    if not m:
        return np.nan
    return int(m.group(1))*60 + int(m.group(2))

def coalesce(*vals):
    for v in vals:
        if v not in (None, "", np.nan) and not (isinstance(v,float) and math.isnan(v)):
            return v
    return np.nan


In [10]:
import pandas as pd

# Safer loading: handle quotes and skip problematic lines
# Assuming you have uploaded the file to the root of your Google Drive ("My Drive").
# UPDATE THE PATH BELOW IF YOUR FILE IS IN A DIFFERENT LOCATION IN GOOGLE DRIVE.
df = pd.read_csv(
    "https://raw.githubusercontent.com/amulsapkota1/sports-pulse/main/master_data-rabindra-dhant.txt",
    sep=",",
    quotechar='"',
    engine="python",      # slower but handles messy quoting
    on_bad_lines="skip"   # skip any malformed rows
)

print("Shape:", df.shape)
print("Columns:", df.columns.tolist())
df.head(3)

Shape: (16, 32)
Columns: ['item_id', 'source_url', 'source_type', 'source_title', 'author_or_channel', 'published_date', 'accessed_date', 'language', 'entities', 'tags', 'org_promotion', 'event_name', 'event_date', 'location_city', 'location_country', 'fighter_a', 'fighter_b', 'weight_class', 'odds_a', 'odds_b', 'outcome', 'method', 'round', 'time_mmss', 'stats_json', 'extras_json', 'content_hash_sha1', 'chunk_index', 'char_start', 'char_end', 'token_est', 'chunk_text']


Unnamed: 0,item_id,source_url,source_type,source_title,author_or_channel,published_date,accessed_date,language,entities,tags,...,round,time_mmss,stats_json,extras_json,content_hash_sha1,chunk_index,char_start,char_end,token_est,chunk_text
0,MFN17_Dhant-vs-Koren,https://tapology.com/fightcenter/bouts/1032822...,stats_page,Chungreng Koren vs. Rabindra Dhant,Tapology,2025-08-02,2024-05-15,en,"Rabindra Dhant, Chungreng Koren, Matrix Fight ...",MMA|MFN|Bantamweight|Title Fight|KO,...,3.0,00:53,"{""fighter_a_record_at_fight"":""8-1-0"",""fighter_...","{""referee"":""Marc Goddard"",""fighter_a_height_cm...",421528c3038676c8c9a334b07fb1f99a38f619e8,0.0,0.0,1355.0,339,**Rabindra Dhant** (8-1-0) defeated **Chungren...
1,Dhant_struggles_triumphs,https://english.onlinekhabar.com/rabindra-dhan...,article,Rabindra Dhant's struggles and triumphs on the...,Shashwat Pant,2023-09-25,2024-05-15,en,"Rabindra Dhant, Bajhang, Torepchi Dongak, One ...",MMA|Biography|Nepal|Fighter Journey|One Champi...,...,,,,,"{""age_years"":24}",0.0,0.0,1473.0,369,"**Rabindra Dhant** grew up in Bajhang, Nepal, ..."
2,Dhant_struggles_triumphs,https://english.onlinekhabar.com/rabindra-dhan...,article,Rabindra Dhant's struggles and triumphs on the...,Shashwat Pant,2023-09-25,2024-05-15,en,"Rabindra Dhant, Bajhang, Torepchi Dongak, One ...",MMA|Biography|Nepal|Fighter Journey|One Champi...,...,,,,,"{""age_years"":24}",1.0,1473.0,2930.0,364,"In Delhi, he started training in karate in the..."


# ==== 3) Basic cleanup & de-duplication ====

In [11]:


# 2. Initial data info and preview
print("Dataframe info:")
print(df.info())
print("\nSample records:")
print(df.head())

# 3. Data cleaning and type fixing
# Convert date columns to datetime
df['published_date'] = pd.to_datetime(df['published_date'], errors='coerce')
df['accessed_date'] = pd.to_datetime(df['accessed_date'], errors='coerce')

# Check for missing values
print("\nMissing values per column:")
print(df.isnull().sum())

# Drop strict duplicates if any
df.drop_duplicates(inplace=True)

# 4. Extract key JSON columns for parsing relevant features: stats_json, extras_json
def parse_json_safe(x):
    try:
        return json.loads(x) if pd.notnull(x) and x.strip() != '' else {}
    except Exception:
        return {}

df['stats'] = df['stats_json'].apply(parse_json_safe)
df['extras'] = df['extras_json'].apply(parse_json_safe)

# Extract important stats from the 'stats' JSON as new columns for easy analysis
# Example: record stats for fighter_a
df['fighter_a_record_at_fight'] = df['stats'].apply(lambda x: x.get('fighter_a_record_at_fight', None))
df['fighter_b_record_at_fight'] = df['stats'].apply(lambda x: x.get('fighter_b_record_at_fight', None))

# Extract fighter ages from 'extras' JSON
df['fighter_a_age'] = df['extras'].apply(lambda x: x.get('fighter_a_age_at_fight_years', None))
df['fighter_b_age'] = df['extras'].apply(lambda x: x.get('fighter_b_age_at_fight_years', None))

# Extract height if available
df['fighter_a_height_cm'] = df['extras'].apply(lambda x: x.get('fighter_a_height_cm', None))
df['fighter_b_height_cm'] = df['extras'].apply(lambda x: x.get('fighter_b_height_cm', None))

# 5. Basic stats and summary for Rabindra Dhant
# Filter records related to Rabindra Dhant (fighter_a or fighter_b)
df_rabindra = df[(df['fighter_a'] == 'Rabindra Dhant') | (df['fighter_b'] == 'Rabindra Dhant')]

print("\nRabindra Dhant fight records count:", len(df_rabindra))

# Win/Loss record from this filtered data (assuming Rabindra as fighter_a means 'Win' means he won)
# Normalize outcome to Rabindra's perspective
def rabindra_outcome(row):
    if row['fighter_a'] == 'Rabindra Dhant':
        return row['outcome']
    elif row['fighter_b'] == 'Rabindra Dhant':
        # If opponent won, Rabindra lost
        return 'Loss' if row['outcome'].lower() == 'win' else 'Win'
    else:
        return None

df_rabindra['rabindra_outcome'] = df_rabindra.apply(rabindra_outcome, axis=1)

print("\nRabindra Dhant Fight Outcomes:")
print(df_rabindra['rabindra_outcome'].value_counts())

# 6. Key career highlights from data
# Extract fight methods by Rabindra in wins
methods_wins = df_rabindra[(df_rabindra['rabindra_outcome'] == 'Win')]['method'].value_counts()
print("\nWin methods by Rabindra Dhant:")
print(methods_wins)

# 7. Build summary text for mind map branches
background = [
    "Born: November 30, 1998, Bajhang District, Nepal",
    "Early life: Manual laborer in India (Pithoragarh, New Delhi)",
    "Initial training: Karate, then transitioned to MMA",
    "Nickname: The Tiger of Bajhang",
    "Nationality: Nepalese (refused Indian citizenship offer)"
]

professional_career = [
    "Record: 9 wins, 1 loss (7 KO/TKO, 1 Submission, 1 Decision)",
    "Notable fights: MFN Bantamweight Championship win vs Chungreng Koren",
    "Fight locations: India, China, Thailand, Nepal",
    "Current streak: 3 wins"
]

training_support = [
    "Current gym: Soma Fight Club, Bali (Indonesia)",
    "Previous training: Lock N Roll MMA, Kathmandu",
    "Mentorship: Coach Diwiz Piya Lama",
    "Sponsorship & finances: Supported by Nutrition Fit Nepali and Latido"
]

challenges_mindset = [
    "Height initially self-conscious for volleyball",
    "Citizenship barriers for international fights",
    "Financial struggles, lack of institutional support",
    "Family pressure to find stable job",
    "Intense training (2-3 times/day), body aches normal",
    "Mental toughness & discipline emphasized by coaches",
    "Focus on fighting itself to handle pressure",
    "Avoids social media distractions during fight week",
    "Finds peace in Nepali village vlogs",
    "Prefers not to watch other fights before bouts",
    "Goal: UFC World Champion (target age 33-34)"
]

impact_recognition = [
    "Widespread recognition in Nepal (public, media, political leaders)",
    "Milestone for combat sports in Nepal",
    "Congratulated by Prime Minister K.P. Sharma Oli and others",
    "Inspires young Nepali athletes",
    "Helped popularize MMA in Nepal",
    "Role model: humble, disciplined, respectful"
]

mma_in_nepal = [
    "Growing popularity over last 5 years",
    "Nepal Warriors Championship (NWC): platform for local fighters",
    "Challenges: dearth of players, lack of proper facilities, minimal government support",
    "High prices for gyms/ facilities in Kathmandu (~4.5-5K NPR/month)",
    "No official governing body for MMA as of 2019",
    "Emphasizes discipline, perseverance, and hard work"
]

controversy = [
    "Warriors Cove (Chungreng's team) claimed Koren had leg issues before MFN 17",
    "Coach Mike (Dhant's coach) strongly denied excuses, blaming Indian MMA culture",
    "Highlighted Dhant fought with an MCL tear, not wrestling for 6 weeks",
    "Warriors Cove criticized for inconsistent weight management",
    "Dhant outclassed opponent in wrestling, striking, grappling",
    "No rematch deserved due to Warriors Cove's lack of accountability",
    "Emphasis on humility and learning from losses for growth"
]

# 8. Visualize textual mind map structure using networkx (simple tree layout)
G = nx.DiGraph()

# Add main node
G.add_node("Rabindra Dhant: Nepali MMA Champion")

# Add branches with subnodes
for branch, items in zip(
    ["Background", "Professional Career", "Training & Support", "Challenges & Mindset", "Impact & Recognition", "MMA in Nepal", "Controversy"],
    [background, professional_career, training_support, challenges_mindset, impact_recognition, mma_in_nepal, controversy]
):
    G.add_node(branch)
    G.add_edge("Rabindra Dhant: Nepali MMA Champion", branch)
    for item in items:
        # truncate label if too long for visualization
        label = item if len(item) <= 60 else item[:60] + "..."
        G.add_node(label)
        G.add_edge(branch, label)



Dataframe info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 32 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   item_id            16 non-null     object 
 1   source_url         16 non-null     object 
 2   source_type        16 non-null     object 
 3   source_title       15 non-null     object 
 4   author_or_channel  15 non-null     object 
 5   published_date     14 non-null     object 
 6   accessed_date      15 non-null     object 
 7   language           15 non-null     object 
 8   entities           15 non-null     object 
 9   tags               16 non-null     object 
 10  org_promotion      14 non-null     object 
 11  event_name         7 non-null      object 
 12  event_date         6 non-null      object 
 13  location_city      14 non-null     object 
 14  location_country   14 non-null     object 
 15  fighter_a          15 non-null     object 
 16  fighter_b   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rabindra['rabindra_outcome'] = df_rabindra.apply(rabindra_outcome, axis=1)


In [12]:
df.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
published_date,13.0,2024-06-26 00:00:00,2023-06-16 00:00:00,2023-09-25 00:00:00,2023-10-26 00:00:00,2025-03-04 00:00:00,2025-08-10 00:00:00,
accessed_date,15.0,2024-05-15 00:00:00,2024-05-15 00:00:00,2024-05-15 00:00:00,2024-05-15 00:00:00,2024-05-15 00:00:00,2024-05-15 00:00:00,
odds_a,0.0,,,,,,,
odds_b,0.0,,,,,,,
round,5.0,2.2,1.0,1.0,3.0,3.0,3.0,1.095445
chunk_index,15.0,88.8,0.0,0.0,0.0,1.5,1322.0,341.15585
char_start,15.0,1542.133333,0.0,656.0,1523.0,1909.5,4427.0,1249.942277
char_end,15.0,2023.066667,188.0,428.0,1473.0,3158.0,5156.0,1652.100199
fighter_a_age,1.0,26.0,26.0,26.0,26.0,26.0,26.0,
fighter_b_age,1.0,27.0,27.0,27.0,27.0,27.0,27.0,


In [13]:
# Calculate the percentage of missing values for each column
missing_percentage_per_column = df.isnull().sum(axis=0) / len(df) * 100

print("Missing values percentage per column:")
print(missing_percentage_per_column)

# Calculate the percentage of missing values for each row
missing_percentage_per_row = df.isnull().sum(axis=1) / len(df.columns) * 100

# Identify rows with more than 50% missing values
rows_to_drop = missing_percentage_per_row[missing_percentage_per_row > 50].index
print("\nMissing percent per row:", missing_percentage_per_row)
print("Rows to drop:", rows_to_drop)

# Drop the identified rows
df_pruned = df.drop(rows_to_drop)
print("\nOriginal DataFrame shape:", df.shape)
print("DataFrame shape after pruning:", df_pruned.shape)

display(df_pruned)

Missing values percentage per column:
item_id                        0.00
source_url                     0.00
source_type                    0.00
source_title                   6.25
author_or_channel              6.25
published_date                18.75
accessed_date                  6.25
language                       6.25
entities                       6.25
tags                           0.00
org_promotion                 12.50
event_name                    56.25
event_date                    62.50
location_city                 12.50
location_country              12.50
fighter_a                      6.25
fighter_b                     62.50
weight_class                  62.50
odds_a                       100.00
odds_b                       100.00
outcome                       12.50
method                        68.75
round                         68.75
time_mmss                     81.25
stats_json                    62.50
extras_json                   87.50
content_hash_sha1         

Unnamed: 0,item_id,source_url,source_type,source_title,author_or_channel,published_date,accessed_date,language,entities,tags,...,token_est,chunk_text,stats,extras,fighter_a_record_at_fight,fighter_b_record_at_fight,fighter_a_age,fighter_b_age,fighter_a_height_cm,fighter_b_height_cm
0,MFN17_Dhant-vs-Koren,https://tapology.com/fightcenter/bouts/1032822...,stats_page,Chungreng Koren vs. Rabindra Dhant,Tapology,2025-08-02,2024-05-15,en,"Rabindra Dhant, Chungreng Koren, Matrix Fight ...",MMA|MFN|Bantamweight|Title Fight|KO,...,339,**Rabindra Dhant** (8-1-0) defeated **Chungren...,"{'fighter_a_record_at_fight': '8-1-0', 'fighte...","{'referee': 'Marc Goddard', 'fighter_a_height_...",8-1-0,7-1-0,26.0,27.0,175.0,173.0
1,Dhant_struggles_triumphs,https://english.onlinekhabar.com/rabindra-dhan...,article,Rabindra Dhant's struggles and triumphs on the...,Shashwat Pant,2023-09-25,2024-05-15,en,"Rabindra Dhant, Bajhang, Torepchi Dongak, One ...",MMA|Biography|Nepal|Fighter Journey|One Champi...,...,369,"**Rabindra Dhant** grew up in Bajhang, Nepal, ...",{},{},,,,,,
2,Dhant_struggles_triumphs,https://english.onlinekhabar.com/rabindra-dhan...,article,Rabindra Dhant's struggles and triumphs on the...,Shashwat Pant,2023-09-25,2024-05-15,en,"Rabindra Dhant, Bajhang, Torepchi Dongak, One ...",MMA|Biography|Nepal|Fighter Journey|One Champi...,...,364,"In Delhi, he started training in karate in the...",{},{},,,,,,
3,Dhant_struggles_triumphs,https://english.onlinekhabar.com/rabindra-dhan...,article,Rabindra Dhant's struggles and triumphs on the...,Shashwat Pant,2023-09-25,2024-05-15,en,"Rabindra Dhant, Bajhang, Torepchi Dongak, One ...",MMA|Biography|Nepal|Fighter Journey|One Champi...,...,374,Dhant's fighting career saw him compete in Ban...,{},{},,,,,,
4,Dhant_struggles_triumphs,https://english.onlinekhabar.com/rabindra-dhan...,article,Rabindra Dhant's struggles and triumphs on the...,Shashwat Pant,2023-09-25,2024-05-15,en,"Rabindra Dhant, Bajhang, Torepchi Dongak, One ...",MMA|Biography|Nepal|Fighter Journey|One Champi...,...,182,Dhant believes his professional MMA career is ...,{},{},,,,,,
5,Dhant_aspiring_fighter,https://theannapurnaexpress.com/news/rabindra-...,article,Rabindra Dhant: An aspiring MMA fighter,Shreya Shrestha,2025-03-04,2024-05-15,en,"Rabindra Dhant, Bajhang, India, Diwiz Piya, Th...",MMA|Fighter Journey|Nepal|Biography,...,328,"**Rabindra Dhant**, born and raised in Bajhang...",{},{},,,,,,
6,Dhant_aspiring_fighter,https://theannapurnaexpress.com/news/rabindra-...,article,Rabindra Dhant: An aspiring MMA fighter,Shreya Shrestha,2025-03-04,2024-05-15,en,"Rabindra Dhant, Bajhang, India, Diwiz Piya, Th...",MMA|Fighter Journey|Nepal|Biography,...,383,Dhant's persistence paid off with another figh...,{},{},,,,,,
7,Dhant_aspiring_fighter,https://theannapurnaexpress.com/news/rabindra-...,article,Rabindra Dhant: An aspiring MMA fighter,Shreya Shrestha,2025-03-04,2024-05-15,en,"Rabindra Dhant, Bajhang, India, Diwiz Piya, Th...",MMA|Fighter Journey|Nepal|Biography,...,294,MMA in Nepal is still in its early stages but ...,{},{},,,,,,
8,Dhant_MFN_win,https://www.thenepalweekly.com/news/rabindra-d...,article,Rabindra Dhant wins MFN Bantomweight Champions...,The Nepal Weekly,2025-08-05,2024-05-15,en,"Rabindra Dhant, Chungren Koren, Matrix Fight N...",MMA|MFN|Championship|Nepal|India|KO,...,"At this point, the MMA scene in Nepal gained m...",,"{'fighter_a_record': '8-1', 'fighter_b_record'...",{},,,,,,
9,Dhant_shines_KTM_Post,https://kathmandupost.com/sports/2025/08/10/ra...,article,Rabindra Dhant shines on international MMA stage,Basanta Pratap Singh,2025-08-10,2024-05-15,en,"Rabindra Dhant, Chungren Koren, Matrix Fight N...",MMA|MFN|Championship|Nepal|India,...,Nepali mixed martial artist **Rabindra Dhant**...,,"{'fighter_a_record': '9-1', 'amateur_bouts_won...",{},,,,,,


Embedding

In [14]:
# Drop rows with missing chunk_text
df_filtered = df.dropna(subset=["chunk_text"])

# Create list of texts
texts = df_filtered["chunk_text"].tolist()

# Create metadata for each chunk
metadatas = df_filtered[[
    "source_url",
     "tags",
]].to_dict(orient="records")


In [15]:
from sentence_transformers import SentenceTransformer

# Load a pre-trained embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Convert text to embeddings
embeddings = model.encode(texts, show_progress_bar=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Chroma DB


In [None]:
!pip install chromadb




In [16]:


# Create ChromaDB client
client = chromadb.Client(Settings())

# Create a collection
collection = client.create_collection(name="rabindra_info")

# Add data to the collection
collection.add(
    documents=texts,
    embeddings=embeddings,
    metadatas=metadatas,
    ids=[f"chunk_{i}" for i in range(len(texts))]
)


In [17]:
query = "Why is Rabindra popular"
query_embedding = model.encode([query])[0]

# Search in ChromaDB
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=5,
    include=["documents", "metadatas"]
)

# Combine text and metadata for prompt
context_blocks = []
for doc, meta in zip(results["documents"][0], results["metadatas"][0]):
    block = f"""
Source Title: {meta.get("source_title", "N/A")}
Author: {meta.get("author_or_channel", "N/A")}
Published Date: {meta.get("published_date", "N/A")}
Source URL: {meta.get("source_url", "N/A")}
Tags: {meta.get("tags", "N/A")}
Entities: {meta.get("entities", "N/A")}

Content:
{doc}
"""

context_blocks.append(block)

context = "\n\n---\n\n".join(context_blocks)


In [None]:
!pip install --upgrade openai




In [19]:


client = openai.OpenAI(
    api_key=OPEN_API_KEY)

prompt = f"""
Use the following context to answer the question.
Also return the source URL and any relevant metadata for each part of your answer.

Context:
{context}

Question: Why is Rabindra Dhant popular?

Answer:
"""



response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": prompt}]
)

print(response.choices[0].message.content or "")


Rabindra Dhant is popular because he defeated Chungreng Koren via KO/TKO to win the MFN Bantamweight Championship during Matrix Fight Night 17. This victory helped boost Dhant's record and solidified his status as a talented fighter in the MMA community.

Source URL: https://tapology.com/fightcenter/bouts/1032822-chungreng-koren-vs-rabindra-dhant-mfn-bantamweight-championship


In [22]:
from google.colab import userdata


client = openai.OpenAI(
    api_key=OPEN_API_KEY)



SYSTEM_PROMPT = (
    " Answer the question based on the given context. Also return the source URL and any relevant metadata for each part of your answer."
)

# --- Static Data for UI ---
# Quick Facts about Rabindra Dhant and MMA in Nepal
quick_facts_data = """
**Key Highlights:**
*   **Rabindra Dhant is Nepal's first MFN Bantamweight Champion.** He secured this title at Matrix Fight Night 17 in August 2025, defeating India's Chungreng Koren by TKO in the third round.
*   **He famously refused an offer for Indian citizenship** to represent Nepal internationally after winning a national-level amateur MMA championship in India at age 18 in 2019. This decision, made due to Nepal lacking an official MMA association at the time, initially "shattered" his world championship dream, but he remained committed to representing Nepal.
*   **Dhant's early life involved manual labor in India** after leaving his village in Bajhang, Nepal, at 16, where he secretly began martial arts training.
*   **Diwiz Piya Lama is a crucial mentor and coach for Dhant.** Lama, a seasoned Jiu-Jitsu practitioner, met Dhant around 2021 in Kathmandu and has personally funded his training, becoming an "important figure" in his life. Dhant trains at Lock N Roll MMA Nepal and Soma Fight Club in Bali.
*   **MMA's popularity is growing in Nepal.** Dhant's journey and victories are seen as inspiring, generating massive public support and increasing interest in MMA within the country, despite challenges like a lack of infrastructure and government support.
"""

# Sample Prompts for the Chat Interface
sample_prompts_data = """
**Ask me about:**
*   "Tell me about Rabindra Dhant's background and his early struggles."
*   "What was the significance of Rabindra Dhant's refusal of Indian citizenship?"
"""

# Fighter comparison data for table display (attributes as rows, fighters as columns)
fighter_comparison_data = {
    "Attribute": [
        "Nationality",
        "Nickname",
        "Age",
        "Height",
        "Team",
        "Record",
        "Win %",
        "Win Prediction"
    ],
    "Rabindra Dhant": [
        "🇳🇵 Nepalese",
        "The Tiger of Bajhang",
        "26 years, 8 months, 3 days",
        "5'9\" (175cm)",
        "Lock N Roll MMA Nepal / Soma Fight Club Bali",
        "8-1-0",
        "88.9%",
        "🔥 65%"
    ],
    "Chungreng Koren": [
        "🇮🇳 Indian",
        "The Indian Rhino",
        "27 years, 6 months, 1 day",
        "5'8\" (173cm)",
        "Warrior's Cove Mixed Martial Arts",
        "7-1-0",
        "87.5%",
        "⚡ 35%"
    ]
}

# Fight event data
fight_event_data = {
    "event": "Matrix Fight Night 17 (MFN 17)",
    "date": "August 2, 2025",
    "location": "Greater Noida, India",
    "result_method": "KO/TKO (Punches)",
    "result_round": "Round 3",
    "result_time": "0:53",
    "winner": "Rabindra Dhant"
}


def extract_json_from_text(text):
    if not isinstance(text, str):
        return None
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if match:
        try:
            return json.loads(match.group())
        except json.JSONDecodeError:
            return None
    return None


def get_quick_facts():
    return quick_facts_data


def get_sample_prompts():
    return sample_prompts_data


def get_fighter_comparison_table():
    df = pd.DataFrame(fighter_comparison_data)
    return df


def get_fight_event_info():
    data = fight_event_data
    markdown_output = f"""
### 🏆 **{data['event']}** 🏆

**📅 Date:** {data['date']}
**📍 Location:** {data['location']}

**🥊 Fight Result:**
*   **Winner:** **{data['winner']}** 🏆
*   **Method:** {data['result_method']}
*   **Round:** {data['result_round']}
*   **Time:** {data['result_time']}
"""
    return markdown_output


def chat_with_markdown(user_input, history=[], stats_df=None):
    if not user_input or user_input.strip() == "":
      return history, history, stats_df, user_input  # Return current state unchanged

    #embedding user query
    query_embedding = model.encode([user_input])[0]

    # Search in ChromaDB
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=5,
        include=["documents", "metadatas"]
    )

    # Combine text and metadata for prompt
    context_blocks = []
    for doc, meta in zip(results["documents"][0], results["metadatas"][0]):
        block = f"""
    Source URL: {meta.get("source_url", "N/A")}
    Tags: {meta.get("tags", "N/A")}

    {doc}
"""

        context_blocks.append(block)

    context = "\n\n---\n\n".join(context_blocks)

    client = openai.OpenAI(
    api_key=OPEN_API_KEY)

    # Prepare messages for LLM
    messages = [{"role": "system", "content": SYSTEM_PROMPT + "Context: " + context}]
    print(messages)

    for h in history:
        messages.append({"role": "user", "content": re.sub(r"^👤 ", "", h[0])})
        messages.append({"role": "assistant", "content": re.sub(r"^🤖 ", "", h[1])})
    messages.append({"role": "user", "content": user_input})

    # Call GPT
    response = client.chat.completions.create(
        model="gpt-4",
        messages=messages
    )

    print(f"response: {response.choices}" )
    answer = response.choices[0].message.content or ""

    # Remove JSON from text before display
    #display_text = re.sub(r'\{.*\}', '', answer, flags=re.DOTALL).strip()

    # Wrap user text in white span
    user_display = f"<span style='color: red'>👤</span> <span style='color: #FFFFFF'>{user_input}</span>"

    history.append((user_display, f"🤖 {answer}"))

    # When called from quick question buttons, update the textbox with the question
    return history, history, stats_df, "" if not any(q in user_input for q in quick_questions) else user_input



quick_questions = [
        "Tell me about Rabindra Dhant's background",
        "What was the significance of his refusal of Indian citizenship?",
        "Who is his coach and mentor?",
        "How popular is MMA in Nepal?"
]
def send_quick_question(question, chatbot, msg):
    # Pass the quick question text directly as user_input
    return chat_with_markdown(question, chatbot, None), question  # Return the question to update the msg box


def transcribe_audio_to_input(audio_data):
    if not audio_data:
        return ""

    if isinstance(audio_data, tuple) and len(audio_data) == 2:
        sample_rate, audio_array = audio_data

        # Create temporary wav file
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            sf.write(temp_file.name, audio_array, sample_rate)
            temp_file_path = temp_file.name

        try:
            # Transcribe using OpenAI Whisper
            with open(temp_file_path, "rb") as f:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=f
                ).text
            return transcript

        finally:
            # Clean up temporary file
            if os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

    else:
        return "Invalid audio format"

def toggle_division(is_visible):
                        """Toggle the visibility of the division"""
                        return not is_visible



# Gradio UI
with gr.Blocks(css="""
 #response-box{
}
 #input-box {
     border-radius: 15px !important
}
 .user{
     background: rgb(46 111 156) !important;
}
 #clear{
     font-weight: 600;
     border-radius: 12px;
     border: none;
     padding: 12px 20px;
     cursor: pointer;
     width: 100%;
     margin-top: 10px;
     transition: all 0.3s ease;

 } #send-btn {
     background: linear-gradient(90deg, rgb(54 36 86), rgb(98 162 206)) !important;
     color: white;
     font-weight: 600;
     border-radius: 12px;
     border: none;
     padding: 12px 20px;
     cursor: pointer;
     width: 100%;
     margin-top: 10px;
     transition: all 0.3s ease;
}
 #clear:hover, #send-btn:hover {
     filter: brightness(1.1);
     transform: translateY(-1px);
}
 #fighter-table {
     border: none !important;
     border-radius: 12px !important;
     margin-bottom: 30px !important;
     overflow: hidden !important;
     box-shadow: 0 4px 20px rgba(54, 36, 86, 0.15) !important;
     background: white !important;
}
 #fighter-table table {
     border-collapse: collapse !important;
     width: 100% !important;
     margin: 0 !important;
}
 #fighter-table th, #fighter-table td {
     padding: 12px !important;
     text-align: left !important;
     border: none !important;
     transition: all 0.3s ease !important;
}
 #fighter-table th {
     background: linear-gradient(90deg, rgb(54, 36, 86), rgb(46, 112, 157)) !important;
     font-weight: 700 !important;
     color: white !important;
     text-align: center !important;
     font-size: 14px !important;
     text-transform: uppercase !important;
     letter-spacing: 0.5px !important;
     position: relative !important;
     border-bottom: 2px solid rgba(255, 255, 255, 0.2) !important;
}
 #fighter-table th::after {
     content: '' !important;
     position: absolute !important;
     bottom: 0 !important;
     left: 0 !important;
     right: 0 !important;
     height: 2px !important;
     background: linear-gradient(90deg, rgba(255, 255, 255, 0), rgba(255, 255, 255, 0.6), rgba(255, 255, 255, 0)) !important;
}
 #fighter-table td:first-child {
     background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%) !important;
     font-weight: 700 !important;
     color: rgb(54, 36, 86) !important;
     border-right: 3px solid rgba(54, 36, 86, 0.1) !important;
     text-transform: capitalize !important;
     width: 120px !important;
}
 #fighter-table td:not(:first-child) {
     background-color: white !important;
     color: #333 !important;
     border-bottom: 1px solid #f0f0f0 !important;
     font-size: 13px !important;
}
 #fighter-table tr:nth-child(even) td:not(:first-child) {
     background-color: #fafbfc !important;
}
 #fighter-table td:nth-child(2) {
     border-left: 3px solid rgba(54, 36, 86, 0.3) !important;
     padding-left: 15px !important;
}
 #fighter-table td:nth-child(3) {
     border-left: 3px solid rgba(46, 112, 157, 0.3) !important;
     padding-left: 15px !important;
}
 #fighter-table tr:last-child td {
     background: linear-gradient(135deg, #fff8e1 0%, #ffecb3 100%) !important;
     font-weight: 700 !important;
     font-size: 14px !important;
     border-top: 2px solid #ffd54f !important;
     position: relative !important;
}
 #fighter-table tr:last-child td:first-child {
     background: linear-gradient(135deg, rgb(54, 36, 86), rgb(46, 112, 157)) !important;
     color: white !important;
     border-right: 3px solid #ffd54f !important;
}
 #fighter-table tr:last-child td:not(:first-child) {
     animation: subtle-glow 3s ease-in-out infinite alternate !important;
}
 @keyframes subtle-glow {
     from {
         box-shadow: inset 0 0 5px rgba(255, 193, 7, 0.3) !important;
    }
     to {
         box-shadow: inset 0 0 15px rgba(255, 193, 7, 0.5) !important;
    }
}
 #fighter-table tr:not(:first-child):hover td {
     transform: translateY(-1px) !important;
     box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1) !important;
}
 #fighter-table tr:hover td:first-child {
     background: linear-gradient(135deg, rgb(54, 36, 86), rgb(46, 112, 157)) !important;
     color: white !important;
}
 #fighter-table tr:not(:first-child):hover td:not(:first-child) {
     background-color: #f0f7ff !important;
}
 @media (max-width: 768px) {
     #fighter-table th, #fighter-table td {
         padding: 8px 6px !important;
         font-size: 12px !important;
    }
     #fighter-table td:first-child {
         width: 100px !important;
    }
}
.audio_upload_area, .input_audio_upload, .audio_upload_dropzone {
    display: none !important;
}

#micphone {
width: 100% !important;
}

""") as demo:
    gr.Markdown("## MMS Chat Assistant")

    with gr.Row():
        with gr.Column(scale=3):
            # Chatbot
            chatbot = gr.Chatbot(elem_classes="chat-box", elem_id="response-box")

            # Quick question buttons above input
            with gr.Row():
                buttons = []
                for q_text in quick_questions:
                    btn = gr.Button(q_text, elem_classes="quick-btn")
                    buttons.append(btn)

            # User input textbox
            msg = gr.Textbox(label="Enter your message", placeholder="Type your question here...", elem_id="input-box")

            with gr.Row():
                with gr.Column(scale=2):
                    clear = gr.Button("Clear Chat", elem_id="clear", scale=3)

                with gr.Column(scale=2):
                    send_btn = gr.Button("Send", elem_id="send-btn")

                    microphone_toggle = gr.Button("🎤")

                    # Division that will be shown/hidden
                    with gr.Column(visible=False) as division:
                        speech_input = gr.Audio(label="🎤 Record your question", sources=["microphone"], type="numpy",
                                                interactive=True, elem_id="micphone")
                    # State to track division visibility
                    division_visible = gr.State(False)

                    # Event handler
                    microphone_toggle.click(
                        fn=toggle_division,
                        inputs=[division_visible],
                        outputs=[division_visible]
                    ).then(
                        fn=lambda x: gr.update(visible=x),
                        inputs=[division_visible],
                        outputs=[division]
                    ).then(
                        fn=lambda x: "🔴" if x else "🎤",
                        inputs=[division_visible],
                        outputs=[microphone_toggle]
                    )

            # Connect quick question buttons to chatbot
            for btn, q_text in zip(buttons, quick_questions):
                btn.click(
                    lambda q=q_text: chat_with_markdown(q, chatbot.value, None), # Pass question text and current history
                    inputs=[], # No direct inputs needed, using lambda to capture q_text and chatbot state
                    outputs=[chatbot, chatbot, msg]
                )


        # Right sidebar with fighter info
        with gr.Column(scale=1):
            gr.Markdown("## 🥊 Fighter Comparison & Prediction")
            fighter_table = gr.DataFrame(
                value=get_fighter_comparison_table(),
                elem_id="fighter-table",
                interactive=False,
                wrap=True,
            )

            gr.Markdown("## 💡 Quick Facts & Context")
            gr.Markdown(get_quick_facts())

    # Submit textbox on Enter
    msg.submit(
        chat_with_markdown,
        inputs=[msg, chatbot],
        outputs=[chatbot, chatbot, msg]
    )

    # Clear chat
    clear.click(
        lambda: ([], [], get_fighter_comparison_table(), ""),
        None,
        [chatbot, chatbot, fighter_table, msg],
        queue=False
    )

    send_btn.click(chat_with_markdown, inputs=[msg, chatbot], outputs=[chatbot, chatbot, msg])

    speech_input.change(transcribe_audio_to_input, inputs=[speech_input], outputs=[msg])

demo.launch()

  chatbot = gr.Chatbot(elem_classes="chat-box", elem_id="response-box")


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://28decbcaa769f0e5dd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


