### 1. Importing Dependencies

In [9]:
# 📚 Basic Data Analysis & Visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# 📄 File Parsing & Handling
import io
import os
import re

# 📦 For PDF and DOCX (optional but recommended)
try:
    import PyPDF2
except ImportError:
    PyPDF2 = None

try:
    import docx
except ImportError:
    docx = None

# 🖼️ Image Processing (optional, in case of image files)
from PIL import Image
import base64

# 🌐 Together.ai API (HTTP requests)
import requests

# 🚀 Gradio for the UI
import gradio as gr

# 📊 Matplotlib backend for inline display (if needed)
%matplotlib inline

# 🔧 Warnings (to keep the notebook clean)
import warnings
warnings.filterwarnings('ignore')


### 2. Defining Methods for Uploading Files, Summarizing Content, and Answering Queries

In [10]:
class DataAnalystAgent:
    """Load one file and summarize it or answer questions about it via Together.ai.

    The agent keeps the most recently loaded file in memory:

    * tables (CSV/Excel) are stored as a pandas DataFrame in ``self.data``;
    * images are stored as a PIL image in ``self.data``;
    * text, PDF and DOCX content is stored as a string in ``self.text_data``.

    ``self.data_type`` ('table', 'text', 'image' or None) records which of the
    above is current and drives how prompts are built.
    """

    # Together.ai chat-completions endpoint and the model used for every request.
    API_URL = "https://api.together.xyz/v1/chat/completions"
    MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"

    # Limits on how much of the loaded content is placed in a prompt.
    MAX_TEXT_CHARS = 8000
    MAX_TABLE_ROWS = 20

    # Seconds to wait for the API before giving up (requests has no default timeout).
    REQUEST_TIMEOUT = 60

    def __init__(self, api_key):
        """Create an agent that authenticates to Together.ai with *api_key*."""
        self.api_key = api_key
        self.data = None
        self.data_type = None
        self.text_data = ""

    def load_file(self, uploaded_file):
        """Load the file at path *uploaded_file* and record its type.

        The type is chosen from the (case-insensitive) file extension.

        Raises:
            ValueError: if *uploaded_file* is not a path, or the extension is
                not one of the supported types.
            ImportError: if a PDF/DOCX is given but the optional parser
                library is not installed.
        """
        if isinstance(uploaded_file, os.PathLike):
            uploaded_file = os.fspath(uploaded_file)
        if not isinstance(uploaded_file, str):
            raise ValueError("Expected file path string but got file-like object which is not supported in this version.")

        extension = os.path.splitext(uploaded_file)[1].lower()

        if extension == '.csv':
            self.data = pd.read_csv(uploaded_file)
            self.data_type = 'table'
            print(f"Loaded CSV with shape {self.data.shape}")

        elif extension in ['.xlsx', '.xls']:
            self.data = pd.read_excel(uploaded_file)
            self.data_type = 'table'
            print(f"Loaded Excel with shape {self.data.shape}")

        elif extension == '.txt':
            # errors='replace' keeps files that are not valid UTF-8 loadable;
            # undecodable bytes become U+FFFD instead of aborting the load.
            with open(uploaded_file, 'r', encoding='utf-8', errors='replace') as f:
                self.text_data = f.read()
            self.data_type = 'text'
            print(f"Loaded TXT with {len(self.text_data)} characters")

        elif extension == '.pdf':
            with open(uploaded_file, 'rb') as f:
                self.text_data = self._parse_pdf(f)
            self.data_type = 'text'
            print(f"Loaded PDF with {len(self.text_data)} characters")

        elif extension == '.docx':
            with open(uploaded_file, 'rb') as f:
                self.text_data = self._parse_docx(f)
            self.data_type = 'text'
            print(f"Loaded DOCX with {len(self.text_data)} characters")

        elif extension in ['.png', '.jpg', '.jpeg']:
            image = Image.open(uploaded_file)
            # Image.open is lazy; decode now so a corrupt file fails here
            # rather than later when the image is re-encoded for the API.
            image.load()
            self.data = image
            self.data_type = 'image'
            print(f"Loaded Image with size {self.data.size}")

        else:
            raise ValueError(f"Unsupported file type: {extension}")

    def _parse_pdf(self, file_obj):
        """Return the extractable text of the PDF in binary *file_obj*.

        Pages that yield no text (for example scanned, image-only pages) are
        skipped, so the result may be an empty string.

        Raises:
            ImportError: if PyPDF2 is not installed.
        """
        if PyPDF2 is None:
            raise ImportError("PyPDF2 is not installed.")
        pdf_reader = PyPDF2.PdfReader(file_obj)
        # extract_text() may return None/'' for a page; treat that as "no text".
        page_texts = (page.extract_text() or '' for page in pdf_reader.pages)
        return '\n'.join(text for text in page_texts if text)

    def _parse_docx(self, file_obj):
        """Return the paragraphs of the DOCX in *file_obj*, one per line.

        Raises:
            ImportError: if python-docx is not installed.
        """
        if docx is None:
            raise ImportError("python-docx is not installed.")
        doc = docx.Document(file_obj)
        return '\n'.join(para.text for para in doc.paragraphs)

    def _table_context(self):
        """Render the first MAX_TABLE_ROWS rows of the loaded table as text."""
        preview = self.data.head(self.MAX_TABLE_ROWS)
        try:
            return preview.to_markdown()
        except ImportError:
            # DataFrame.to_markdown needs the optional 'tabulate' package;
            # fall back to pandas' plain-text rendering when it is missing.
            return preview.to_string()

    def _image_to_base64(self):
        """Encode the loaded image as a base64 PNG string."""
        buffered = io.BytesIO()
        self.data.save(buffered, format='PNG')
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    def summarize_content(self):
        """Ask the Together.ai API for a summary of the loaded content.

        Returns the formatted API answer, or a warning/error message string
        when nothing suitable is loaded or the request fails.
        """
        image_base64 = None

        if self.data_type == 'text':
            if not self.text_data.strip():
                return "⚠️ No text could be extracted from this file, so there is nothing to summarize."
            content = self.text_data[:self.MAX_TEXT_CHARS]
            prompt = (
                f"Please provide a concise, insightful summary of the following text:\n\n{content}\n\n"
                f"Focus on key points, trends, and relevant insights."
            )

        elif self.data_type == 'table':
            content = self._table_context()
            prompt = (
                f"Please analyze the following table data and provide a concise summary with trends, "
                f"key insights, and any interesting patterns:\n\n{content}"
            )

        elif self.data_type == 'image':
            # The image travels as an image attachment, not as base64 text in
            # the prompt, which the model cannot interpret as a picture.
            image_base64 = self._image_to_base64()
            prompt = (
                "Please describe the attached image.\n\n"
                "Provide a concise, informative summary."
            )

        else:
            return "⚠️ Summarization not supported for this data type."

        return self._call_together_api(prompt, image_base64=image_base64)

    def ask_question(self, user_question):
        """Ask the Together.ai API *user_question* about the loaded content.

        Returns the formatted API answer, or a warning/error message string
        when nothing suitable is loaded or the request fails.
        """
        image_base64 = None

        if self.data_type == 'text':
            if not self.text_data.strip():
                return "⚠️ No text could be extracted from this file, so questions cannot be answered."
            context = self.text_data[:self.MAX_TEXT_CHARS]
            prompt = (
                f"Please answer the following question based on the given text:\n\n"
                f"--- Begin Text ---\n{context}\n--- End Text ---\n\n"
                f"Question: {user_question}\n\n"
                f"Provide a clear, concise, and accurate answer."
            )

        elif self.data_type == 'table':
            context = self._table_context()
            prompt = (
                f"Please answer the following question based on the table data:\n\n"
                f"--- Begin Table ---\n{context}\n--- End Table ---\n\n"
                f"Question: {user_question}\n\n"
                f"Provide a clear, concise, and accurate answer."
            )

        elif self.data_type == 'image':
            image_base64 = self._image_to_base64()
            prompt = (
                f"Please answer the following question based on the attached image.\n\n"
                f"Question: {user_question}\n\n"
                f"Provide a clear, concise, and accurate answer."
            )

        else:
            return "⚠️ Question answering not supported for this data type."

        return self._call_together_api(prompt, image_base64=image_base64)

    def _call_together_api(self, prompt, image_base64=None):
        """Send *prompt* to the Together.ai chat API and return a display string.

        Args:
            prompt: the user message text.
            image_base64: optional base64-encoded PNG to attach to the user
                message as an ``image_url`` content part.

        Returns:
            "📝 Answer:\\n..." on success, otherwise a "❌ Error ..." message.
            Network failures and malformed responses are reported as error
            strings rather than raised.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        if image_base64 is None:
            user_content = prompt
        else:
            user_content = [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                },
            ]

        payload = {
            "model": self.MODEL,
            "messages": [
                {"role": "system", "content": "You are a helpful, expert data analyst assistant."},
                {"role": "user", "content": user_content}
            ]
        }

        try:
            response = requests.post(
                self.API_URL, headers=headers, json=payload, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as exc:
            return f"❌ Error from Together.ai API: {exc}"

        if response.status_code != 200:
            return f"❌ Error from Together.ai API: {response.text}"

        try:
            result = response.json()['choices'][0]['message']['content'].strip()
        except (ValueError, KeyError, IndexError, TypeError, AttributeError):
            # A 200 reply whose body is not the expected chat-completion shape.
            return f"❌ Error from Together.ai API: unexpected response format: {response.text}"
        return f"📝 Answer:\n{result}"


### 3. Gradio Code for Integrating the Together.ai API and Designing the UI

In [11]:
# 🔑 Together.ai API key, read from the TOGETHER_API_KEY environment variable.
# Never hard-code the key here: anything committed in a notebook is visible to
# everyone who can read it. A key that was previously committed in this cell
# should be treated as compromised and rotated.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")
if not TOGETHER_API_KEY:
    print("⚠️ TOGETHER_API_KEY is not set; Together.ai requests will fail until it is provided.")

# Instantiate the agent
agent = DataAnalystAgent(api_key=TOGETHER_API_KEY)

def handle_file_upload(uploaded_file):
    """Load the uploaded file into the shared agent and return a status message.

    Args:
        uploaded_file: the value Gradio passes for the File component. This is
            normally a path string; a temp-file wrapper exposing the path as
            ``.name`` is also accepted, and None means no file was provided.

    Returns:
        A human-readable status string; errors are reported, never raised.
    """
    if uploaded_file is None:
        return "⚠️ No file uploaded."

    # Some Gradio versions/configurations pass a temp-file wrapper instead of
    # a path; its .name attribute is the path on disk.
    if isinstance(uploaded_file, str):
        file_path = uploaded_file
    else:
        file_path = getattr(uploaded_file, "name", uploaded_file)

    try:
        agent.load_file(file_path)
        return "✅ File loaded successfully."
    except Exception as e:
        return f"❌ Error loading file: {str(e)}"

def handle_summarization():
    """Return the agent's summary of the loaded content, or an error message.

    Any exception raised while summarizing is converted into a display string
    so the UI callback never raises.
    """
    try:
        return agent.summarize_content()
    except Exception as exc:
        return f"❌ Error summarizing content: {exc}"

def handle_question_answering(user_question):
    """Answer *user_question* about the loaded content, or return a message.

    An empty or whitespace-only question is rejected up front so that no API
    request is made for it. Any exception raised while answering is converted
    into a display string so the UI callback never raises.
    """
    if not user_question or not user_question.strip():
        return "⚠️ Please enter a question."
    try:
        return agent.ask_question(user_question.strip())
    except Exception as e:
        return f"❌ Error answering question: {str(e)}"

with gr.Blocks() as demo:
    # ---- Layout -----------------------------------------------------------
    # Components are created top to bottom in the order they should appear.
    gr.Markdown("# 📊 Data Analyst Agent")

    file_input = gr.File(
        file_types=['.csv', '.xlsx', '.xls', '.txt', '.pdf', '.docx', '.png', '.jpg', '.jpeg'],
        label="Upload a file (.csv, .xlsx, .txt, .pdf, .docx, .png, .jpg, .jpeg)",
    )
    file_summary = gr.Textbox(interactive=False, label="Upload Status")

    summarize_button = gr.Button("Summarize Content")
    summary_output = gr.Textbox(lines=15, label="Summary Output")

    # Question answering: a text box (Enter submits) plus an explicit button.
    question_input = gr.Textbox(
        placeholder="Enter your question about the uploaded file",
        label="Ask a Question",
    )
    answer_output = gr.Textbox(lines=10, label="Answer")
    answer_button = gr.Button("Get Answer")

    # ---- Event wiring -----------------------------------------------------
    file_input.upload(fn=handle_file_upload, inputs=file_input, outputs=file_summary)
    summarize_button.click(fn=handle_summarization, outputs=summary_output)

    # Pressing Enter in the question box and clicking the button do the same thing.
    for trigger in (question_input.submit, answer_button.click):
        trigger(
            fn=handle_question_answering,
            inputs=question_input,
            outputs=answer_output,
        )

demo.launch()


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




Loaded DOCX with 3730 characters
Loaded PDF with 0 characters
