In [1]:
import base64
import mimetypes
import os

from dotenv import load_dotenv
from openai import OpenAI

# Read key/value pairs from a local .env file (if present) into the process
# environment so the os.getenv("OPENAI_API_KEY") lookups below can see the key.
load_dotenv()

def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode, read in full, base64-encoded and the
    resulting bytes are decoded as UTF-8 text.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def describe_image(image_path):
    """Ask the OpenAI ``gpt-4-turbo`` chat model to describe a local image.

    The image file is base64-encoded and sent inline as a data URL together
    with the instruction "Describe this image in detail.".

    Args:
        image_path: Path of the image file to describe. Must be non-empty.

    Returns:
        The message content of the first choice in the completion response.

    Raises:
        Exception: Every failure (missing ``OPENAI_API_KEY``, empty path,
            unreadable file, API error) is re-raised as ``Exception`` with the
            message prefixed by "Error describing image: "; the underlying
            error is attached as ``__cause__``.
    """
    try:
        # Validate before building the client so a missing key always
        # produces the explicit message below.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OpenAI API key not found in environment variables")

        # Validate image path
        if not image_path:
            raise ValueError("Image path cannot be empty")

        client = OpenAI(api_key=api_key)

        # Convert the image to base64
        base64_image = encode_image_to_base64(image_path)

        # Label the data URL with the type implied by the file extension;
        # fall back to JPEG when the extension is unknown or not an image.
        mime_type, _ = mimetypes.guess_type(str(image_path))
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image in detail."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=300,
        )

        return response.choices[0].message.content

    except Exception as e:
        raise Exception(f"Error describing image: {str(e)}") from e
    
def extract_text_from_img(image_path):
    """Ask the OpenAI ``gpt-4-turbo`` chat model to transcribe text in an image.

    The image file is base64-encoded and sent inline as a data URL together
    with an instruction to return only the text found in the image.

    Args:
        image_path: Path of the image file to read. Must be non-empty.

    Returns:
        The message content of the first choice in the completion response.

    Raises:
        Exception: Every failure (missing ``OPENAI_API_KEY``, empty path,
            unreadable file, API error) is re-raised as ``Exception`` with the
            message prefixed by "Error extracting text from image: "; the
            underlying error is attached as ``__cause__``.
    """
    try:
        # Validate before building the client so a missing key always
        # produces the explicit message below.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OpenAI API key not found in environment variables")

        # Validate image path
        if not image_path:
            raise ValueError("Image path cannot be empty")

        client = OpenAI(api_key=api_key)

        # Convert the image to base64
        base64_image = encode_image_to_base64(image_path)

        # Label the data URL with the type implied by the file extension;
        # fall back to JPEG when the extension is unknown or not an image.
        mime_type, _ = mimetypes.guess_type(str(image_path))
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Extract text from this image only. Do not include any other information."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=300,
        )

        return response.choices[0].message.content

    except Exception as e:
        # The message names this operation (it previously reused the
        # "describing image" wording of the sibling helper).
        raise Exception(f"Error extracting text from image: {str(e)}") from e


In [None]:
# Smoke test on a local sample file. This keyword call matches the
# describe_image(image_path) defined in the first cell; a later cell defines
# another describe_image(image_file), so it only works while the first
# definition is the one currently bound.
describe_image(image_path='img.jpeg')

In [None]:
# Smoke test: run the text-extraction helper from the first cell on the same
# local sample file; the returned string is shown as the cell output.
extract_text_from_img(image_path='img.jpeg')

In [None]:
import streamlit as st
from langchain_community.llms import Ollama
from PIL import Image
import PyPDF2
import base64
import io

def init_ollama(model="llama3.2:latest"):
    """Build a LangChain ``Ollama`` LLM handle for the given model tag.

    Args:
        model: Ollama model tag to use. Defaults to "llama3.2:latest", the tag
            this helper always used, so existing no-argument calls behave the
            same.

    Returns:
        The ``Ollama`` instance constructed with ``model=model``.
    """
    return Ollama(model=model)

def extract_text_from_pdf(pdf_file):
    """Concatenate the extracted text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source (the
            Streamlit code passes the uploaded file object).

    Returns:
        A single string in which each page's text is followed by a newline;
        the empty string when the document has no pages.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # A None/empty result from extract_text() is treated as an empty page so
    # the concatenation cannot fail on it.
    return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)

def describe_image(image_file, model="llama3.2-vision:latest"):
    """Describe an image with a locally served Ollama multimodal model.

    NOTE: this shares its name with the OpenAI-based ``describe_image`` from
    the first cell; running this cell rebinds the name to this version.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts (the Streamlit code
            passes the uploaded file object).
        model: Ollama model tag that receives the image. It must be a model
            that accepts image input; the default is the vision variant.

    Returns:
        The text returned by the model for the description prompt.

    Raises:
        Exception: "Error loading image: ..." when the image cannot be opened
            or re-encoded, "Error generating description: ..." when the model
            call fails. The underlying error is attached as ``__cause__``.
    """
    # Stage 1: load the image and re-encode it as base64 PNG.
    try:
        image = Image.open(image_file)

        # PNG cannot store every Pillow mode (e.g. CMYK JPEGs), so convert
        # anything outside the PNG-writable set to RGB before saving.
        if image.mode not in {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}:
            image = image.convert("RGB")

        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
    except Exception as e:
        raise Exception(f"Error loading image: {str(e)}") from e

    prompt = ("Provide a detailed description of this image, including: "
              "main subjects, colors, composition, lighting, setting, "
              "and any notable details or activities shown.")

    # Stage 2: query the model. Kept in its own try block so a generation
    # failure is not reported as an image-loading failure.
    try:
        llm = Ollama(model=model)
        # Attach the base64 image to the call, then invoke with the prompt.
        return llm.bind(images=[img_str]).invoke(prompt)
    except Exception as e:
        raise Exception(f"Error generating description: {str(e)}") from e

# Create the Streamlit interface
# Create the Streamlit interface
st.title('Sandbox for Marketing Materials Compliance Analysis')

# Streamlit re-executes this script on every interaction, so anything that
# must survive a rerun is kept in session state.
if 'messages' not in st.session_state:
    st.session_state.messages = []
# (name, size) of the upload that has already been analysed, so the same file
# is not re-processed and re-appended to the history on every rerun.
if 'processed_upload' not in st.session_state:
    st.session_state.processed_upload = None

# File uploader
uploaded_file = st.file_uploader("Upload a PDF or Image file", type=['pdf', 'png', 'jpg', 'jpeg'])

if uploaded_file is not None:
    file_type = uploaded_file.type or ""
    is_image = 'image' in file_type

    try:
        if is_image:
            # Shown on every rerun so the preview stays visible while chatting.
            st.image(uploaded_file, caption="Uploaded Image")

        upload_signature = (uploaded_file.name, uploaded_file.size)
        if st.session_state.processed_upload != upload_signature:
            # Make sure the readers below start at the beginning of the file.
            uploaded_file.seek(0)

            if 'pdf' in file_type:
                text_content = extract_text_from_pdf(uploaded_file)
                st.success("PDF processed successfully!")

                # Create prompt for summarization
                summary_prompt = f"Please summarize the following text:\n\n{text_content}"

                # Generate summary using Ollama
                llm = init_ollama()
                response = llm.invoke(summary_prompt)

            elif is_image:
                # Get image description
                response = describe_image(uploaded_file)
                st.success("Image processed successfully!")

            else:
                # Without this branch `response` would be unbound below.
                raise ValueError(f"Unsupported file type: {file_type}")

            # Add the response to chat history
            st.session_state.messages.append({
                "role": "assistant",
                "content": f"Analysis of uploaded file:\n\n{response}"
            })

            # Mark as done only after success so a failed attempt is retried.
            st.session_state.processed_upload = upload_signature

    except Exception as e:
        st.error(f"Error processing file: {str(e)}")

# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chat input for follow-up questions
if prompt := st.chat_input("Ask questions about the uploaded content"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Display user message
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    try:
        llm = init_ollama()

        # Send the whole transcript (file analysis, earlier turns and the new
        # question) so the model can actually answer about the uploaded
        # content; the bare question alone carries no context.
        transcript = "\n\n".join(
            f"{entry['role']}: {entry['content']}"
            for entry in st.session_state.messages
        )
        response = llm.invoke(f"{transcript}\n\nassistant:")

        # Display assistant response
        with st.chat_message("assistant"):
            st.markdown(response)

        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})

    except Exception as e:
        st.error(f"Error communicating with Ollama: {str(e)}")

# Add a button to clear chat history
if st.button("Clear Chat History"):
    st.session_state.messages = []
    # Prefer st.rerun; fall back to the older experimental name on Streamlit
    # versions that do not provide it yet.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()


In [23]:
import subprocess

def parse_ollama_list(listing):
    """Extract the model names from the text printed by ``ollama list``.

    The first line is treated as the column header and skipped. For every
    other non-blank line the first whitespace-delimited field is taken as the
    model name, so the result does not depend on column widths.

    Args:
        listing: The command's standard output as one string.

    Returns:
        A list of model-name strings, in the order they were listed.
    """
    rows = listing.splitlines()[1:]
    return [row.split()[0] for row in rows if row.strip()]


try:
    result = subprocess.run(["ollama", "list"], text=True, capture_output=True)
    print(result.stdout)  # Prints the output of the command
    if result.returncode != 0:
        print(f"Error executing command: {result.stderr.strip()}")

    # export to list
    output = result.stdout.split('\n')
    models = parse_ollama_list(result.stdout)
except Exception as e:
    print(f"Error executing command: {e}")
    # Keep the names defined so later cells see an empty result instead of
    # failing with NameError when the command could not be run.
    result = None
    output = []
    models = []



NAME                      ID              SIZE      MODIFIED     
llava:7b                  8dd30f6b0cb1    4.7 GB    27 hours ago    
llama3.2-vision:latest    085a1fdae525    7.9 GB    27 hours ago    
llama3.2:3b               a80c4f17acd5    2.0 GB    32 hours ago    



In [24]:
models

['llava:7b', 'llama3.2-vision:latest', 'llama3.2:3b']

In [8]:
from langchain_community.chat_models import ChatOpenAI
from dotenv import load_dotenv

# Load variables from a local .env file into the environment; presumably this
# is how the OpenAI key reaches ChatOpenAI - TODO confirm.
load_dotenv()
# Chat model handle used by the next cell. This rebinds the notebook-level
# name `llm`, which the Streamlit cell also assigns.
# NOTE(review): the stray echo of this line in the cell output looks like a
# truncated warning pointing here - confirm whether this import path for
# ChatOpenAI is deprecated.
llm = ChatOpenAI(model='gpt-4-turbo')

  llm = ChatOpenAI(model='gpt-4-turbo')


In [9]:
# Smoke test: send one prompt through the chat model created in the previous
# cell; the returned message object is displayed as the cell output.
llm.invoke("What is the capital of France?")

AIMessage(content='The capital of France is Paris.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 14, 'total_tokens': 22, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4-turbo', 'system_fingerprint': 'fp_f17929ee92', 'finish_reason': 'stop', 'logprobs': None}, id='run-ae0de308-95e6-470a-adb3-52d4e9bdf708-0')