In [31]:
import pandas as pd
import re
import requests
import time
import os
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

from urllib3.exceptions import InsecureRequestWarning
warnings.filterwarnings('ignore', category=InsecureRequestWarning)

from dotenv import load_dotenv

import vertexai

In [32]:
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Project to bill/route Vertex AI calls to; os.getenv yields None when the variable is unset.
project_id = os.getenv('VERTEXAI_PROJECT_ID')
# Initialise the Vertex AI SDK once for the whole notebook (region is fixed to us-central1).
vertexai.init(project=project_id, location="us-central1")


In [33]:
from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    Image,
    Part,
)

In [34]:
# Shared Gemini model handle; the calculate_image_features_* functions below call
# text_model.generate_content with mixed text + image contents.
text_model = GenerativeModel("gemini-1.5-pro")

In [35]:
import http.client
import typing
import urllib.request

import IPython.display
from PIL import Image as PIL_Image
from PIL import ImageOps as PIL_ImageOps


def display_images(
    images: typing.Iterable[Image],
    max_width: int = 600,
    max_height: int = 350,
) -> None:
    """Show each image inline in the notebook, shrinking any that are too large.

    Args:
        images: Images exposing a ``_pil_image`` attribute holding the
            underlying PIL image.
        max_width: Largest width, in pixels, an image may have before it is
            scaled down.
        max_height: Largest height, in pixels, an image may have before it is
            scaled down.
    """
    bounding_box = (max_width, max_height)
    for image in images:
        picture = typing.cast(PIL_Image.Image, image._pil_image)
        # RGB is supported by all Jupyter environments (e.g. RGBA is not yet)
        picture = picture if picture.mode == "RGB" else picture.convert("RGB")
        width, height = picture.size
        oversized = width > max_width or height > max_height
        if oversized:
            # Fit inside the bounding box so the notebook output stays compact.
            picture = PIL_ImageOps.contain(picture, bounding_box)
        IPython.display.display(picture)

def get_image_bytes_from_url(image_url: str, timeout: float = 30.0) -> bytes:
    """Download an image over HTTP(S) and return its raw bytes.

    Args:
        image_url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. ``requests``
            waits indefinitely when no timeout is given, which would stall a
            long batch run on a single unresponsive host.

    Returns:
        The undecoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(
        image_url,
        # Some image hosts reject requests that carry the default client UA.
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    response.raise_for_status()  # Raise an error for bad status codes
    return response.content

def load_image_from_url(image_url: str) -> Image:
    """Fetch an image from ``image_url`` and wrap it for use in a Gemini prompt.

    The return value is built with ``Image.from_bytes``, where ``Image`` is the
    class imported from ``vertexai.generative_models`` (not a PIL image).

    Args:
        image_url: Address of the image to download.

    Returns:
        The image wrapped in the ``Image`` type accepted in prompt contents.
    """
    image_bytes = get_image_bytes_from_url(image_url)
    return Image.from_bytes(image_bytes)


def display_content_as_image(content: str | Image | Part) -> bool:
    """Display ``content`` inline when it is an ``Image``.

    Returns:
        True if ``content`` was an ``Image`` and has been displayed,
        False otherwise (nothing is displayed).
    """
    if isinstance(content, Image):
        display_images([content])
        return True
    return False


def display_content_as_video(content: str | Image | Part) -> bool:
    """Display ``content`` as an inline video player when it is a ``Part``.

    The part's ``file_data.file_uri`` has any ``gs://`` prefix removed and the
    remainder is appended to the ``storage.googleapis.com`` host to form the
    URL handed to the player.

    Returns:
        True if ``content`` was a ``Part`` and a player was displayed,
        False otherwise (nothing is displayed).
    """
    if isinstance(content, Part):
        file_path = typing.cast(Part, content).file_data.file_uri.removeprefix("gs://")
        video_url = f"https://storage.googleapis.com/{file_path}"
        player = IPython.display.Video(video_url, width=600)
        IPython.display.display(player)
        return True
    return False


def print_multimodal_prompt(contents: list[str | Image | Part]):
    """
    Given contents that would be sent to Gemini,
    output the full multimodal prompt for ease of readability.

    Each item is tried as an image first, then as a video; anything that is
    neither is printed as-is.
    """
    for item in contents:
        # `or` short-circuits, so the video renderer only runs for non-images.
        rendered = display_content_as_image(item) or display_content_as_video(item)
        if not rendered:
            print(item)

## Zero-Shot (In-Context)

In [36]:
# def calculate_image_features_title(image_url):
#     try:
#         image = load_image_from_url(image_url)

#         instruction = '''
# Context: You are an expert programmer experienced in different technology stacks. You encountered an issue while working on a project. The screenshot shows the problem but you are not given any textual content.
# Generate a Stack Overflow title that:
# 1. Follows Stack Overflow guidelines
# 2. Is clear and concise
# 3. Summarizes the main technical issue shown
# 4. Take inspiration from your trained data for stack overflow.

# Output Format: 
# TITLE: <<Generated Title>>

# Make sure to start with exactly "TITLE:"
# '''

#         prompt = '''
# Generate a realistic Stack Overflow title about the following IDE/code screenshot.
# '''

#         contents = [instruction,
#                     image,
#                     prompt]

#         generation_config = GenerationConfig(
#             temperature=0,
#             top_p=0.8,
#             top_k=40,
#             candidate_count=1,
#             max_output_tokens=2048,
#         )

#         responses = text_model.generate_content(
#             contents,
#             generation_config=generation_config,
#             stream=True,
#         )

#         generation_prompt = ''
#         for response in responses:
#             generation_prompt += response.text    

#         # Extract title
#         title = ""
#         if "TITLE:" in generation_prompt:
#             title = generation_prompt.split("TITLE:")[1].strip()

#         print(title)
#         return title
    
#     except Exception as e:
#         print(e)
#         return ""

# def calculate_image_features_body(image_url):
#     try:
#         image = load_image_from_url(image_url)

#         instruction = '''
# Context: You are an expert programmer experienced in different technology stacks. You encountered an issue while working on a project. The screenshot shows the problem but you are not given any textual content.
# Generate a detailed Stack Overflow question body that:
# 1. Follows Stack Overflow guidelines
# 2. Includes relevant code/IDE context
# 3. Clearly states the expected vs. actual behavior
# 4. Take inspiration from your trained data for stack overflow.

# Output Format: 
# BODY: <<Generated Body>>

# Make sure to start with exactly "BODY:"
# '''

#         prompt = '''
# Generate a realistic Stack Overflow question body about the following IDE/code screenshot.
# '''

#         contents = [instruction,
#                     image,
#                     prompt]

#         generation_config = GenerationConfig(
#             temperature=0,
#             top_p=0.8,
#             top_k=40,
#             candidate_count=1,
#             max_output_tokens=2048,
#         )

#         responses = text_model.generate_content(
#             contents,
#             generation_config=generation_config,
#             stream=True,
#         )

#         generation_prompt = ''
#         for response in responses:
#             generation_prompt += response.text    

#         # Extract body
#         body = ""
#         if "BODY:" in generation_prompt:
#             body = generation_prompt.split("BODY:")[1].strip()

#         print(body)
#         return body
    
#     except Exception as e:
#         print(e)
#         return ""

# def process_rows_with_delay(dataframe):
#     total_rows_processed = 0
#     results = []  # List to store results
    
#     for i, row in tqdm(dataframe.iterrows(), total=dataframe.shape[0], desc=f'Processing'):
#         print("\n" + "="*50)
#         print(f"Processing Row ID: {row['Id']}")
        
#         image_urls = [url.strip(",") for url in re.findall(r"'([^']*)'", str(row.get('ImageURLs')))]
#         title = row['Title'] 
#         body = row['Body']
        
#         # Store LLM responses for all images of this row
#         title_responses = []
#         body_responses = []
        
#         for image_url in image_urls:
#             print(f"\nProcessing image {len(title_responses) + 1}/{len(image_urls)}")
            
#             # Generate title
#             print("Generating title...")
#             title_text = calculate_image_features_title(image_url)
#             title_responses.append(title_text)
            
#             # Generate body
#             print("Generating body...")
#             body_text = calculate_image_features_body(image_url)
#             body_responses.append(body_text)
        
#         # Combine all responses into strings
#         combined_title_response = " ||| ".join(title_responses)
#         combined_body_response = " ||| ".join(body_responses)
        
#         # Store the results
#         results.append({
#             'Id': row['Id'],
#             'Title': title,
#             'Body': body,
#             'ImageURLs': row['ImageURLs'],
#             'llm_title_response': combined_title_response,
#             'llm_body_response': combined_body_response
#         })
        
#         total_rows_processed += 1
#         print(f"Completed processing Row ID: {row['Id']}")
#         print("="*50 + "\n")
        
#         # Add delay after every 2 rows
#         if total_rows_processed % 2 == 0:
#             print("\nPausing for 60 seconds after processing 2 rows...")
#             intermediate_df = pd.DataFrame(results)
#             intermediate_df.to_csv('Data/llm_responses_intermediate.csv', index=False)
#             print("Saved intermediate results to CSV")
            
#             for remaining in range(60, 0, -1):
#                 print(f"\rTime remaining: {remaining} seconds...", end='')
#                 time.sleep(1)
#             print("\nResuming processing...")
    
#     # Create final DataFrame and save to CSV
#     final_df = pd.DataFrame(results)
#     final_df.to_csv('Data/llm_responses_final_zero_shot.csv', index=False)
#     print("\nFinal results saved to Data/llm_responses_final_zero_shot.csv")
    
#     return final_df

## Few-Shot

In [37]:
# def calculate_image_features_title(image_url):
#     try:
#         image = load_image_from_url(image_url)
#         example_image1 = load_image_from_url("https://i.sstatic.net/rUHWv1Ok.png")
#         example_text1 = "TITLE: Trying to Stack 2 Columns into one Excel"
        
#         example_image2 = load_image_from_url("https://i.sstatic.net/TGFPo9Jj.png")
#         example_text2 = "TITLE: is getenv_s not part of cstdlib?"

#         instruction = '''
# You are an expert software developer and Stack Overflow analyst. You are given the following images and stack overflow post titles as examples:
# '''

#         prompt = '''
# Now generate a similar title for the given screenshot:
# Follow the pattern:
# 1. Clear and concise title that summarizes the main issue
# 2. Take inspiration from the example titles given

# Output Format: 
# TITLE: <<Generated Title>>
# Make sure to start with exactly "TITLE:" 
# '''

#         contents = [instruction,
#                     example_image1,
#                     example_text1,
#                     example_image2,
#                     example_text2,
#                     image,
#                     prompt]

#         generation_config = GenerationConfig(
#             temperature=0,
#             top_p=0.8,
#             top_k=40,
#             candidate_count=1,
#             max_output_tokens=2048,
#         )

#         responses = text_model.generate_content(
#             contents,
#             generation_config=generation_config,
#             stream=True,
#         )

#         generation_prompt = ''
#         for response in responses:
#             generation_prompt += response.text    

#         # Extract title
#         title = ""
#         if "TITLE:" in generation_prompt:
#             title = generation_prompt.split("TITLE:")[1].strip()

#         print(title)

#         return title
    
#     except Exception as e:
#         print(e)
#         return ""

# def calculate_image_features_body(image_url):
#     try:
#         image = load_image_from_url(image_url)
#         example_image1 = load_image_from_url("https://i.sstatic.net/rUHWv1Ok.png")
#         example_text1 = '''
# BODY: I am currently attempting to combine two columns into one but have encountered an error that prevents me from completing this task. Additionally, the data from the second column appears to be pasting incorrectly after the error messages.I would greatly appreciate any assistance with this issue.=IF(P2<>"",P2,INDEX($R$2:$R$5000,ROW()-COUNTA($P$2:$P$5000)))Column P and R contains formulas.
# '''
#         example_image2 = load_image_from_url("https://i.sstatic.net/TGFPo9Jj.png")
#         example_text2 = '''
# BODY: C11 added new bounds-checked functions to the standard library, such as getenv_s.However, when I include <cstdlib>, I do not have std::getenv_s, only getenv_s (global namespace).cppreference has the following note:As with all bounds-checked functions, getenv_s is only guaranteed to be available if __STDC_LIB_EXT1__ is defined by the implementation and if the user defines __STDC_WANT_LIB_EXT1__ to the integer constant 1 before including <stdlib.h>.Even when I define __STDC_WANT_LIB_EXT1__ as 1, My compiler (MSVC C++23) does not find the std::getenv_s function.Isn't <cstdlib> supposed to bring every symbol of <stdlib.h> into the std namespace?
# '''

#         instruction = '''
# You are an expert software developer and Stack Overflow analyst. You are given the following images and stack overflow post bodies as examples:
# '''

#         prompt = '''
# Now generate a similar question body for the given screenshot:
# Follow the pattern:
# 1. Clear and concise question body that explains the issue in detail
# 2. What is the most important part of an error that can be present in the image?
# 3. Take inspiration from the example bodies given

# Output Format: 
# BODY: <<Generated Body>>
# Make sure to start with exactly "BODY:" 
# '''

#         contents = [instruction,
#                     example_image1,
#                     example_text1,
#                     example_image2,
#                     example_text2,
#                     image,
#                     prompt]

#         generation_config = GenerationConfig(
#             temperature=0,
#             top_p=0.8,
#             top_k=40,
#             candidate_count=1,
#             max_output_tokens=2048,
#         )

#         responses = text_model.generate_content(
#             contents,
#             generation_config=generation_config,
#             stream=True,
#         )

#         generation_prompt = ''
#         for response in responses:
#             generation_prompt += response.text    

#         # Extract body
#         body = ""
#         if "BODY:" in generation_prompt:
#             body = generation_prompt.split("BODY:")[1].strip()

#         print(body)
#         return body
    
#     except Exception as e:
#         print(e)
#         return ""

# def process_rows_with_delay(dataframe):
#     total_rows_processed = 0
#     results = []  # List to store results
    
#     for i, row in tqdm(dataframe.iterrows(), total=dataframe.shape[0], desc=f'Processing'):
#         print("\n" + "="*50)
#         print(f"Processing Row ID: {row['Id']}")
        
#         image_urls = [url.strip(",") for url in re.findall(r"'([^']*)'", str(row.get('ImageURLs')))]
#         title = row['Title'] 
#         body = row['Body']
        
#         # Store LLM responses for all images of this row
#         title_responses = []
#         body_responses = []
        
#         for image_url in image_urls:
#             print(f"\nProcessing image {len(title_responses) + 1}/{len(image_urls)}")
            
#             # Generate title
#             print("Generating title...")
#             title_text = calculate_image_features_title(image_url)
#             title_responses.append(title_text)
            
#             # Generate body
#             print("Generating body...")
#             body_text = calculate_image_features_body(image_url)
#             body_responses.append(body_text)
        
#         # Combine all responses into strings
#         combined_title_response = " ||| ".join(title_responses)
#         combined_body_response = " ||| ".join(body_responses)
        
#         # Store the results
#         results.append({
#             'Id': row['Id'],
#             'Title': title,
#             'Body': body,
#             'ImageURLs': row['ImageURLs'],
#             'llm_title_response': combined_title_response,
#             'llm_body_response': combined_body_response
#         })
        
#         total_rows_processed += 1
#         print(f"Completed processing Row ID: {row['Id']}")
#         print("="*50 + "\n")
        
#         # Add delay after every 2 rows
#         if total_rows_processed % 2 == 0:
#             print("\nPausing for 60 seconds after processing 2 rows...")
#             intermediate_df = pd.DataFrame(results)
#             intermediate_df.to_csv('Data/llm_responses_intermediate.csv', index=False)
#             print("Saved intermediate results to CSV")
            
#             for remaining in range(60, 0, -1):
#                 print(f"\rTime remaining: {remaining} seconds...", end='')
#                 time.sleep(1)
#             print("\nResuming processing...")
    
#     # Create final DataFrame and save to CSV
#     final_df = pd.DataFrame(results)
#     final_df.to_csv('Data/llm_responses_final_few_shot.csv', index=False)
#     print("\nFinal results saved to Data/llm_responses_final_few_shot.csv")
    
#     return final_df

## Chain-of-Thought

In [38]:
def calculate_image_features_title(image_url):
    """Generate a Stack Overflow title for a screenshot using a chain-of-thought prompt.

    The image at ``image_url`` is downloaded and sent to ``text_model``
    between a reasoning-process instruction and a short task prompt. The
    streamed reply is concatenated and the text after the ``TITLE:`` marker is
    extracted.

    Args:
        image_url: URL of the screenshot to generate a title for.

    Returns:
        The stripped text between the first ``TITLE:`` marker and the next one
        (or the end of the reply). Returns ``""`` when the reply contains no
        marker or when the download or generation raises; failures are
        reported on stdout rather than propagated so a batch run keeps going.
    """
    try:
        image = load_image_from_url(image_url)

        instruction = '''
Reasoning Process for Title Generation:

1. Initial Observation
- What is immediately visible in the screenshot?
- What IDE/tool is being used or what kind of code is shown?
- Are there any error messages or unusual indicators?

2. Problem Identification
- What seems to be the main issue?
- Which specific components are involved?
- Is this a configuration, syntax, or runtime issue?

3. Title Formulation
Based on the above analysis, construct a title that:
- Clearly summarizes the main technical issue
- Uses relevant technical keywords
- Is concise and specific
- Takes inspiration from your trained data on stack overflow
- Would be easily searchable

Output Format: 
TITLE: <<Generated Title>>
Make sure to start with exactly "TITLE:"
'''

        prompt = '''
Analyze the given IDE/code screenshot and formulate a Stack Overflow title by following the given reasoning process:
'''

        contents = [instruction,
                    image,
                    prompt]

        generation_config = GenerationConfig(
            temperature=0,
            top_p=0.8,
            top_k=40,
            candidate_count=1,
            max_output_tokens=2048,
        )

        responses = text_model.generate_content(
            contents,
            generation_config=generation_config,
            stream=True,
        )

        # The reply is streamed in chunks; stitch them back into one string.
        reply_text = ''.join(chunk.text for chunk in responses)

        # Extract title
        title = ""
        if "TITLE:" in reply_text:
            title = reply_text.split("TITLE:")[1].strip()

        print(title)
        return title

    except Exception as e:
        # Best-effort: one bad image must not abort the whole batch, but say
        # which URL failed and why so the empty result can be traced later.
        print(f"Title generation failed for {image_url}: {type(e).__name__}: {e}")
        return ""

def calculate_image_features_body(image_url):
    """Generate a Stack Overflow question body for a screenshot using a chain-of-thought prompt.

    The image at ``image_url`` is downloaded and sent to ``text_model``
    between a reasoning-process instruction and a short task prompt. The
    streamed reply is concatenated and the text after the ``BODY:`` marker is
    extracted.

    Args:
        image_url: URL of the screenshot to generate a question body for.

    Returns:
        The stripped text between the first ``BODY:`` marker and the next one
        (or the end of the reply). Returns ``""`` when the reply contains no
        marker or when the download or generation raises; failures are
        reported on stdout rather than propagated so a batch run keeps going.
    """
    try:
        image = load_image_from_url(image_url)

        instruction = '''
Reasoning Process for Body Generation:

1. Initial Observation
- What is immediately visible in the screenshot?
- What IDE/tool is being used or what kind of code is shown?
- Are there any error messages or unusual indicators?

2. Problem Identification
- What seems to be the main issue?
- Which specific components are involved?
- Is this a configuration, syntax, or runtime issue?

3. Context Building
- What background or programming information is needed to understand this issue?
- Which framework/language versions are relevant?
- What might have led to this situation?

4. Solution Attempts Analysis
- What obvious solutions might have been tried?
- What documentation might be relevant?
- What troubleshooting steps would make sense?

5. Question Body Formulation
Based on the above analysis, construct a detailed body that:
- Clearly explains the context and problem
- Includes all relevant technical details
- Takes inspiration from your trained data on stack overflow
- Shows research effort and attempted solutions
- Is specific and answerable

Output Format: 
BODY: <<Generated Body>>
Make sure to start with exactly "BODY:"
'''

        prompt = '''
Analyze the given IDE/code screenshot and formulate a Stack Overflow question body by following the given reasoning process:
'''

        contents = [instruction,
                    image,
                    prompt]

        generation_config = GenerationConfig(
            temperature=0,
            top_p=0.8,
            top_k=40,
            candidate_count=1,
            max_output_tokens=2048,
        )

        responses = text_model.generate_content(
            contents,
            generation_config=generation_config,
            stream=True,
        )

        # The reply is streamed in chunks; stitch them back into one string.
        reply_text = ''.join(chunk.text for chunk in responses)

        # Extract body
        body = ""
        if "BODY:" in reply_text:
            body = reply_text.split("BODY:")[1].strip()

        print(body)
        return body

    except Exception as e:
        # Best-effort: one bad image must not abort the whole batch, but say
        # which URL failed and why so the empty result can be traced later.
        print(f"Body generation failed for {image_url}: {type(e).__name__}: {e}")
        return ""

def process_rows_with_delay(
    dataframe,
    rows_per_pause=2,
    pause_seconds=60,
    intermediate_path='Data/llm_responses_intermediate.csv',
    final_path='Data/llm_responses_final_chain_of_thoughts.csv',
):
    """Generate a title and body for every image of every row, pausing periodically.

    For each row the image URLs are pulled out of the ``ImageURLs`` column,
    ``calculate_image_features_title`` and ``calculate_image_features_body``
    are called per URL, and the per-image answers are joined with ``" ||| "``.
    After every ``rows_per_pause`` rows the results so far are written to
    ``intermediate_path`` and, unless the last row was just handled, the
    function sleeps for ``pause_seconds`` before continuing.

    Args:
        dataframe: Input frame; the columns ``Id``, ``Title``, ``Body`` and
            ``ImageURLs`` are read.
        rows_per_pause: Number of rows to process between checkpoints/pauses.
        pause_seconds: Whole seconds to wait at each pause.
        intermediate_path: CSV file overwritten at each checkpoint.
        final_path: CSV file written once all rows are processed.

    Returns:
        A DataFrame with the columns ``Id``, ``Title``, ``Body``,
        ``ImageURLs``, ``llm_title_response`` and ``llm_body_response``,
        one row per input row.

    Raises:
        ValueError: If ``rows_per_pause`` is smaller than 1.
    """
    if rows_per_pause < 1:
        raise ValueError("rows_per_pause must be at least 1")

    total_rows = dataframe.shape[0]
    total_rows_processed = 0
    results = []  # List to store results

    for _, row in tqdm(dataframe.iterrows(), total=total_rows, desc='Processing'):
        print("\n" + "="*50)
        print(f"Processing Row ID: {row['Id']}")

        # ImageURLs is handled as text: every single-quoted substring is taken as one URL.
        image_urls = [url.strip(",") for url in re.findall(r"'([^']*)'", str(row.get('ImageURLs')))]
        title = row['Title']
        body = row['Body']

        # Store LLM responses for all images of this row
        title_responses = []
        body_responses = []

        for image_number, image_url in enumerate(image_urls, start=1):
            print(f"\nProcessing image {image_number}/{len(image_urls)}")

            # Generate title
            print("Generating title...")
            title_responses.append(calculate_image_features_title(image_url))

            # Generate body
            print("Generating body...")
            body_responses.append(calculate_image_features_body(image_url))

        # Store the results, with per-image responses combined into one string each
        results.append({
            'Id': row['Id'],
            'Title': title,
            'Body': body,
            'ImageURLs': row['ImageURLs'],
            'llm_title_response': " ||| ".join(title_responses),
            'llm_body_response': " ||| ".join(body_responses)
        })

        total_rows_processed += 1
        print(f"Completed processing Row ID: {row['Id']}")
        print("="*50 + "\n")

        if total_rows_processed % rows_per_pause == 0:
            # Checkpoint first so an interruption during the pause loses nothing.
            pd.DataFrame(results).to_csv(intermediate_path, index=False)
            print("Saved intermediate results to CSV")

            # No model calls follow the last row, so waiting there is wasted time.
            if total_rows_processed < total_rows:
                print(f"\nPausing for {pause_seconds} seconds after processing {rows_per_pause} rows...")
                for remaining in range(pause_seconds, 0, -1):
                    print(f"\rTime remaining: {remaining} seconds...", end='')
                    time.sleep(1)
                print("\nResuming processing...")

    # Create final DataFrame and save to CSV
    final_df = pd.DataFrame(results)
    final_df.to_csv(final_path, index=False)
    print(f"\nFinal results saved to {final_path}")

    return final_df

In [39]:
# Input posts for generation; process_rows_with_delay reads the Id, Title, Body and ImageURLs columns.
dataset = pd.read_csv('Data/filtered_data_matched.csv')

In [40]:
# Summarise the input frame: column names, non-null counts and dtypes.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143 entries, 0 to 142
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             143 non-null    int64  
 1   Id                     143 non-null    int64  
 2   PostTypeId             143 non-null    int64  
 3   AcceptedAnswerId       34 non-null     float64
 4   ParentId               0 non-null      float64
 5   CreationDate           143 non-null    object 
 6   DeletionDate           0 non-null      float64
 7   Score                  143 non-null    int64  
 8   ViewCount              143 non-null    int64  
 9   Body                   143 non-null    object 
 10  OwnerUserId            143 non-null    float64
 11  OwnerDisplayName       0 non-null      float64
 12  LastEditorUserId       74 non-null     float64
 13  LastEditorDisplayName  0 non-null      float64
 14  LastEditDate           74 non-null     object 
 15  LastAc

In [41]:
# def process_rows_with_delay(dataframe):
#     total_rows_processed = 0
#     results = []  # List to store results
    
#     for i, row in tqdm(dataframe.iterrows(), total=dataframe.shape[0], desc=f'Processing'):
#         print("\n" + "="*50)
#         print(f"Processing Row ID: {row['Id']}")
        
#         image_urls = [url.strip(",") for url in re.findall(r"'([^']*)'", str(row.get('ImageURLs')))]
#         title = row['Title'] 
#         body = row['Body']
        
#         # Store LLM responses for all images of this row
#         title_responses = []
#         body_responses = []
        
#         for image_url in image_urls:
#             print(f"\nProcessing image {len(title_responses) + 1}/{len(image_urls)}")
#             title_text, body_text = calculate_image_features_combined(image_url)
#             title_responses.append(title_text)
#             body_responses.append(body_text)
        
#         # Combine all responses into strings
#         combined_title_response = " ||| ".join(title_responses)
#         combined_body_response = " ||| ".join(body_responses)
        
#         # Store the results
#         results.append({
#             'Id': row['Id'],
#             'Title': title,
#             'Body': body,
#             'ImageURLs': row['ImageURLs'],
#             'llm_title_response': combined_title_response,
#             'llm_body_response': combined_body_response
#         })
        
#         total_rows_processed += 1
#         print(f"Completed processing Row ID: {row['Id']}")
#         print("="*50 + "\n")
        
#         # Add delay after every 5 rows
#         if total_rows_processed % 5 == 0:
#             print("\nPausing for 60 seconds after processing 5 rows...")
#             # Save intermediate results to CSV
#             intermediate_df = pd.DataFrame(results)
#             intermediate_df.to_csv('Data/llm_responses_intermediate.csv', index=False)
#             print("Saved intermediate results to CSV")
            
#             for remaining in range(60, 0, -1):
#                 print(f"\rTime remaining: {remaining} seconds...", end='')
#                 time.sleep(1)
#             print("\nResuming processing...")
    
#     # Create final DataFrame and save to CSV
#     final_df = pd.DataFrame(results)
#     final_df.to_csv('Data/llm_responses_final_few_shot.csv', index=False)
#     print("\nFinal results saved to Data/llm_responses_final_few_shot.csv")
    
#     return final_df

In [None]:

# Run generation over every row of `dataset`; the function pauses periodically and
# writes intermediate and final CSV files as it goes.
# NOTE(review): this cell carries no execution count in the saved notebook, so the
# CSV loaded in the following cell presumably comes from an earlier run — confirm.
results_df = process_rows_with_delay(dataset)

In [43]:
# Load the saved chain-of-thought results back from disk for inspection.
# NOTE(review): this rebinds `dataset`, replacing the input frame read from
# 'Data/filtered_data_matched.csv'; re-running the generation cell after this
# one would feed it the results file instead — consider a distinct name.
dataset = pd.read_csv('Data/llm_responses_final_chain_of_thoughts.csv')
dataset.head()

Unnamed: 0,Id,Title,Body,ImageURLs,llm_title_response,llm_body_response
0,79146548,GitHub Copilot responds to 'Hey Code' but dict...,"As the title explains, I can start an inline c...",['https://i.sstatic.net/MgGjdapB.png'],How to use GitHub Copilot to generate code for...,I'm trying to use GitHub Copilot to help me wr...
1,79146419,How can I fix my Workflow file to successfully...,I am trying to use Github Actions with Azure S...,['https://i.sstatic.net/THwNK2Jj.png'],"Azure deployment fails with ""npm: not found"" w...",I am trying to deploy my ASP.NET Core applicat...
2,79146412,LINQPad 8 Dump Property Order different that L...,"In LINQPad 5 with Linq-to-Sql DataContext, if ...",['https://i.sstatic.net/efq4SfvI.png'],How to Select Specific Properties from a User ...,I'm trying to retrieve user properties and the...
3,79146127,SyntaxError: Cannot use import statement outsi...,"I'm using TypeScript, ESM, npm, and ts-jest. U...",['https://i.sstatic.net/Jp5wj6k2.png'],"""SyntaxError: Cannot use import statement outs...","I'm encountering a ""SyntaxError: Cannot use im..."
4,79145758,Typescript Polymorphic Component Event Handler,I have written a strongly-typed Polymorphic Ty...,['https://i.sstatic.net/19LCKEF3.png'],Property 'currentTarget' does not exist on typ...,I'm encountering a TypeScript error in my Reac...


In [44]:
# Summarise the results frame: column names, non-null counts and dtypes.
# A non-null count below the row count marks rows whose generated text is missing.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143 entries, 0 to 142
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Id                  143 non-null    int64 
 1   Title               143 non-null    object
 2   Body                143 non-null    object
 3   ImageURLs           143 non-null    object
 4   llm_title_response  143 non-null    object
 5   llm_body_response   142 non-null    object
dtypes: int64(1), object(5)
memory usage: 6.8+ KB
