### Importing the TSV file and defining the API endpoint

In [51]:
import pandas as pd
import requests

# Captioning service exposed by the Docker container.
# The host port (8765 here) and the path must match how the container was started.
CAPTION_ENDPOINT = "http://localhost:8765/inception/v3/caption/image"

# Tab-separated input table; point this at your own copy if it lives elsewhere.
input_tsv = "arcgis_img_captions_objects-Copy1.tsv"

# Load the tab-delimited table into a DataFrame.
df = pd.read_csv(input_tsv, delimiter="\t")

In [53]:
# Work on an independent copy: later cells assign into `df2`, and a full slice
# such as `df.iloc[:, :]` may share data with `df`, so those writes could leak
# back into the original frame or raise SettingWithCopyWarning.
df2 = df.copy()
display(df2.shape)
display(df2)

(9731, 36)

Unnamed: 0,city,country,description,location,state,state_abbrev,longitude,latitude,city_longitude,city_latitude,...,Avg_PRCP,Avg_SNWD,Diurnal Temperature Range (DTR),intrastate_gaspipe_within_10miles,interstate_gaspipe_within_10miles,mental_health_provider,mental_health_RGB,IMG_LOC,caption,objects
0,Ada,United States,Ada witch - Sometimes you can see a misty blue...,Ada Cemetery,Michigan,MI,-85.504893,42.962106,-85.495480,42.960727,...,21.534207,14.889049,119.184638,False,False,677.0,"(np.int64(187), np.int64(204), np.int64(199))",Image/image_0.png,a woman standing next to a red fire hydrant .,"megalith, megalithic structure, cloak"
1,Addison,United States,A little girl was killed suddenly while waitin...,North Adams Rd.,Michigan,MI,-84.381843,41.971425,-84.347168,41.986434,...,23.936324,5.131693,116.806078,False,False,677.0,"(np.int64(187), np.int64(204), np.int64(199))",Image/image_1.png,a woman standing next to a fire hydrant .,trench coat
2,Adrian,United States,If you take Gorman Rd. west towards Sand Creek...,Ghost Trestle,Michigan,MI,-84.035656,41.904538,-84.037166,41.897547,...,24.174107,3.531884,117.425321,False,False,677.0,"(np.int64(187), np.int64(204), np.int64(199))",Image/image_2.png,a park bench sitting in the middle of a forest .,"bannister, banister, balustrade, balusters, ha..."
3,Adrian,United States,"In the 1970's, one room, room 211, in the old ...",Siena Heights University,Michigan,MI,-84.017565,41.905712,-84.037166,41.897547,...,24.174107,3.531884,117.425321,False,False,677.0,"(np.int64(187), np.int64(204), np.int64(199))",Image/image_3.png,a black and white photo of a bathroom .,"safe, sliding door"
4,Albion,United States,Kappa Delta Sorority - The Kappa Delta Sororit...,Albion College,Michigan,MI,-84.745177,42.244006,-84.753030,42.243097,...,23.936324,5.131693,116.806078,True,False,677.0,"(np.int64(187), np.int64(204), np.int64(199))",Image/image_4.png,a black and white photo of a brick building,"paddlewheel, paddle wheel, prison, prison house"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9726,Westminster,United States,at 12 midnight you can see a lady with two lit...,city hall,Colorado,CO,-105.048936,39.862610,-105.037205,39.836653,...,14.987448,13.241283,157.600397,False,False,457.0,"(np.int64(139), np.int64(197), np.int64(196))",Image/image_9726.png,a large building with a clock on the front of ...,castle
9727,Westminster,United States,Is haunted by the victims of a murder that hap...,Pillar of Fire,Colorado,CO,-105.032091,39.847237,-105.037205,39.836653,...,14.987448,13.241283,157.600397,False,False,457.0,"(np.int64(139), np.int64(197), np.int64(196))",Image/image_9727.png,a large building with a clock on the front of ...,"monastery, vault"
9728,Wheat Ridge,United States,The institution was for kids 18 years old and ...,Ridge Mental Institution,Colorado,CO,-105.063974,39.769726,-105.077206,39.766098,...,14.987448,13.241283,157.600397,False,False,457.0,"(np.int64(139), np.int64(197), np.int64(196))",Image/image_9728.png,a black and white photo of a train station .,"prison, prison house, viaduct"
9729,Wheat Ridge,United States,Gymnasium - their have been reports of a litt...,Wheat Ridge Middle School,Colorado,CO,-105.103613,39.764055,-105.077206,39.766098,...,14.987448,13.241283,157.600397,False,False,457.0,"(np.int64(139), np.int64(197), np.int64(196))",Image/image_9729.png,a group of people playing a game of tennis .,"basketball, volleyball"


In [58]:
# Show every field of the row stored under index label 1249.
display(df2.loc[1249])

city                                                                        Wilmington
country                                                                  United States
description                          An apparition of a blond blue-eyed man has bee...
location                                                                 Drum Barracks
state                                                                       California
state_abbrev                                                                        CA
longitude                                                                   -118.25757
latitude                                                                     33.784471
city_longitude                                                             -118.264357
city_latitude                                                                33.785795
date_occured                                                                2025-01-01
Audio                                      

### Code for generating captions for images by first calling the HTTP file server and then the Docker container

In [68]:
import pandas as pd
import requests

# Endpoint of the captioning service running inside the Docker container.
CAPTION_ENDPOINT = "http://localhost:8765/inception/v3/caption/image"


# Make sure a "caption" column is present, initialised to empty strings.
if "caption" not in df2:
    df2.loc[:, "caption"] = ""

def get_caption(image_url):
    """
    Request a caption for one image from the captioning service.

    Only the filename of ``image_url`` is used (e.g. "Image/image_0.png" ->
    "image_0.png"). It is appended to the address of the local HTTP file
    server, and the resulting URL is passed to ``CAPTION_ENDPOINT``.

    Parameters
    ----------
    image_url : str
        Shortened image path or URL. Non-string values (``None``, NaN) and
        values that contain no filename yield ``""`` without any request.

    Returns
    -------
    str
        The ``sentence`` of the caption with the highest ``confidence``, or
        ``""`` when the input is unusable, the request fails, or the response
        lacks a non-empty ``captions`` list. Errors are printed, never raised.
    """
    # Guard against missing cells (None/NaN) so one bad row cannot abort a batch.
    if not isinstance(image_url, str):
        print("Skipping caption request; image path is not a string:", image_url)
        return ""

    # Remove directory prefix and keep only the filename.
    filename = image_url.strip().split("/")[-1]
    if not filename:
        # An empty filename would make the service fetch the server root instead of an image.
        print("Skipping caption request; no filename in image path:", repr(image_url))
        return ""

    # Build the full URL using only the filename.
    full_url = "http://host.docker.internal:8000/" + filename
    print(full_url)

    params = {
        "url": full_url,
        "beam_size": 3,
        "max_caption_length": 30
    }
    try:
        response = requests.get(CAPTION_ENDPOINT, params=params, timeout=10)
        response.raise_for_status()  # Raise an error if status is not 200
        data = response.json()
        if "captions" in data and isinstance(data["captions"], list) and len(data["captions"]) > 0:
            # Keep the candidate the service is most confident about.
            best_caption = max(data["captions"], key=lambda cap: float(cap.get("confidence", 0)))
            return best_caption['sentence']
        else:
            print("Unexpected JSON structure for image URL:", full_url)
            return ""
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller continue.
        print(f"Error getting caption for image {full_url}: {e}")
        return ""


# Caption one specific row (index label 1249) from its IMG_LOC entry.
df2.at[1249, "caption"] = get_caption(df2.at[1249, "IMG_LOC"])

# # Optionally, save the updated DataFrame to a new TSV file.
# output_tsv = "/path/to/your/haunted_places_updated.tsv"
# df2.to_csv(output_tsv, sep="\t", index=False)
# print("Captions updated and saved to", output_tsv)


http://host.docker.internal:8000/image_1249.png


### Generating again for missed images

In [70]:
# Index labels of the rows to re-caption (the images missed earlier, per the section heading).
numbers = [
    2259, 2477, 2713, 2781, 3563, 3860, 3957, 4014,
    4336, 4900, 5324, 6197, 6515, 6686, 6759, 6844,
    6910, 6913, 7041, 7865, 8372, 8747, 9388, 9582,
]
# Request a caption again for each listed row and store it in place.
for num in numbers:
    df2.at[num, "caption"] = get_caption(df2.at[num, "IMG_LOC"])

http://host.docker.internal:8000/image_2259.png
http://host.docker.internal:8000/image_2477.png
http://host.docker.internal:8000/image_2713.png
http://host.docker.internal:8000/image_2781.png
http://host.docker.internal:8000/image_3563.png
http://host.docker.internal:8000/image_3860.png
http://host.docker.internal:8000/image_3957.png
http://host.docker.internal:8000/image_4014.png
http://host.docker.internal:8000/image_4336.png
http://host.docker.internal:8000/image_4900.png
http://host.docker.internal:8000/image_5324.png
http://host.docker.internal:8000/image_6197.png
http://host.docker.internal:8000/image_6515.png
http://host.docker.internal:8000/image_6686.png
http://host.docker.internal:8000/image_6759.png
http://host.docker.internal:8000/image_6844.png
http://host.docker.internal:8000/image_6910.png
http://host.docker.internal:8000/image_6913.png
http://host.docker.internal:8000/image_7041.png
http://host.docker.internal:8000/image_7865.png
http://host.docker.internal:8000/image_8

In [80]:
# get_caption() returns "" (not NaN) when a request fails, so a plain isna()
# check cannot see failed rows; count blank strings as missing as well.
display(
    (df2["caption"].isna() | df2["caption"].astype(str).str.strip().eq("")).sum()
)

np.int64(0)

In [82]:
# Write the captioned table to disk as tab-separated text, omitting the index.
df2.to_csv(path_or_buf='arcgis_img2_captions.tsv', index=False, sep="\t")

### Code for detecting objects in images by first calling the HTTP file server and then the Docker container

In [5]:
import pandas as pd
import requests

# Endpoint of the object-detection (image classification) service.
OBJECT_ENDPOINT = "http://localhost:8764/inception/v4/classify/image"

# Make sure an "objects" column is present, initialised to empty strings.
if "objects" not in df2:
    df2.loc[:, "objects"] = ""

def get_objects(image_url):
    """
    Request object detection for one image and return the detected class names.

    Only the filename of ``image_url`` is used. It is appended to the address
    of the local HTTP file server, and the resulting URL is passed to
    ``OBJECT_ENDPOINT`` together with ``topn=2`` and ``min_confidence=0.03``.

    Parameters
    ----------
    image_url : str
        Shortened image path or URL. Non-string values (``None``, NaN) and
        values that contain no filename yield ``""`` without any request.

    Returns
    -------
    str
        The entries of the response's ``classnames`` list joined with ", ", or
        ``""`` when the input is unusable, the request fails, or the response
        has no ``classnames`` list. Errors are printed, never raised.
    """
    # Guard against missing cells (None/NaN) so one bad row cannot abort a batch.
    if not isinstance(image_url, str):
        print("Skipping object detection; image path is not a string:", image_url)
        return ""

    # Extract the filename.
    filename = image_url.strip().split("/")[-1]
    if not filename:
        # An empty filename would make the service fetch the server root instead of an image.
        print("Skipping object detection; no filename in image path:", repr(image_url))
        return ""

    full_url = "http://host.docker.internal:8000/" + filename
    print("Objects full URL:", full_url)

    # Query parameters sent to the service along with the image URL.
    params = {
        "url": full_url,
        "topn": 2,
        "min_confidence": 0.03
    }
    try:
        response = requests.get(OBJECT_ENDPOINT, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        if "classnames" in data and isinstance(data["classnames"], list):
            # Join the class names with commas.
            return ", ".join(data["classnames"])
        else:
            print("Unexpected JSON structure for object detection for URL:", full_url)
            return ""
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller continue.
        print(f"Error getting objects for image {full_url}: {e}")
        return ""

# Run object detection for every row, based on its IMG_LOC entry.
df2.loc[:, "objects"] = df2.loc[:, "IMG_LOC"].map(get_objects)


Objects full URL: http://host.docker.internal:8000/image_0.png
Objects full URL: http://host.docker.internal:8000/image_1.png
Objects full URL: http://host.docker.internal:8000/image_2.png
Objects full URL: http://host.docker.internal:8000/image_3.png
Objects full URL: http://host.docker.internal:8000/image_4.png
Objects full URL: http://host.docker.internal:8000/image_5.png
Objects full URL: http://host.docker.internal:8000/image_6.png
Objects full URL: http://host.docker.internal:8000/image_7.png
Objects full URL: http://host.docker.internal:8000/image_8.png
Objects full URL: http://host.docker.internal:8000/image_9.png
Objects full URL: http://host.docker.internal:8000/image_10.png
Objects full URL: http://host.docker.internal:8000/image_11.png
Objects full URL: http://host.docker.internal:8000/image_12.png
Objects full URL: http://host.docker.internal:8000/image_13.png
Objects full URL: http://host.docker.internal:8000/image_14.png
Objects full URL: http://host.docker.internal:8000

In [55]:
# Index labels of the rows for which object detection is run again.
numbers = [
    183, 635, 933, 1004, 1396, 2005, 2543,
    3353, 4455, 4501, 5162, 5773, 5987, 6042,
    6262, 6404, 7036, 7081, 7445, 8041, 8094,
    8353, 8371, 8525, 8792, 9300, 9303, 9621,
]
# Request the detected objects again for each listed row and store them in place.
for num in numbers:
    df2.at[num, "objects"] = get_objects(df2.at[num, "IMG_LOC"])

Objects full URL: http://host.docker.internal:8000/image_183.png
Objects full URL: http://host.docker.internal:8000/image_635.png
Objects full URL: http://host.docker.internal:8000/image_933.png
Objects full URL: http://host.docker.internal:8000/image_1004.png
Objects full URL: http://host.docker.internal:8000/image_1396.png
Objects full URL: http://host.docker.internal:8000/image_2005.png
Objects full URL: http://host.docker.internal:8000/image_2543.png
Objects full URL: http://host.docker.internal:8000/image_3353.png
Objects full URL: http://host.docker.internal:8000/image_4455.png
Objects full URL: http://host.docker.internal:8000/image_4501.png
Objects full URL: http://host.docker.internal:8000/image_5162.png
Objects full URL: http://host.docker.internal:8000/image_5773.png
Objects full URL: http://host.docker.internal:8000/image_5987.png
Objects full URL: http://host.docker.internal:8000/image_6042.png
Objects full URL: http://host.docker.internal:8000/image_6262.png
Objects full 

In [57]:
# Persist the frame (captions and objects) as TSV, without the index column.
output_tsv = "arcgis_img_captions_objects_final1.tsv"
df2.to_csv(path_or_buf=output_tsv, index=False, sep="\t")
print("Captions and objects updated and saved to", output_tsv)

Captions and objects updated and saved to arcgis_img_captions_objects_final1.tsv


In [59]:
# Show the detected-objects column.
display(df2.loc[:, "objects"])

0                   megalith, megalithic structure, cloak
1                                             trench coat
2       bannister, banister, balustrade, balusters, ha...
3                                      safe, sliding door
4         paddlewheel, paddle wheel, prison, prison house
                              ...                        
9726                                               castle
9727                                     monastery, vault
9728                        prison, prison house, viaduct
9729                               basketball, volleyball
9730               whippet, dogsled, dog sled, dog sleigh
Name: objects, Length: 9731, dtype: object