# Project

## Data Collection

The data is collected from the following sources:
- Wikidata

```sql
SELECT ?station ?stationLabel ?image WHERE {
  ?station wdt:P31 wd:Q55488 .  # Select entities that are railway stations
  ?station wdt:P18 ?image .  # Only include stations with images
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT 100
```

## Get images

In [3]:
import urllib.request
import json
import os
import shutil

user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0"

url = "https://query.wikidata.org/sparql?query=SELECT%20%3Fstation%20%3FstationLabel%20%3FcityLabel%20%3FopeningDate%20%3FclosingDate%20%3FstateLabel%20%3Fcoordinates%20%3FlineLabel%20(SAMPLE(%3Fimage)%20AS%20%3FuniqueImage)%20WHERE%20%7B%0A%20%20%3Fstation%20wdt%3AP31%20wd%3AQ55488%20.%20%20%23%20Select%20railway%20stations%0A%20%20%3Fstation%20wdt%3AP18%20%3Fimage%20.%20%20%23%20Only%20include%20stations%20with%20images%0A%20%20%3Fstation%20wdt%3AP17%20wd%3AQ142%20.%20%20%23%20Only%20include%20stations%20in%20France%0A%20%20%3Fstation%20wdt%3AP1619%20%3FopeningDate%20.%0A%20%20%3Fstation%20wdt%3AP131%20%3Fcity%20.%0A%20%20OPTIONAL%20%7B%0A%20%20%20%20%3Fstation%20wdt%3AP576%20%3FclosingDate%20.%0A%20%20%7D%0A%20%20%3Fstation%20wdt%3AP5817%20%3Fstate%20.%0A%20%20%3Fstation%20wdt%3AP625%20%3Fcoordinates%20.%0A%20%20%3Fstation%20wdt%3AP81%20%3Fline%20.%0A%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22en%2Cen%22.%20%7D%0A%7D%0AGROUP%20BY%20%3Fstation%20%3FstationLabel%20%3FcityLabel%20%3FopeningDate%20%3FclosingDate%20%3FstateLabel%20%3Fcoordinates%20%3FlineLabel%0ALIMIT%20100%0A&format=json"
request = urllib.request.Request(url, data=None, headers={"User-Agent": user_agent})
with urllib.request.urlopen(request) as response:
    responsedata = json.loads(response.read().decode("utf-8"))

images_urls = []
images_data = []

for data in responsedata["results"]["bindings"]:
    images_urls.append([data["stationLabel"]["value"], data["uniqueImage"]["value"]])
    image_ext = data["uniqueImage"]["value"].split(".")[-1]
    image_path = "./data/images/" + data["stationLabel"]["value"] + "." + image_ext
    images_data.append({
        "name": data["stationLabel"]["value"],
        "imageUrl": data["uniqueImage"]["value"],
        "imagePath": image_path,
        "place": data["cityLabel"]["value"],
        "openingDate": data["openingDate"]["value"],
        "closingDate": data["closingDate"]["value"] if "closingDate" in data else None,
        "line": data["lineLabel"]["value"],
        "state": data["stateLabel"]["value"],
        "coordinates": data["coordinates"]["value"]
    })

print("Found " + str(len(images_urls)) + " images.")

# empty the images folder
shutil.rmtree("./data/images")
os.makedirs("./data/images")

# download the images
for image in images_urls:
    image_ext = image[1].split(".")[-1]
    image_path = "./data/images/" + image[0] + "." + image_ext

    request = urllib.request.Request(image[1], data=None, headers={"User-Agent": user_agent})
    with urllib.request.urlopen(request) as response:
        if response.status != 200:
            print("Failed to download " + image[0] + "." + image_ext + " (HTTP " + str(response.status) + ").")
            continue
        else:
            open(image_path, "wb").write(response.read())
            print("Downloaded " + image[0] + "." + image_ext)

# writing images data to a json file
with open("./data/images.json", "w") as f:
    json.dump(images_data, f)

print("Downloaded " + str(len(images_urls)) + " images.")



Found 100 images.
Downloaded Gare d'Avignon TGV.JPG
Downloaded Gare d'Avignon TGV.JPG
Downloaded Gare de Lyon-Perrache.jpg
Downloaded Gare de Lyon-Perrache.jpg
Downloaded Gare de Lyon-Perrache.jpg
Downloaded Gare de Lyon-Perrache.jpg
Downloaded Gare de Lyon-Vaise.jpg
Downloaded Gare de Lyon-Vaise.jpg
Downloaded Lyon-Saint-Paul station.jpg
Downloaded Lyon-Saint-Paul station.jpg
Downloaded Gare des Brotteaux.JPG
Downloaded Gare de Lyon-Jean Macé.JPG
Downloaded Gare de Lyon-Jean Macé.JPG
Downloaded Gare de Lyon-Jean Macé.JPG
Downloaded Gare de Lyon-Jean Macé.JPG
Downloaded Gare de Lyon-Jean Macé.JPG
Downloaded Gare de Lyon-Jean Macé.JPG
Downloaded Lyon-Saint-Clair station.JPG
Downloaded Lyon-Saint-Clair station.JPG
Downloaded Lyon-Saint-Clair station.JPG
Downloaded Lyon-Est station.JPG
Downloaded Gare de Lozère.jpg
Downloaded Gare de Saint-Laurent - Gainneville.jpg
Downloaded Gare de Tarascon-sur-Ariège.jpg
Downloaded Nanterre – Ville.jpg
Downloaded Gare de Guingamp.jpg
Downloaded Gare de

## Get metadata

In [2]:
import os
import PIL.Image
import PIL.ExifTags

# open every images in ./data/images folder
with os.scandir("./data/images") as entries:
    i = 0
    # get 5 first images
    for entry in entries:
        img = PIL.Image.open(entry)
        exif = {
            PIL.ExifTags.TAGS[k]: v
            for k, v in img._getexif().items()
            if k in PIL.ExifTags.TAGS
        }
        print(entry.name + " exif data:")
        print(exif)
        input("Press Enter to continue...")
        i += 1
        if i == 5:
            break

Gare de Saint-Étienne-Carnot.JPG exif data:
{'DateTimeOriginal': '2015:11:18 17:04:32', 'Rating': 1, 'ResolutionUnit': 2, 'RatingPercent': 20, 'ExifOffset': 256, 'Make': 'NIKON CORPORATION', 'Model': 'NIKON D5100', 'Software': 'darktable 2.4.3', 'Orientation': 1, 'DateTime': '2018:09:09 17:01:15', 'YCbCrPositioning': 2, 'XResolution': 300, 'YResolution': 300, 'ExifVersion': b'0230', 'ComponentsConfiguration': b'\x01\x02\x03\x00', 'CompressedBitsPerPixel': 4.0, 'DateTimeDigitized': '2015:11:18 17:04:32', 'ExposureBiasValue': 0.0, 'MaxApertureValue': 4.1, 'MeteringMode': 5, 'LightSource': 0, 'Flash': 16, 'FocalLength': 30.0, 'ColorSpace': 1, 'ExifImageWidth': 4638, 'ExifInteroperabilityOffset': 12348, 'FocalLengthIn35mmFilm': 45, 'SceneCaptureType': 0, 'SubsecTime': '20', 'SubsecTimeOriginal': '20', 'SubsecTimeDigitized': '20', 'ExifImageHeight': 3074, 'SubjectDistanceRange': 0, 'SensingMethod': 2, 'FileSource': b'\x03', 'ExposureTime': 0.002, 'FNumber': 4.2, 'SceneType': b'\x01', 'Expos