## Bergson ISS Data Post-Processing

In [1]:
# Import pre-requesite
import pandas as pd
import geopy
from geopy.geocoders import Nominatim
import os

In [2]:
# Read all Data from ISS Bergson experiment CSV log file
data_all = pd.read_csv('./data.csv')
data_all.head()
# Remark1: the GPS data we captured in the CSV file are in DMS format, we want it in DD format
# Remark2: only a subset of the picture (x245) have actually quality good enough to be used

Unnamed: 0,Date/time,Location,Picture Name,Predicted NO2
0,2021-04-17 01:47:01.360650,"(0.7319621443748474, -3.0113844871520996)",bergson_img_2021-04-17_01:46:39.jpg,low
1,2021-04-17 01:47:24.431233,"(0.7174579501152039, -2.9843356609344482)",bergson_img_2021-04-17_01:47:01.jpg,low
2,2021-04-17 01:47:45.462816,"(0.7038659453392029, -2.960275411605835)",bergson_img_2021-04-17_01:47:24.jpg,low
3,2021-04-17 01:48:07.775054,"(0.6892879009246826, -2.9357032775878906)",bergson_img_2021-04-17_01:47:45.jpg,low
4,2021-04-17 01:48:28.880668,"(0.6750655770301819, -2.9128353595733643)",bergson_img_2021-04-17_01:48:07.jpg,low


In [3]:
# Init the gps_df Dataframe that will contain post-processed Data
# Download bergson iss "good" pictures at: https://storage.googleapis.com/bergson_iss/pictures.zip
pictures_path = './pictures' # create first a pictures folder, then move all pictures within it
pictures_name=os.listdir(pictures_path)
pictures_name_sorted = sorted(pictures_name)
gps_df=pd.DataFrame()
gps_df['name']=pictures_name_sorted
gps_df.head()

Unnamed: 0,name
0,bergson_img_2021_04_17_01_46_39.jpg
1,bergson_img_2021_04_17_01_47_01.jpg
2,bergson_img_2021_04_17_01_47_24.jpg
3,bergson_img_2021_04_17_01_47_45.jpg
4,bergson_img_2021_04_17_01_48_07.jpg


In [4]:
# Helper function to convert get GPS DD from picture exif information
import PIL.Image
from dms2dec.dms_convert import dms2dec

def get_dms(pic_name):
    img = PIL.Image.open(pic_name)
    exif_data = img._getexif()
    N_S = exif_data[34853][1]
    latitude = exif_data[34853][2]
    E_W = exif_data[34853][3]
    longitude = exif_data[34853][4]
    return N_S, latitude, E_W, longitude

def format_latlong(latlong,nseo):
    degre = str(latlong[0])+'°'
    minute = str(latlong[1])+ '\''
    second = str(latlong[2])+'\"'
    dms_latlong = degre+minute+second+nseo
    return dms_latlong

def dmstodec(dms_lat,dms_long):
    latitude = dms2dec(dms_lat)
    longitude = dms2dec(dms_long)
    return latitude,longitude

In [5]:
# Get GPS DMS and DD list to be addedd to gps_df Dataframe
dms_list = []
dd_list = []
for picture in pictures_name_sorted:
    pic_path = os.path.join(pictures_path,picture)
    N_S, latitude, E_W, longitude = get_dms(pic_path)
    dms_lat = format_latlong(latitude,N_S)
    dms_long = format_latlong(longitude,E_W)
    dms_latlong = (dms_lat,dms_long)
    dms_list.append(dms_latlong)
    dd_lat, dd_long = dmstodec(dms_lat,dms_long)
    dd_latlong = (dd_lat,dd_long)
    dd_list.append(dd_latlong)

In [6]:
# Add GPS DMS and DD GPS to gps_df Dataframe
gps_df['dms_gps'] = dms_list
gps_df['dd_gps'] = dd_list
gps_df.head()

Unnamed: 0,name,dms_gps,dd_gps
0,bergson_img_2021_04_17_01_46_39.jpg,"(42.0°42.0'38.3""N, 174.0°3.0'43.6""W)","(42.01166666666666, -174.00083333333333)"
1,bergson_img_2021_04_17_01_47_01.jpg,"(41.0°56.0'18.0""N, 172.0°32.0'22.6""W)","(41.01555555555556, -172.0088888888889)"
2,bergson_img_2021_04_17_01_47_24.jpg,"(41.0°6.0'26.3""N, 170.0°59.0'23.4""W)","(41.001666666666665, -170.01638888888888)"
3,bergson_img_2021_04_17_01_47_45.jpg,"(40.0°19.0'42.8""N, 169.0°36.0'40.6""W)","(40.00527777777778, -169.01)"
4,bergson_img_2021_04_17_01_48_07.jpg,"(39.0°29.0'35.8""N, 168.0°12.0'12.3""W)","(39.00805555555556, -168.00333333333333)"


In [7]:
# Get address data from Open Street Map
address_openstreetmap = []
not_found = []
locator = Nominatim(user_agent="myGeocoder")

for gps_coord in gps_df['dd_gps']:
    coordinates = gps_coord
    location = locator.reverse(coordinates)
    try:
        location.raw
        address_openstreetmap.append(location.raw)
    except AttributeError:
        address_openstreetmap.append('not found')
        pass

gps_df['address'] = address_openstreetmap
gps_df.head()

Unnamed: 0,name,dms_gps,dd_gps,address
0,bergson_img_2021_04_17_01_46_39.jpg,"(42.0°42.0'38.3""N, 174.0°3.0'43.6""W)","(42.01166666666666, -174.00083333333333)",not found
1,bergson_img_2021_04_17_01_47_01.jpg,"(41.0°56.0'18.0""N, 172.0°32.0'22.6""W)","(41.01555555555556, -172.0088888888889)",not found
2,bergson_img_2021_04_17_01_47_24.jpg,"(41.0°6.0'26.3""N, 170.0°59.0'23.4""W)","(41.001666666666665, -170.01638888888888)",not found
3,bergson_img_2021_04_17_01_47_45.jpg,"(40.0°19.0'42.8""N, 169.0°36.0'40.6""W)","(40.00527777777778, -169.01)",not found
4,bergson_img_2021_04_17_01_48_07.jpg,"(39.0°29.0'35.8""N, 168.0°12.0'12.3""W)","(39.00805555555556, -168.00333333333333)",not found


In [8]:
# Create a country_code_df Dataframe
# This csv file has been created using https://developers.google.com/public-data/docs/canonical/countries_csv
country_code_path='./countrycode_csv.csv' 
country_code_df = pd.read_csv(country_code_path,delimiter=';')
country_code_df.head()

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla


In [9]:
country_code_list = []
for address in gps_df['address']:
    if address != 'not found':
        country_code_list.append(address['address']['country_code'])
    else:
        country_code_list.append('none')

In [10]:
gps_df['country_code'] = country_code_list
gps_df.head()

Unnamed: 0,name,dms_gps,dd_gps,address,country_code
0,bergson_img_2021_04_17_01_46_39.jpg,"(42.0°42.0'38.3""N, 174.0°3.0'43.6""W)","(42.01166666666666, -174.00083333333333)",not found,none
1,bergson_img_2021_04_17_01_47_01.jpg,"(41.0°56.0'18.0""N, 172.0°32.0'22.6""W)","(41.01555555555556, -172.0088888888889)",not found,none
2,bergson_img_2021_04_17_01_47_24.jpg,"(41.0°6.0'26.3""N, 170.0°59.0'23.4""W)","(41.001666666666665, -170.01638888888888)",not found,none
3,bergson_img_2021_04_17_01_47_45.jpg,"(40.0°19.0'42.8""N, 169.0°36.0'40.6""W)","(40.00527777777778, -169.01)",not found,none
4,bergson_img_2021_04_17_01_48_07.jpg,"(39.0°29.0'35.8""N, 168.0°12.0'12.3""W)","(39.00805555555556, -168.00333333333333)",not found,none


In [11]:
country_code_df['country_code'] = country_code_df['country'].str.lower()
country_code_df = country_code_df.rename(columns={"name": "country_name"})
country_code_df.head()

Unnamed: 0,country,latitude,longitude,country_name,country_code
0,AD,42.546245,1.601554,Andorra,ad
1,AE,23.424076,53.847818,United Arab Emirates,ae
2,AF,33.93911,67.709953,Afghanistan,af
3,AG,17.060816,-61.796428,Antigua and Barbuda,ag
4,AI,18.220554,-63.068615,Anguilla,ai


In [12]:
gps_df_final = pd.merge(gps_df,country_code_df[['country_code','country_name']],on='country_code',how='left')
gps_df_final

Unnamed: 0,name,dms_gps,dd_gps,address,country_code,country_name
0,bergson_img_2021_04_17_01_46_39.jpg,"(42.0°42.0'38.3""N, 174.0°3.0'43.6""W)","(42.01166666666666, -174.00083333333333)",not found,none,
1,bergson_img_2021_04_17_01_47_01.jpg,"(41.0°56.0'18.0""N, 172.0°32.0'22.6""W)","(41.01555555555556, -172.0088888888889)",not found,none,
2,bergson_img_2021_04_17_01_47_24.jpg,"(41.0°6.0'26.3""N, 170.0°59.0'23.4""W)","(41.001666666666665, -170.01638888888888)",not found,none,
3,bergson_img_2021_04_17_01_47_45.jpg,"(40.0°19.0'42.8""N, 169.0°36.0'40.6""W)","(40.00527777777778, -169.01)",not found,none,
4,bergson_img_2021_04_17_01_48_07.jpg,"(39.0°29.0'35.8""N, 168.0°12.0'12.3""W)","(39.00805555555556, -168.00333333333333)",not found,none,
...,...,...,...,...,...,...
240,bergson_img_2021_04_17_04_39_59.jpg,"(48.0°5.0'8.8""N, 70.0°46.0'10.0""E)","(48.00138888888889, 70.01277777777777)","{'place_id': 257510931, 'licence': 'Data © Ope...",kz,Kazakhstan
241,bergson_img_2021_04_17_04_40_22.jpg,"(48.0°36.0'57.1""N, 72.0°45.0'44.4""E)","(48.01, 72.0125)","{'place_id': 257512588, 'licence': 'Data © Ope...",kz,Kazakhstan
242,bergson_img_2021_04_17_04_40_43.jpg,"(49.0°3.0'58.2""N, 74.0°37.0'9.4""E)","(49.00083333333333, 74.01027777777777)","{'place_id': 257512588, 'licence': 'Data © Ope...",kz,Kazakhstan
243,bergson_img_2021_04_17_04_41_05.jpg,"(49.0°30.0'7.9""N, 76.0°36.0'5.1""E)","(49.00833333333333, 76.01)","{'place_id': 257511915, 'licence': 'Data © Ope...",kz,Kazakhstan


In [13]:
gps_df_final = gps_df_final.rename(columns={'country_name':'country'})
gps_df_final

Unnamed: 0,name,dms_gps,dd_gps,address,country_code,country
0,bergson_img_2021_04_17_01_46_39.jpg,"(42.0°42.0'38.3""N, 174.0°3.0'43.6""W)","(42.01166666666666, -174.00083333333333)",not found,none,
1,bergson_img_2021_04_17_01_47_01.jpg,"(41.0°56.0'18.0""N, 172.0°32.0'22.6""W)","(41.01555555555556, -172.0088888888889)",not found,none,
2,bergson_img_2021_04_17_01_47_24.jpg,"(41.0°6.0'26.3""N, 170.0°59.0'23.4""W)","(41.001666666666665, -170.01638888888888)",not found,none,
3,bergson_img_2021_04_17_01_47_45.jpg,"(40.0°19.0'42.8""N, 169.0°36.0'40.6""W)","(40.00527777777778, -169.01)",not found,none,
4,bergson_img_2021_04_17_01_48_07.jpg,"(39.0°29.0'35.8""N, 168.0°12.0'12.3""W)","(39.00805555555556, -168.00333333333333)",not found,none,
...,...,...,...,...,...,...
240,bergson_img_2021_04_17_04_39_59.jpg,"(48.0°5.0'8.8""N, 70.0°46.0'10.0""E)","(48.00138888888889, 70.01277777777777)","{'place_id': 257510931, 'licence': 'Data © Ope...",kz,Kazakhstan
241,bergson_img_2021_04_17_04_40_22.jpg,"(48.0°36.0'57.1""N, 72.0°45.0'44.4""E)","(48.01, 72.0125)","{'place_id': 257512588, 'licence': 'Data © Ope...",kz,Kazakhstan
242,bergson_img_2021_04_17_04_40_43.jpg,"(49.0°3.0'58.2""N, 74.0°37.0'9.4""E)","(49.00083333333333, 74.01027777777777)","{'place_id': 257512588, 'licence': 'Data © Ope...",kz,Kazakhstan
243,bergson_img_2021_04_17_04_41_05.jpg,"(49.0°30.0'7.9""N, 76.0°36.0'5.1""E)","(49.00833333333333, 76.01)","{'place_id': 257511915, 'licence': 'Data © Ope...",kz,Kazakhstan


In [14]:
# Create the bergson geodata CSV file that is used by the team to complete then AI prediction vs Sentinel 5P NO2 value
gps_df_final[['name', 'dms_gps', 'dd_gps', 'country_code', 'country','address']].to_csv('./bergson_geodata.csv',index=False)