
# Working with S3 storage on the SSP Cloud


## Configure the remote filesystem


In [1]:
import os
import s3fs

# Build the endpoint URL from the AWS_S3_ENDPOINT environment variable
# (indexing os.environ raises KeyError if the variable is not set).
S3_ENDPOINT_URL = f"https://{os.environ['AWS_S3_ENDPOINT']}"

# Filesystem handle bound to that endpoint; the cells below use it for S3 access.
fs = s3fs.S3FileSystem(client_kwargs={"endpoint_url": S3_ENDPOINT_URL})

## List data from your bucket

In [None]:
# List the entries found under the "inesh" path on the configured S3 filesystem.
fs.ls("inesh")

In [13]:
import pandas as pd

# Location of the source CSV: "<bucket>/<object key>"
BUCKET = "inesh"
FILE_KEY_S3 = "demo/airports-extended.csv"
FILE_PATH_S3 = f"{BUCKET}/{FILE_KEY_S3}"

# Open the remote object in binary mode and let pandas parse it
# (comma is pandas' default separator).
# NOTE(review): missing values show up as the literal string "\N" in the
# displayed table -- consider na_values if they should be parsed as NaN.
with fs.open(FILE_PATH_S3, mode="rb") as file_in:
    df_airports = pd.read_csv(file_in)

In [14]:
# Show the loaded airports table as the cell output.
df_airports

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database timezone,type,source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.081690,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.207080,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.826790,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.443380,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12663,14107,Ulan-Ude East Airport,Ulan Ude,Russia,\N,XIUW,51.849998,107.737999,1670,\N,\N,\N,airport,OurAirports
12664,14108,Krechevitsy Air Base,Novgorod,Russia,\N,ULLK,58.625000,31.385000,85,\N,\N,\N,airport,OurAirports
12665,14109,Desierto de Atacama Airport,Copiapo,Chile,CPO,SCAT,-27.261200,-70.779198,670,\N,\N,\N,airport,OurAirports
12666,14110,Melitopol Air Base,Melitopol,Ukraine,\N,UKDM,46.880001,35.305000,0,\N,\N,\N,airport,OurAirports


## Transform your data

In [15]:
# Keep only the rows whose "Country" value is exactly "France";
# the result is a new frame and df_airports is left unchanged.
df_airports_fr = df_airports.loc[df_airports["Country"].eq("France")]

# Last expression of the cell: display the filtered table.
df_airports_fr

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database timezone,type,source
1223,1254,Calais-Dunkerque Airport,Calais,France,CQF,LFAC,50.962101,1.954760,12,1,E,Europe/Paris,airport,OurAirports
1224,1255,Péronne-Saint-Quentin Airport,Peronne,France,\N,LFAG,49.868500,3.029580,295,1,E,Europe/Paris,airport,OurAirports
1225,1256,Nangis-Les Loges Airport,Nangis,France,\N,LFAI,48.596199,3.006790,428,1,E,Europe/Paris,airport,OurAirports
1226,1257,Bagnoles-de-l'Orne-Couterne Airport,Bagnole-de-l'orne,France,\N,LFAO,48.545799,-0.387444,718,1,E,Europe/Paris,airport,OurAirports
1227,1258,Albert-Bray Airport,Albert,France,BYF,LFAQ,49.971500,2.697660,364,1,E,Europe/Paris,airport,OurAirports
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12582,14025,Porte de Saint-Cloud Station,Paris,France,\N,\N,48.838307,2.257508,120,1,E,\N,\N,\N
12583,14026,Pont de Sevres Station,Boulogne-Billancourt,France,\N,\N,48.829747,2.231035,113,1,E,\N,\N,\N
12584,14027,Porte Maillot Metro Station,Paris,France,\N,\N,48.878056,2.281944,133,1,E,\N,\N,\N
12585,14028,Gare de Denfert-Rochereau,Paris,France,\N,\N,48.833872,2.332616,205,1,E,\N,\N,\N



## Export data to S3


In [17]:
# Destination of the filtered table: "<bucket>/<object key>"
BUCKET_OUT = "inesh"
FILE_KEY_OUT_S3 = "demo/airports_fr.csv"
FILE_PATH_OUT_S3 = BUCKET_OUT + "/" + FILE_KEY_OUT_S3

# Write the French airports to S3 as CSV through a text-mode handle.
# index=False: after filtering, the row index only holds leftover row
# positions from the full table; writing it would add an unnamed leading
# column that comes back as "Unnamed: 0" when the file is read again.
with fs.open(FILE_PATH_OUT_S3, "w") as file_out:
    df_airports_fr.to_csv(file_out, index=False)