In [1]:
import pandas as pd
import requests
from selenium import webdriver
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [2]:
# Load the survey table from a CSV file in the current working directory.
df = pd.read_csv("seisintel_selected_surveys_OBN.csv")
def fetch_vessel_row(df, name, status):
    """Return the rows whose vessel name and completion status both match.

    Args:
        df: Survey table with string columns "Vessel" and "Complete".
        name: Text to look for inside "Vessel" (treated as a regular
            expression, pandas' default for ``str.contains``).
        status: Text to look for inside "Complete" (also a regular expression).

    Returns:
        A DataFrame holding only the rows of ``df`` where both columns match.
    """
    # na=False: a missing cell counts as "no match". Without it the mask holds
    # NaN and pandas refuses to use it for boolean indexing.
    vessel_match = df["Vessel"].str.contains(name, na=False)
    status_match = df["Complete"].str.contains(status, na=False)
    return df[vessel_match & status_match]

def fetch_by_survey_name(df, name):
    """Return the rows whose "Survey Name" contains ``name``.

    Args:
        df: Survey table with a string column "Survey Name".
        name: Text to look for (treated as a regular expression, pandas'
            default for ``str.contains``).

    Returns:
        A DataFrame holding only the matching rows of ``df``.
    """
    # na=False: rows with a missing survey name are treated as non-matching
    # instead of producing a NaN mask that pandas cannot index with.
    name_match = df["Survey Name"].str.contains(name, na=False)
    return df[name_match]

def fetch_by_survey_name_year(df, name,year):
    """Return rows matching a survey name whose mobilisation start mentions a year.

    Args:
        df: Survey table with string columns "Survey Name" and
            "Mobilisation Start".
        name: Text to look for inside "Survey Name" (regular expression).
        year: Year to look for inside "Mobilisation Start"; a string such as
            "2024" or an integer such as 2024.

    Returns:
        A DataFrame holding only the rows of ``df`` where both columns match.
    """
    # na=False: missing cells count as "no match" rather than poisoning the mask.
    name_match = df["Survey Name"].str.contains(name, na=False)
    # str(year) lets callers pass the year as an int as well as a string.
    year_match = df["Mobilisation Start"].str.contains(str(year), na=False)
    return df[name_match & year_match]

def fetch_vessels_in_survey(df,type):
    """Return the rows whose "Activity" contains ``type``.

    Args:
        df: Survey table with a string column "Activity".
        type: Text to look for inside "Activity" (regular expression). The
            name shadows the builtin ``type`` inside this function; it is kept
            so existing keyword callers continue to work.

    Returns:
        A DataFrame holding only the matching rows of ``df``.
    """
    # na=False: rows with no activity recorded are treated as non-matching.
    activity_match = df["Activity"].str.contains(type, na=False)
    return df[activity_match]

def fetch_vessel_info(df, survey_id):
    """Return the rows whose "Survey Id" contains ``survey_id``.

    Args:
        df: Survey table with a string column "Survey Id".
        survey_id: Text to look for inside "Survey Id" (regular expression,
            substring match rather than exact equality).

    Returns:
        A DataFrame holding only the matching rows of ``df``.
    """
    # na=False: rows with a missing id are treated as non-matching instead of
    # producing a NaN mask that pandas cannot index with.
    id_match = df["Survey Id"].str.contains(survey_id, na=False)
    return df[id_match]

# Display the column labels of the loaded table.
df.columns

Index(['Survey Id', 'Parent Id', 'Survey Name', 'Type', 'Activity', 'Company',
       'Survey Company', 'Vessel', 'Client', 'Country', 'Complete',
       'Mobilisation Location', 'Mobilisation Start', 'Deployment Start',
       'Production Start', 'Production End', 'Retrieval End',
       'Demobilisation End', 'Demobilisation Location', 'Number of Streamers',
       'Streamer Separation', 'Streamer Length', 'Sail Line Separation',
       'Report', 'Combo Report', 'Tides', 'Ocean Current', 'FFSQKM',
       'FFSQKM Method', 'Shot Patch SQKM', 'Shotpatch Method',
       'FF Minimum Water Depth (m)', 'FF Maximum Water Depth (m)', 'CentreLAT',
       'CentreLONG', 'Replay'],
      dtype='object')

In [3]:
# Select the survey to work on. Exactly one assignment to df_survey_info is
# active; the commented-out lines are alternative surveys that can be swapped
# in by moving the comment marker.
# df_survey_info = fetch_by_survey_name_year(df,"Mumbai High OBN","2024")
# df_survey_info = fetch_by_survey_name(df,"KG-DWN-98/2 Cluster-2 OBN")
# df_survey_info = fetch_by_survey_name(df,"Stones 4D")
# df_survey_info = fetch_by_survey_name(df,"Edvard Grieg 4D")
# df_survey_info = fetch_by_survey_name(df,"Thunder Horse 4D OBN")
# df_survey_info = fetch_by_survey_name(df,"OML 130 Preowei OBN")
# df_survey_info = fetch_by_survey_name(df,"Engagement 6")
# df_survey_info = fetch_by_survey_name(df,"Inception")
df_survey_info = fetch_by_survey_name(df,"Jack")
# df_survey_info = fetch_by_survey_name(df,"Oseburg")
# df_survey_info = fetch_by_survey_name(df,"Green Canyon")
# df_survey_info = fetch_by_survey_name(df,"Kaikias")
# survey_id = list(df_survey_info["Survey Id"])[0]
# df_survey_info = fetch_by_survey_name(df,"Sepia")
# for index, row in df_survey_info.iterrows():
#     print(row)

In [4]:
# Alias the selected survey rows (same object, no copy) and pull out the
# vessel names, one entry per row.
df_node_vessels_in_survey = df_survey_info
vessel_column = df_node_vessels_in_survey['Vessel']
vessel_names = vessel_column.tolist()
vessel_column

392        Fulmar Explorer
393    Harvey Intervention
394        Olympic Artemis
395    Harvey Intervention
Name: Vessel, dtype: object

In [5]:
# Survey ids of the selected rows, in row order.
survey_ids = df_node_vessels_in_survey["Survey Id"].tolist()
survey_ids

['23US_02765', '23US_02739', '23US_02820', '24US_02829']

In [6]:
# Per-row values for the selected survey, aligned with vessel_names / survey_ids.
start_location_of_each_vessel = list(df_node_vessels_in_survey["Mobilisation Location"])
activity_of_the_vessel = list(df_node_vessels_in_survey["Activity"])

# Lookup tables built by pairing the aligned lists (later pairs overwrite
# earlier ones that share a key).
#
# NOTE: the two vessel-keyed dicts keep only the LAST row seen for a vessel, so
# a vessel that appears on several rows of the survey loses its earlier
# mobilisation location / survey id. Use survey_ids when every row matters.
start_points_per_vessel = dict(zip(vessel_names, start_location_of_each_vessel))
survey_id_per_vessel = dict(zip(vessel_names, survey_ids))
# Despite its name, this one is keyed by survey id, not by vessel.
activity_per_vessel = dict(zip(survey_ids, activity_of_the_vessel))
start_points_per_vessel

{'Fulmar Explorer': 'Galveston -  UNITED STATES',
 'Harvey Intervention': 'Port Fourchon -  UNITED STATES',
 'Olympic Artemis': 'Galveston -  UNITED STATES'}

In [7]:
survey_id_per_vessel

{'Fulmar Explorer': '23US_02765',
 'Harvey Intervention': '24US_02829',
 'Olympic Artemis': '23US_02820'}

In [8]:
activity_per_vessel

{'23US_02765': 'Source',
 '23US_02739': 'Node Layout',
 '23US_02820': 'Node Layout',
 '24US_02829': 'Node Retrieval'}

In [9]:
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [10]:
# This is just so that all the downloads are moved into a specific folder
import os
import glob
import shutil
import time
 

In [11]:
# if there are issues with the chromedriver - update it with the latest compatible version of your chrome app
# Log in to the SeisIntel portal once, then download the CSV and GeoJSON export
# of every survey id in survey_ids through the browser.
#
# If chromedriver misbehaves, replace it with the release that matches your
# installed Chrome.
import getpass
import os

og_url = "https://portal.seisintel.com/#/"
login_url = og_url+"login/"
path_url = og_url+"path/"

# Maximum time to wait for a page element to become clickable.
ELEMENT_WAIT_SECONDS = 100
# Pause after requesting a download before moving on to the next click.
DOWNLOAD_PAUSE_SECONDS = 2

# Credentials must never be stored in the notebook: read them from the
# environment and fall back to an interactive prompt (the password is not echoed).
seisintel_login = os.environ.get("SEISINTEL_LOGIN") or input("SeisIntel login: ")
seisintel_password = os.environ.get("SEISINTEL_PASSWORD") or getpass.getpass("SeisIntel password: ")

cService  = webdriver.ChromeService(executable_path=".\\chromedriver.exe")
driver = webdriver.Chrome(service = cService)
driver.get(login_url)
wait = WebDriverWait(driver, ELEMENT_WAIT_SECONDS)

# Log in to the SeisIntel page.
wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='Login']"))).send_keys(seisintel_login)
wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='Password']"))).send_keys(seisintel_password)
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@ng-click='vm.doLogin()']"))).click()
# TODO: add error checking here. The message below is printed as soon as the
# login button has been clicked; the login result itself is not verified.
print("Logged in")

# Login happens only once. From here on each vessel's data is reached through
# its unique survey id and downloaded.
# This could be more efficient: search from a complete project and collect all
# download links in one go, which would also show how long a project takes.
for survey_id in survey_ids:
    driver.get(path_url+survey_id)
    # Both formats are fetched: the CSV plot is poor, which is why GeoJSON is
    # downloaded as well. KML may hold additional information - still to explore.
    for export_format in ("CSV", "GeoJSON"):
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Download']"))).click()
        wait.until(EC.element_to_be_clickable((By.XPATH, f"//span[text()='{export_format}']"))).click()
        time.sleep(DOWNLOAD_PAUSE_SECONDS)
    # TODO: wrap the body in try/except and move finished downloads on failure.
    # An earlier draft called move_all_files_to_download_folder(), which is not
    # defined in the cells shown here.

Logged in
