## Techport API

In [1]:
import pandas as pd
import requests

# Value sent as the `updatedSince` filter of the Techport project listing.
sinceDate = '2024-04-22'

# Pass the filter through `params` so requests builds and URL-encodes the query
# string, bound the wait with a timeout so the cell cannot hang forever, and
# fail loudly on an HTTP error instead of trying to parse an error body as JSON.
resProjectsResponse = requests.get(
    'https://techport.nasa.gov/api/projects',
    params={'updatedSince': sinceDate},
    timeout=30,
)
resProjectsResponse.raise_for_status()
resProjects = resProjectsResponse.json()

# One row per entry of the response's 'projects' list.
dfProjectsRes = pd.DataFrame(resProjects['projects'])
dfProjectsRes.head()

Unnamed: 0,acronym,projectId,title,website,lastUpdated
0,,96770,,,2024-4-23
1,,154830,,,2024-4-22
2,,157154,,,2024-4-22
3,,157153,,,2024-4-22
4,,157155,,,2024-4-22


In [2]:
# This step is slow: it issues one HTTP request per project, plus one request
# per organization the first time that organization is seen.

# Column order of dfProjects.
PROJECT_COLUMNS = [
    'project_id',
    'acronym',
    'title',
    'description',
    'start_year',
    'start_month',
    'end_year',
    'end_month',
    'status_description',
    'website',
    'last_updated',
    'release_status_string',
    'organization_id',
    'organization_name',
    'organization_type_pretty'
]

# Seconds to wait on each request before giving up instead of hanging the cell.
REQUEST_TIMEOUT = 30

# Organization details keyed by organization id, so each one is fetched once.
organizations = {}

# Rows are collected in a plain list and turned into a DataFrame once at the
# end, instead of growing the frame one `.loc[...]` assignment at a time.
projectRecords = []

# A single session is shared by all the requests made in this cell.
with requests.Session() as session:
    # `.get` tolerates an empty listing, where the 'projectId' column is absent.
    for projectId in dfProjectsRes.get('projectId', []):
        projectResponse = session.get(
            'https://techport.nasa.gov/api/projects/' + str(projectId),
            timeout=REQUEST_TIMEOUT,
        )
        projectResponse.raise_for_status()
        project = projectResponse.json()['project']

        organizationId = project['responsibleMd']['organizationId']
        organizationFound = organizations.get(organizationId)

        # Fetch and cache the organization detail if it has not been seen yet.
        if organizationFound is None:
            organizationResponse = session.get(
                'https://techport.nasa.gov/api/organizations/' + str(organizationId),
                timeout=REQUEST_TIMEOUT,
            )
            organizationResponse.raise_for_status()
            organization = organizationResponse.json()['organization']

            organizationFound = {
                'organization_id': organizationId,
                'organization_name': organization['organizationName'],
                'organization_type_pretty': organization['organizationTypePretty']
            }
            organizations[organizationId] = organizationFound

        # Flatten the project and its organization into one record.
        projectRecords.append({
            'project_id': project['projectId'],
            'acronym': project['acronym'],
            'title': project['title'],
            'description': project['description'],
            'start_year': project['startYear'],
            'start_month': project['startMonth'],
            'end_year': project['endYear'],
            'end_month': project['endMonth'],
            'status_description': project['statusDescription'],
            'website': project['website'],
            'last_updated': project['lastUpdated'],
            'release_status_string': project['releaseStatusString'],
            'organization_id': organizationFound['organization_id'],
            'organization_name': organizationFound['organization_name'],
            'organization_type_pretty': organizationFound['organization_type_pretty']
        })

# dtype=object keeps the values as the Python objects decoded from the JSON
# (missing values stay None rather than being coerced to NaN).
dfProjects = pd.DataFrame(projectRecords, columns=PROJECT_COLUMNS, dtype=object)
        

In [3]:
# drop_duplicates returns a new frame and leaves the original untouched, so the
# result must be assigned back; otherwise the duplicates are never removed.
# keep='last' retains the last row seen for each project_id.
dfProjects = dfProjects.drop_duplicates(subset=['project_id'], keep='last')
dfProjects.head()

Unnamed: 0,project_id,acronym,title,description,start_year,start_month,end_year,end_month,status_description,website,last_updated,release_status_string,organization_id,organization_name,organization_type_pretty
0,96770,CASALS,"SWaP-efficient, fast-wavelength-steering and t...",<p>We propose to integrate next-generation lid...,2018,10,2024,9,Active,https://etd.gsfc.nasa.gov/,2024-4-23,Released,4910,Mission Support Directorate,NASA Mission Directorate
1,154830,,Radiation Tolerant Displays,<p>The Radiation Tolerant Displays project aim...,2022,10,2025,9,Active,,2024-4-22,Released,9525,Exploration Systems Development Mission Direct...,NASA Mission Directorate
2,157154,,CC23 Positive Connections,"Under ""Starburst: A Revolutionary Under-Constr...",2023,7,2023,11,Completed,,2024-4-22,Released,4875,Space Technology Mission Directorate,NASA Mission Directorate
3,157153,,CC22 Rid the Rocket Challenge: Phase 1 Ideation,<p>The Rid the Rocket Challenge invites indivi...,2022,9,2023,5,Completed,,2024-4-22,Released,4875,Space Technology Mission Directorate,NASA Mission Directorate
4,157155,,CC23 Super-Heavy Logistics Transport for the M...,<p>This NASA challenge is to develop a super h...,2023,8,2023,10,Completed,,2024-4-22,Released,4875,Space Technology Mission Directorate,NASA Mission Directorate


## Redshift

In [4]:
import psycopg2
import os
from dotenv import load_dotenv

# Load variables from a .env file (if any) into the process environment.
load_dotenv()

# Connection settings are read from the environment, never hardcoded.
REDSHIFT_HOST = os.getenv('REDSHIFT_HOST')
REDSHIFT_PORT = os.getenv('REDSHIFT_PORT')
REDSHIFT_DATABASE = os.getenv('REDSHIFT_DATABASE')
REDSHIFT_USER = os.getenv('REDSHIFT_USER')
REDSHIFT_PASSWORD = os.getenv('REDSHIFT_PASSWORD')

try:
    conn = psycopg2.connect(
        host=REDSHIFT_HOST,
        dbname=REDSHIFT_DATABASE,
        user=REDSHIFT_USER,
        password=REDSHIFT_PASSWORD,
        port=REDSHIFT_PORT
    )
except Exception as e:
    print(e)
    print("- no se pudo conectar a redshift")
    # Re-raise so the run stops here: without this, `conn` is never bound and
    # every later cell fails with an unrelated-looking NameError.
    raise
else:
    print("- conectado a redshift")

- conectado a redshift


In [5]:
# Remove the projects table if a previous run left one behind.
dropProjects = "DROP TABLE IF EXISTS projects"

with conn.cursor() as cursor:
    cursor.execute(dropProjects)
    conn.commit()

In [6]:
# DDL for the projects table: one column per dfProjects column, plus an
# etl_load column that defaults to CURRENT_TIMESTAMP.
tblProjects = """
    CREATE TABLE IF NOT EXISTS projects (
        project_id INT NOT NULL,
        acronym VARCHAR(50),
        title VARCHAR(250),
        description VARCHAR(MAX),
        start_year VARCHAR(4),
        start_month VARCHAR(2),
        end_year VARCHAR(4),
        end_month VARCHAR(2),
        status_description VARCHAR(20),
        website VARCHAR(250),
        last_updated DATE,
        release_status_string VARCHAR(20),
        organization_id INT,
        organization_name VARCHAR(250),
        organization_type_pretty VARCHAR(250),
        etl_load DATETIME DEFAULT CURRENT_TIMESTAMP
    )
    """

# Create the table (a no-op when it already exists) and commit the change.
with conn.cursor() as cursor:
    cursor.execute(tblProjects)
    conn.commit()

In [7]:
from psycopg2.extras import execute_values

# Insert the collected projects into the projects table.

# Plain tuples in dfProjects column order; the INSERT column list below relies
# on that order.
projectRows = list(dfProjects.itertuples(index=False, name=None))

if projectRows:
    try:
        with conn.cursor() as cursor:
            execute_values(
                cursor,
                """
        INSERT INTO projects (
            project_id,
            acronym,
            title,
            description,
            start_year,
            start_month,
            end_year,
            end_month,
            status_description,
            website,
            last_updated,
            release_status_string,
            organization_id,
            organization_name,
            organization_type_pretty
        ) VALUES %s
        """,
                projectRows,
                # Send every row in a single multi-row INSERT statement.
                page_size=len(projectRows)
            )

        conn.commit()
    except Exception:
        # Leave the connection usable (not stuck in an aborted transaction) so
        # the cell can be re-run after the problem is fixed.
        conn.rollback()
        raise
else:
    # With no rows, page_size would be 0 and execute_values would fail on an
    # empty page, so skip the insert entirely.
    print("- no hay proyectos para insertar")

In [8]:
# Release the database resources: the last cursor first, then the connection.
for resource in (cursor, conn):
    resource.close()