In [2]:
import pandas as pd
from bs4 import BeautifulSoup as bts
import requests as req
import asyncio
from tqdm import tqdm
from typing import List, Tuple, Union
import re

In [3]:
with open('html/intro_naves.html','r',encoding='UTF-8') as file:
    contenido=file.read()
soup=bts(contenido,'html.parser')
href=[a.get('href').replace('..','https://space.skyrocket.de') for a in soup.find_all('table')[0].find_all('a') if int(a.text) > 1950 and int(a.text) < 2023]

In [4]:
def asincrono(funcion):
    ''' esto es un decorador que permite colocarlo encima de una funcion para poder ejecutar otras mientras se espera el resultado'''
    def eventos(*args, **kwargs):
        return asyncio.get_event_loop().run_in_executor(None, funcion, *args, **kwargs)
    
    return eventos

In [5]:
def DataFrame_table(url) -> pd.DataFrame:
    '''ulr contenido de la web que se va a transformar:
    DataFrame_table(url)
    ulr-> el contenido que va a leer y transformar
    return -> dataframe'''
    response=req.get(url)
    html_content=response.text
    soup=bts(html_content,'html.parser')

    table=soup.find('table',{'class':'data'})
    content_table=[content.text for content in table.find_all('td')]
    keys=[e.text for e in table.find('tr',{'class':'bigheadline'}) if e.text != '\n']
    values=[content_table[e:e+6] for e in range(0,len(content_table),6)]
    
    ref = soup.find('div', {'class': 'container'}).text.split('\n')
    corte1=soup.find('div',{'class': 'container'}).text.split('\n').index('Launch sites:')+1
    corte2=soup.find('div', {'class': 'container'}).text.split('\n').index('\t\r')-1
    colum_ref={e.split('=')[0].strip() : e.split('=')[-1].strip() for e in ref[corte1:corte2]}

    table_dic={key : [] for key in keys}


    for sublist in values:
        for a in range(6):
            table_dic[keys[a]].append(sublist[a])

    df=pd.DataFrame(table_dic)

    def fail(x) -> str:
        '''Simplifica los valores de las descripciones dadas como : fallo ,exito , tipo de vuelo y dudoso
        x -> es la str que entra para comprobar los parametros'''
        phrase=x.lower()

        if 'fail' in phrase:
            x='Failed'
            return x
        elif any(word in phrase for word in ['explo','wrong','damaged','abort','not deployed','error','Lower']):
            return 'Failed'
        elif any(word in phrase for word in ['1st','first flight']):
            return 'First flight'
        elif 'last flight' in phrase:
            return'Last flight'
        elif 'note' in phrase:
            return x
        else:
            return 'Pass'
    
    def complate(x) -> str:
        '''sustituye valores vacios como exitosos
        x -> es la str que se lee'''
        if x =='':
            x='Pass'
            return x
        else:
            return x
        
    def clean_df(df) -> pd.DataFrame:  
        ''' se encarga de ordenar y limpiar el df
        df -> dataframe que desea pasar por la limpieza'''
        df['Date']=df['Date'].apply(lambda x : x.replace('?','') if '?' in x else x)
        df['Date']=df['Date'].astype('datetime64[ns]')
        df['Remark']=df['Remark'].apply(complate)
        df['Payload(s)']=df['Payload(s)'].str.replace('\n',',')
        df['Remark']=df['Remark'].apply(fail)

        Succes=df['Remark']

        df_change=df.drop(columns='Remark')
        df_change['Site_inf']=df_change['Site'].apply(lambda x : x.split()[-1])
        df_change['Site']=df_change['Site'].apply(lambda x : x.split()[0])
        df_change['complate_site']=df_change['Site'].map(colum_ref)
        
        complete_df=pd.concat([df_change,Succes],axis=1)

        return complete_df
    
    df_clean=clean_df(df)

    return df_clean

@asincrono
def data(href) -> pd.DataFrame:
    '''a partir de una lista de url se crea un df :

    misiones=pd.DataFrame()

    for i in range(len(href)): -> lee la longitud de la lista para extraer df de manera individual 
    
        try:
            misiones=pd.concat([misiones,DataFrame_table(href[i])],axis=0) ->concatena el df con el principal para crear un dataframe unico 

        except Exception as e: -> en caso de querer sasber el fallo
            print(f'{e}:{i}')
            continue
        '''
    misiones=pd.DataFrame()
    for i in range(len(href)):
        try:
            misiones=pd.concat([misiones,DataFrame_table(href[i])],axis=0)
        except Exception as e:
            print(f'{e}:{i}')
            continue
    return misiones

In [6]:
def get_url(url)->list:
    '''entra una lista de urls/href que buscara las urls internas de lanzamientos
    devuelve una lista de urls de una pagina
    get_url() -> ejemplo : 
    1) insertar una unica url_get('https://aqui.web/pones/tu/url')
    return -> lista de urls que hay en una pagina web
    '''
    response=req.get(url)
    html_content=response.text
    soup=bts(html_content,'html.parser')

    table=soup.find('table',{'class':'data'})
    content_table=[content for content in table.find_all('td')]
    values=[content_table[e:e+6] for e in range(0,len(content_table),6)]
    filtered_values = [[td for td in sublist if any(a in td for a in td if a.name == 'a')] for sublist in values]
    launch_veicles=[]
    
    for a in filtered_values:
        url=a[-1].find('a').get('href').replace('..','https://space.skyrocket.de')
        launch_veicles.append(url)
    return launch_veicles

@asincrono
def url_launches(url) -> list:
    '''entra una lista de urls/href que buscara las urls internas de lanzamientos
    devuelve una lista dee urls de todas las paginas
    url_launches(url) -> se usa una lista de urls para podedr ejecutar la funcioin : get_url
    y lo guarda en una lista de valores unicos

    urls=[]  -> valores unicos 
    
    for i in url:
    
        list_url_page=get_url(i)
        for new_url in list_url_page:
            if new_url not in urls:
                urls.append(new_url)

    return urls -> devuelve lista de valores unicos'''
    urls=[]
    for i in url:
        list_url_page=get_url(i)
        for new_url in list_url_page:
            if new_url not in urls:
                urls.append(new_url)
    return urls

In [7]:
def rockets(url) -> pd.DataFrame:

    response=req.get(url)
    html_content=response.text
    soup=bts(html_content,'html.parser')

    try:
        '''hay dos tipos de tablas que me interesan porlo que si da error en una es la otra'''
        info=soup.find('table',{'id':'rocdata'}).find_all('tr')
        keys=[document.text for document in info[0] if document.text != '\n']

        try:
            '''hay dos tipos de diccionarios y para evitar errores pongo ambas cualidades'''
            values=[document.text for document in info[-1] if document.text != '\n']
            table_rocket={keys[e] : [values[e]] for e in range(len(keys))}

            df=pd.DataFrame(table_rocket)
            colum_launch=pd.DataFrame({'Launch Vehicle':[url.split('/')[-1].split('.')[0]]})
            final_df=pd.concat([colum_launch,df],axis=1)
        except:
            values=[e.strip() for e in [e.text for e in info[-1] if e.text!='\n'][0].split('/')]
            table_rocket={keys[e] : [values[e]] for e in range(len(keys))}

            df=pd.DataFrame(table_rocket)
            colum_launch=pd.DataFrame({'Launch Vehicle':[url.split('/')[-1].split('.')[0]]})
            final_df=pd.concat([colum_launch,df],axis=1)
        
    except:
        
        info=soup.find('table',{'id':'rocperf'}).find_all('tr')
        keys=[document.text for document in info[0] if document.text != '\n']
        values=[document.text for document in info[-1] if document.text != '\n']
        table_rocket={keys[e] : [values[e]] for e in range(len(keys))}

        df=pd.DataFrame(table_rocket)
        colum_launch=pd.DataFrame({'Launch Vehicle':[url.split('/')[-1].split('.')[0]]})
        final_df=pd.concat([colum_launch,df],axis=1)
        
    return final_df

def rockets_table(href) -> pd.DataFrame:

    final_df=pd.DataFrame()
    other_df=pd.DataFrame()
    for url in href:
        try:
            df=rockets(url)
            if 'LEO' not in list(df.columns):
                final_df=pd.concat([final_df,df],axis=0)
            else:
                other_df=pd.concat([other_df,df],axis=0)
        except:
            continue
    
    other_df=other_df[other_df.columns.to_list()[:3]]
    final_df=final_df[final_df.columns.to_list()[:-4]]
    
    return final_df,other_df

def lists_of_launches(href) -> tuple:

        '''
        lists_of_launches(href) => aqui se aplica la funcion: result1 = lists_of_launches('launch1', 'launch2', 'launch3')
        o launches = ['launch4', 'launch5', 'launch6']
        return => devuelve dos argumentos'''
        
        response = req.get(href)
        response_html=response.text
        soup = bts(response_html,'html.parser')
        table = soup.find('div',{'class':'llist'}).find_all('pre')
        clave_html = [e.find_all('a') for e in table if e.find_all('a')][0]
        keys = [e.text for e in clave_html]
        dicio_clave = {e.text:e.get('href').replace('..','https://space.skyrocket.de') for e in clave_html}

        lista_tablas = []
        lista_url_sat = [url for key,url in dicio_clave.items()]

        for content in table:
                for line in content.text.splitlines():
                        if any(key in line for key in keys):
                                lista_tablas.append(line)

        result=(lista_tablas,lista_url_sat)

        return result

def general_fun_of_lists(lists,fun) -> Union[list, Tuple]:
    '''
    general_fun_of_lists (lists,fun) => es una funcion que se encarga de iterar lista en las funciones que uno necesita
    litst => [una,lista,de,elementos,que,desea,trabajar]
    fun => la funcion que se va ha aplicar en cada elemento 
    return => depende de los resultados de la funcion que se use:

    1) da una lista unica => return solucion
    2) da una lista de dos resultados => return (solucion,solucion2)

    opcional: errores

    0) en caso de que haya errores esta funcion te enseña donde ha pasado, y el error:
       por eso en este caso puede ocurrir dos cosas:
       1)si la lista unica tiene errores, te devolvera una tupla donde el primero es la solucion y la segundd los errores => return (solucion, errores)
       2)si es una tupla de dos soluciones, te devuelve las soluciones mas el error => return (solucion,solucion2,errores)

    ejemplo:

    def encontrar_x() -> lista          fun de ejemplo

    general_fun_of_lists([una,lista,de,elementos,que,desea,trabajar],encontrar_x)
    '''
    sol = []
    sol_2 = []
    error = []

    for element in lists:

        try:
            result = fun(element)
            if len (result) == 1:
                sol.extend(result)
            elif len (result) == 2:
                sol.extend(result[0])
                sol_2.extend(result[-1])
        except Exception as e:
            error.append(f'{e}:{element}')

    if sol_2:
        if error:
            result = (sol,sol_2,error)
            return result
        else:
            result = (sol,sol_2)
            return result
    elif sol:
        if error:
            result = (sol,error)
            return result
        else:
            return sol


In [8]:
def visualizar_fallos(urls,fun):
    '''
    uso en caso de querer descubrir donde esta fallando la funcion:
    visualizar_fallos(urls,fun)
    urls -> urls que necesita leer la funcion para poner aprueba la peticion
    fun -> es la funcion que quieres testear para su funcionalidad
    return -> devuelve uns lista de errores
    '''
    errors=[]

    for i in range(len(urls)):
        try:
            fun(urls[i])
        except Exception as e:
            errors.append(f'{e}:{i}')
            
    
    return errors

In [9]:
help(visualizar_fallos)

Help on function visualizar_fallos in module __main__:

visualizar_fallos(urls, fun)
    uso en caso de querer descubrir donde esta fallando la funcion:
    visualizar_fallos(urls,fun)
    urls -> urls que necesita leer la funcion para poner aprueba la peticion
    fun -> es la funcion que quieres testear para su funcionalidad
    return -> devuelve uns lista de errores



In [10]:
misiones=data(href)
launches=url_launches(href)

In [12]:
misiones=misiones.result()
launches_list=launches.result()

In [13]:
rockes_1,rockes_2=rockets_table(launches_list)

In [14]:
misiones

Unnamed: 0,ID,Date,Payload(s),Launch Vehicle,Site,Site_inf,complate_site,Remark
0,1957 α (001),1957-04-10,Sputnik 1 (PS-1 #1),Sputnik (1),Ba,LC-1/5,"Baikonur (Tyuratam, NIIP-5, GIK-5), Tyuratam, ...",Pass
1,1957 β (002),1957-03-11,Sputnik 2 (PS-2 #1),Sputnik (1),Ba,LC-1/5,"Baikonur (Tyuratam, NIIP-5, GIK-5), Tyuratam, ...",Pass
2,1957-F01,1957-06-12,Vanguard (Test Satellite F),Vanguard,CC,LC-18A,"Cape Canaveral Air Force Station, Eastern Test...",Failed
0,1958 α (001),1958-01-02,Explorer 1,Juno-1,CC,LC-26A,"Cape Canaveral Air Force Station, Eastern Test...",Pass
1,1958-F01,1958-05-02,Vanguard (Test Satellite G),Vanguard,CC,LC-18A,"Cape Canaveral Air Force Station, Eastern Test...",Failed
...,...,...,...,...,...,...,...,...
181,2022-F07,2022-12-21,"Pléiades-Neo 5 (VHR-2020 3),Pléiades-Neo 6 (VH...",Vega-C,Ko,ELV,"Centre Spatial Guyanais (CSG), Kourou, French ...",Failed
182,2022-176,2022-12-27,Gaofen 11-04 (GF 11-04),CZ-4B,TY,LC-9,"Taiyuan Satellite Launch Center (TSLC), Wuzhai...",Pass
183,2022-177,2022-12-28,"Starlink v1.5 G5-1-1 (Starlink 5382),Starlink ...",Falcon-9 v1.2 (Block 5),CC,SLC-40,"Cape Canaveral Air Force Station, Eastern Test...",Pass
184,2022-178,2022-12-29,SY 10-02,CZ-3B/G2(2),Xi,LC-2,"Xichang Space Center (Songlin), Sichuan, China",Pass


In [15]:
rockes_1

Unnamed: 0,Launch Vehicle,Version,Strap-On,Stage 1,Stage 2,Stage 3
0,sputnik-1,Sputnik (8K71PS),"Blok-B,V,G,D / 4 × RD-107-8D74PS",Blok-A / RD-108-8D75PS,,
0,vanguard,Vanguard,,X-405,AJ-10-37,GRC 133-KS-2800
0,juno-1,Juno-1,,Redstone / A-7,11 × Baby Sergeant,3 × Baby Sergeant
0,sputnik-2,Sputnik (8A91),"Blok-B,V,G,D / 4 × RD-107-8D76",Blok-A / RD-108-8D77,,
0,pilot,Pilot (NOTS-EV1H),,2 × 2 × HOTROC,X-241,NOTS-100
...,...,...,...,...,...,...
0,sslv,SSLV,,S-85,S-7,S-4
0,falcon-9-heavy_b5_px,Falcon-Heavy (Block 5) (px),"2 × Stage 1 (str., reusable) / 9 × Merlin-1D",Stage 1 (str.) / 9 × Merlin-1D (upr.+),Stage 2 (str.) / Merlin-1D-Vac (upr.+),
0,sls_bl1_icps,SLS (Block-1) iCPS,2 × RSRM-5,4 × RS-25D,iCPS / RL10B-2,
0,jielong-3,Jielong-3 (Smart Dragon-3),,?,?,?


In [16]:
rockes_2

Unnamed: 0,Launch Vehicle,Performance (kg),LEO
0,atlas-d_mercury,Atlas-D Mercury,1350
0,atlas-slv3,Atlas-SLV3,800
0,slv-3,SLV-3,40


In [17]:
resultado=general_fun_of_lists(launches_list,lists_of_launches)

In [18]:
lista_launches_trabajar,urls_sat,errores=resultado

In [19]:
clean=[[element for element in file.split('  ') if element != ''] for file in lista_launches_trabajar]

#RECONTRUCCION DE LOS DATOS 

In [20]:
lenght_of_info=set([len(e) for e in clean])
dicio_of_launches_info={e : [] for e in lenght_of_info}
for line in clean:
    dicio_of_launches_info[len(line)].append(line)
dicio_of_launches_info

dataframes=[e for e in lenght_of_info]
for element in lenght_of_info:
    dataframes[element-1]=pd.DataFrame(dicio_of_launches_info[element])

In [21]:
keys=[element.split('/')[-1].split('.')[0] for element in launches_list]

In [22]:
df_8=dataframes[8]
pattern = r'^\d{2}\.\d{2}\.\d{4}$'

In [281]:
df_8[4]=df_8[4].str.replace(' ','')

for column in df_8.columns:
    df_8[column]=df_8[column].str.strip()

for column in df_8.columns:
    df_8.loc[df_8[column].str.contains(pattern), 'date'] = df_8.loc[df_8[column].str.contains(pattern), column]

for column in df_8.columns:
    df_8.loc[df_8[column].str.lower().isin(keys), 'type'] = df_8.loc[df_8[column].str.lower().isin(keys), column]

for column in df_8.columns:  
    df_8.loc[df_8[column].str.contains(r'^[A-Z0-9-/]{2,}$') , 'serial'] = df_8.loc[df_8[column].str.contains(r'^[A-Z0-9-/]{2,}$') , column]

for column in df_8.columns: 
    df_8.loc[df_8[column].str.contains(r'^[A-Za-z0-9\\\-/]+ [A-Za-z0-9\\\-/]{4,}$'), 'LS'] =  df_8.loc[df_8[column].str.contains(r'^[A-Za-z0-9\\\-/]+ [A-Za-z0-9\\\-/]{4,}$')  , column]

df_8['payload']=df_8[8]


clean_8 = df_8[df_8.columns[-4:-1]]

In [282]:
df_8

Unnamed: 0,0,1,2,3,4,5,6,7,8,date,type,serial,LS,payload
0,12,7,7,ST-7,Scout-X1,WI LA-3,*,19.10.1961,P 21,19.10.1961,Scout-X1,ST-7,WI LA-3,P 21
1,15,9,1,ST-9,Scout-X2,WI LA-3,*,29.03.1962,P 21A,29.03.1962,Scout-X2,ST-9,WI LA-3,P 21A
2,26,19,4,116,Scout-X3,WI LA-3,*,22.05.1963,RFD 1,22.05.1963,Scout-X3,116,WI LA-3,RFD 1
3,35,28,5,124R,Scout-X4,WI LA-3A,*,20.07.1964,SERT 1,20.07.1964,Scout-X4,124R,WI LA-3A,SERT 1
4,83,76,24,166CR,Scout-B,WI LA-3A,*,20.09.1971,BIC,20.09.1971,Scout-B,BIC,WI LA-3A,BIC
5,102,95,10,193C,Scout-D1,WI LA-3A,*,18.06.1976,GP A (Gravity Probe 1),18.06.1976,Scout-D1,193C,WI LA-3A,GP A (Gravity Probe 1)
6,10,Ariane-5G,10,S,V-142/510,12.07.2001,Ko ELA-3,P,Artemis / BSat 2b,12.07.2001,Ariane-5G,V-142/510,Ko ELA-3,Artemis / BSat 2b
7,14,Ariane-5ECA,1,L,V-157/517,11.12.2002,Ko ELA-3,F,Hotbird 7 / Stentor / MFD A / MFD B,11.12.2002,Ariane-5ECA,V-157/517,Ko ELA-3,Hotbird 7 / Stentor / MFD A / MFD B
8,97,Ariane-5ECA,65,L,VA-241/5101,25.01.2018,Ko ELA-3,P,SES 14/GOLD / Al Yah 3,25.01.2018,Ariane-5ECA,VA-241/5101,Ko ELA-3,SES 14/GOLD / Al Yah 3
9,1,1,LauncherOne,R2,25.05.2020,Mo RW12/30,B-747-400,F,Starshine 4 / Intern-Sat,25.05.2020,LauncherOne,B-747-400,Mo RW12/30,Starshine 4 / Intern-Sat


In [283]:
clean_8

Unnamed: 0,type,serial,LS
0,Scout-X1,ST-7,WI LA-3
1,Scout-X2,ST-9,WI LA-3
2,Scout-X3,116,WI LA-3
3,Scout-X4,124R,WI LA-3A
4,Scout-B,BIC,WI LA-3A
5,Scout-D1,193C,WI LA-3A
6,Ariane-5G,V-142/510,Ko ELA-3
7,Ariane-5ECA,V-157/517,Ko ELA-3
8,Ariane-5ECA,VA-241/5101,Ko ELA-3
9,LauncherOne,B-747-400,Mo RW12/30


In [25]:
def buscador_links(nombre_lanzamiento):
    for e in launches_list:
        if nombre_lanzamiento in e:
            print(e)

In [26]:
def buscador_links_sat(nombre_lanzamiento):
    for e in urls_sat:
        if nombre_lanzamiento in e:
            print(e)

In [27]:
def date_pattern(str):
    pattern=r'\d{2}\.\d{2}\.\d{4}'
    condicion=re.search(pattern, str)
    if condicion:
        return str
    else:
        return ''

def date(str):
    pattern = r'\d{2}\.\d{2}\.\d{4}'
    if str == (pattern):
        return str

def clean_date(str):
    pattern = r'\d{2}\.\d{2}\.\d{4}'
    concidencia = re.findall(pattern,str)
    if concidencia:
        return concidencia[0]
    return str

def type_pattern(str):
    global keys
    if any(key in str.lower() for key in keys):
        return str
    elif str.lower() in keys:
        return str
    elif any (key[:5] in str.lower() for key in keys if len(key)>13):
        return str
    elif any (key[:5] in str.lower() for key in keys if len(key)>6):
        return str
    elif any (key[:3] in str.lower() for key in keys if len(key)>6):
        return str
    return ''

def serial_patern(str):
    pattern_serial = r'^(?![0-9]{0,2}$)(?![A-Z]{4}$)(?![A-Z]{1}[0-9]{1}[A-Z]{1}[0-9]{1}$)([A-Z0-9-/]{3,}|[A-Z0-9-\s)(]{2,}\([A-Z]{2}\)|[A-Za-z]{2}[0-9]{5}\-[0-9]{3}|[A-Z0-9/.]+\.[0-9/.]|[A-Z0-9]{2})$'
    coinci= re.search(pattern_serial , str)
    if coinci:
        return coinci[0]
    return ''
def serial_patern_2(str):
    pattern_serial = r'^(?![0-9]{0,2}$)(?![A-Z]{4}$)([A-Z0-9-/]{3,}|[A-Z0-9-\s)(]{2,}\([A-Z]{2}\)|[A-Za-z]{2}[0-9]{5}\-[0-9]{3}|[A-Z0-9/.]+\.[0-9/.])$'
    coinci= re.search(pattern_serial , str)
    if coinci:
        return coinci[0]
    return ''

def ls_pattern(str):
    pattern_ls= r'^(?:[A-Za-z]{2}\s[A-Za-z0-9/-]+|[A-Za-z]{2}|[A-Za-z]{0,3}\s[A-Z0-9-]+|[A-Za-z]{3}\s[A-Z0-9-]+|[A-Z]{2}\s[A-Z]\-[A-Za-z0-9/-]|[A-Za-z]{2}\s[A-Z]{2}\-[0-9]{1}|[A-Z]{2}\s[A-Z]{0,4}\-[0-9]{2}[A-Z])$'
    coincidencia = re.search( pattern_ls, str)
    if coincidencia:
        return coincidencia[0]
    return ''

def fail(text):
    pattern = r'^(F\s|\sF)|(P\s|\sP)$'  # Patrón que busca "F" al inicio o al final con un espacio antes o después
    coin = re.search(pattern, text)
    if coin:
        return 'F'
    elif text =='F' or text =='P':
        return 'F'
    return ''

def aplicador_columnas_fun(nombre_columna,prueba,fun,lenght,limpiar = False):
    if limpiar == False :
        for i in range(lenght[0],lenght[1]):
            prueba[f'new_{nombre_columna}_{i}'] = prueba[i].apply(fun)
        return prueba
    else:
        for i in range(lenght[0],lenght[1]):
            prueba[f'new_{nombre_columna}_{i}'] = prueba[f'new_{nombre_columna}_{i}'].apply(fun)
        return prueba
    
def borrar_columnas(nombre_columna,prueba,lenght):
        for i in range(lenght[0],lenght[1]):
            prueba = prueba.drop(columns=[f'new_{nombre_columna}_{i}'])
        return prueba


In [28]:
df_7 = dataframes[7]

In [29]:
pattern = r'^(?:\d{2}\.\d{2}\.\d{4}(?: [A-Z])?|(?:[A-Z] )?\d{2}\.\d{2}\.\d{4}|(?:\d{2}\.\d{2}\.\d{4}\.)|\d{2}\.\d{2}\.\d{4})(?: [a-z])?(?: \.)?$'
pattern_serial = r'^(?![0-9]{0,2}$)([A-Z0-9-/]{3,}|[A-Z0-9-\s)(]{2,}\([A-Z]{2}\)|[A-Za-z]{2}[0-9]{5}\-[0-9]{3}|[A-Z0-9/.]+\.[0-9/.])$'
pattern_ls= r'^(?:[A-Z]{2}|[A-Za-z]{2}\s[A-Za-z0-9-/]+|[A-Z]{2})$'

In [30]:
pru = df_7.copy()

In [31]:
def apply_logic(row):
    if row[7] != '':
        return row[7]
    else:
        return row[6]

In [32]:
for column in pru.columns:
    pru[column]=pru[column].str.strip()

pru = aplicador_columnas_fun('date',pru,date_pattern,[2,7])
pru = aplicador_columnas_fun('date',pru,clean_date,[2,7],True)

patron_dia=r'\d{2}\.\d{2}\.\d{4}'

for column in range(2,7):  
    pru.loc[pru[f'new_date_{column}'].str.contains(patron_dia) , 'date'] = pru.loc[pru[f'new_date_{column}'].str.contains(patron_dia) , f'new_date_{column}']

pru=borrar_columnas('date',pru,[2,7])

pru = aplicador_columnas_fun('type',pru,type_pattern,[1,6])

for e in range(1, 6):
    mask = pru[f'new_type_{e}'] != ''
    pru.loc[mask, 'type'] = pru.loc[mask, f'new_type_{e}']

pru=borrar_columnas('type',pru,[1,6])

for column in range(2,6):
    pru[column]=pru[column].str.strip()

pru[4] = pru[4].str.replace(' ','')

pru= aplicador_columnas_fun('serial',pru,serial_patern_2,[1,7])

pattern_serial =  r'^(?![0-9]{0,2}$)(?![A-Z]{4}$)([A-Z0-9-/]{3,}|[A-Z0-9-\s)(]{2,}\([A-Z]{2}\)|[A-Za-z]{2}[0-9]{5}\-[0-9]{3}|[A-Z0-9/.]+\.[0-9/.])$'

for column in range(1,7):  
        pru.loc[ pru[f'new_serial_{column}'].str.contains(pattern_serial) , 'serial'] =  pru.loc[ pru[f'new_serial_{column}'].str.contains(pattern_serial) , f'new_serial_{column}']


pru=borrar_columnas('serial',pru,[1,7])

pru=aplicador_columnas_fun('LS',pru,ls_pattern,[1,7])

patron_ls = r'^(?:[A-Za-z]{2}\s[A-Za-z0-9/-]+|[A-Za-z]{2}|[A-Za-z]{0,3}\s[A-Z0-9-]+|[A-Za-z]{3}\s[A-Z0-9-]+|[A-Z]{2}\s[A-Z]\-[A-Za-z0-9/-])$'

for column in range(1,7):  
        pru.loc[ pru[f'new_LS_{column}'].str.contains(patron_ls) , 'LS'] =  pru.loc[ pru[f'new_LS_{column}'].str.contains(patron_ls) , f'new_LS_{column}']

pru=borrar_columnas('LS',pru,[1,7])

pru['payload'] = pru.apply(apply_logic, axis=1)

  return func(self, *args, **kwargs)


In [33]:
pru

Unnamed: 0,0,1,2,3,4,5,6,7,date,type,serial,LS,payload
0,38,2,Juno-1,RS-26 (UV),05.03.1958,F,CC LC-26A,Explorer 2,05.03.1958,Juno-1,RS-26 (UV),CC LC-26A,Explorer 2
1,47,5,Juno-1,RS-47 (TI),24.08.1958,F,CC LC-5,Explorer 5,24.08.1958,Juno-1,RS-47 (TI),CC LC-5,Explorer 5
2,1,1,Vostok-L,B1-3,23.09.1958,F,Ba LC-1/5,Luna (1a) (Ye-1 1),23.09.1958,Vostok-L,B1-3,Ba LC-1/5,Luna (1a) (Ye-1 1)
3,2,2,Vostok-L,B1-4,11.10.1958,F,Ba LC-1/5,Luna (1b) (Ye-1 2),11.10.1958,Vostok-L,B1-4,Ba LC-1/5,Luna (1b) (Ye-1 2)
4,3,3,Vostok-L,B1-5,04.12.1958,F,Ba LC-1/5,Luna (1c) (Ye-1 3),04.12.1958,Vostok-L,B1-5,Ba LC-1/5,Luna (1c) (Ye-1 3)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
757,9,5,Astra Rocket-3.3,LV0010,12.06.2022,F,CC SLC-46,TROPICS 02 / TROPICS 04,12.06.2022,Astra Rocket-3.3,LV0010,CC SLC-46,TROPICS 02 / TROPICS 04
758,1,1,Firefly-Alpha,FLTA001,03.09.2021,Va SLC-2W,F,Serenity / Hiapo / BSS 1 / FossaSat 2 / FossaS...,03.09.2021,Firefly-Alpha,FLTA001,Va SLC-2W,Serenity / Hiapo / BSS 1 / FossaSat 2 / FossaS...
759,2,2,Firefly-Alpha,FLTA002,01.10.2022,Va SLC-2W,p,"Serenity (2) / TechEdSat 15 / GENESIS G, J / Q...",01.10.2022,Firefly-Alpha,FLTA002,Va SLC-2W,"Serenity (2) / TechEdSat 15 / GENESIS G, J / Q..."
760,91,1,Atlas-5(511)²,AV-084,5S,CC SLC-41,21.01.2022,GSSAP 5 (USA 324) / GSSAP 6 (USA 325),21.01.2022,Atlas-5(511)²,AV-084,CC SLC-41,GSSAP 5 (USA 324) / GSSAP 6 (USA 325)


In [284]:
clean_7 = pru[pru.columns.tolist()[-4:-1]]

In [285]:
clean_7

Unnamed: 0,type,serial,LS
0,Juno-1,RS-26 (UV),CC LC-26A
1,Juno-1,RS-47 (TI),CC LC-5
2,Vostok-L,B1-3,Ba LC-1/5
3,Vostok-L,B1-4,Ba LC-1/5
4,Vostok-L,B1-5,Ba LC-1/5
...,...,...,...
757,Astra Rocket-3.3,LV0010,CC SLC-46
758,Firefly-Alpha,FLTA001,Va SLC-2W
759,Firefly-Alpha,FLTA002,Va SLC-2W
760,Atlas-5(511)²,AV-084,CC SLC-41


In [36]:
df_6=dataframes[6]

In [37]:
prueba=df_6.copy()

In [38]:
def serial_patern_0(str):
    pattern = r'^(?![a-z]{2,}$)(?![A-Z]{2}\-[0-9A-Z]{2}\/[A-Z]{0,2}$)(?!\d{2}\.\d{2}\.\d{4})([A-Z]{0,3}\-[0-9]|[0-9]{3}|[A-Za-z]{0,3}\-[0-9A-Z-/]|[A-Za-z]{0,2}[0-9]{0,5}\-[0-9]{0,3}|[0-9A-Z]{2}\-[A-Z0-9]{0,3}|[A-Z0-9/.]+\.[0-9/.])'
    coin = re.search(pattern, str)
    if coin:
        return str
    return ''

In [39]:
patron_dia=r'\d{2}\.\d{2}\.\d{4}'

sol=aplicador_columnas_fun('date',prueba,date_pattern,[2,6],False)
sol=aplicador_columnas_fun('date',prueba,clean_date,[2,6],True)
for column in range(2,6):  
        prueba.loc[ prueba[f'new_date_{column}'].str.contains(patron_dia) , 'date'] =  prueba.loc[ prueba[f'new_date_{column}'].str.contains(patron_dia) , f'new_date_{column}']
sol=borrar_columnas('date',prueba,[2,6])

sol_1=aplicador_columnas_fun('type',sol,type_pattern,[2,6])

for e in range(2, 6):
    mask = sol_1[f'new_type_{e}'] != ''
    sol_1.loc[mask, 'type'] = sol_1.loc[mask, f'new_type_{e}']


sol_1=borrar_columnas('type',sol_1,[2,6])

for column in range(1,6):
    sol_1[column]=sol_1[column].str.strip()

sol_1= aplicador_columnas_fun('serial',sol_1,serial_patern_0,[1,4])

pattern_serial = r'^(?![a-z]{2,}$)(?![A-Z]{2}\-[0-9A-Z]{2}\/[A-Z]{0,2}$)([A-Z]{0,3}\-[0-9]|[0-9]{3}|[A-Za-z]{0,3}\-[0-9A-Z-/]|[A-Za-z]{0,2}[0-9]{0,5}\-[0-9]{0,3}|[0-9A-Z]{2}\-[A-Z0-9]{0,3}|[A-Z0-9/.]+\.[0-9/.])'

for column in range(1,4):  
        sol_1.loc[ sol_1[f'new_serial_{column}'].str.contains(pattern_serial) , 'serial'] =  sol_1.loc[ sol_1[f'new_serial_{column}'].str.contains(pattern_serial) , f'new_serial_{column}']

sol_1=borrar_columnas('serial',sol_1,[1,4])

sol_1=aplicador_columnas_fun('LS',sol_1,ls_pattern,[2,6])

pattern_ls = r'^(?:[A-Za-z]{2}\s[A-Za-z0-9/-]+|[A-Za-z]{2}|[A-Za-z]{0,3}\s[A-Z0-9-]+|[A-Za-z]{3}\s[A-Z0-9-]+|[A-Z]{2}\s[A-Z]\-[A-Za-z0-9/-]|[A-Za-z]{2}\s[A-Z]{2}\-[0-9]{1}|[A-Z]{2}\s[A-Z]{0,4}\-[0-9]{2}[A-Z])$'

for column in range(2, 6):
    sol_1.loc[sol_1[f'new_LS_{column}'].str.contains(pattern_ls), 'LS'] = sol_1.loc[sol_1[f'new_LS_{column}'].str.contains(pattern_ls), f'new_LS_{column}']

sol_1=borrar_columnas('LS',sol_1,[2,6])

sol_1['payload']=sol_1[6]

  return func(self, *args, **kwargs)


In [286]:
clean_6=sol_1[sol_1.columns.tolist()[-4:-1]]

In [287]:
clean_6

Unnamed: 0,type,serial,LS
0,Vanguard,TV-3,CC LC-18A
1,Vanguard,TV-3BU,CC LC-18A
2,Vanguard,TV-4,CC LC-18A
3,Vanguard,TV-5,CC LC-18A
4,Vanguard,SLV-1,CC LC-18A
...,...,...,...
4348,SSLV,,Sr FLP
4349,SSLV,,Sr FLP
4350,Falcon-Heavy b5(px),B1066.1/64.1/65.1,CCK LC-39A
4351,Falcon-Heavy b5(px),B1070.1/64.2/65.2,CCK LC-39A


In [42]:
df_5=dataframes[5]
pru5=df_5.copy() 

In [43]:
patron_dia=r'\d{2}\.\d{2}\.\d{4}'

sol1=aplicador_columnas_fun('date',pru5,date_pattern,[2,5],False)
sol1=aplicador_columnas_fun('date',pru5,clean_date,[2,5],True)
for column in range(2,5):  
        pru5.loc[ pru5[f'new_date_{column}'].str.contains(patron_dia) , 'date'] =  pru5.loc[ pru5[f'new_date_{column}'].str.contains(patron_dia) , f'new_date_{column}']
sol1=borrar_columnas('date',sol1,[2,5])

sol1=aplicador_columnas_fun('type',sol1,type_pattern,[1,5])

for e in range(1, 5):
    mask = sol1[f'new_type_{e}'] != ''
    sol1.loc[mask, 'type'] = sol1.loc[mask, f'new_type_{e}']

sol1=borrar_columnas('type',sol1,[1,5])

for column in range(1,6):
    sol1[column]=sol1[column].str.strip()

sol1= aplicador_columnas_fun('serial',sol1,serial_patern_0,[1,4])

pattern_serial = r'^(?![a-z]{2,}$)(?![A-Z]{2}\-[0-9A-Z]{2}\/[A-Z]{0,2}$)([A-Z]{0,3}\-[0-9]|[0-9]{3}|[A-Za-z]{0,3}\-[0-9A-Z-/]|[A-Za-z]{0,2}[0-9]{0,5}\-[0-9]{0,3}|[0-9A-Z]{2}\-[A-Z0-9]{0,3}|[A-Z0-9/.]+\.[0-9/.])'

for column in range(1,4):  
        sol1.loc[ sol1[f'new_serial_{column}'].str.contains(pattern_serial) , 'serial'] =  sol1.loc[ sol1[f'new_serial_{column}'].str.contains(pattern_serial) , f'new_serial_{column}']

sol1=borrar_columnas('serial',sol1,[1,4])

sol1['serial'] = sol1['serial'].fillna('no serial')

sol1=aplicador_columnas_fun('LS',sol1,ls_pattern,[2,6])

pattern_ls = r'^(?:[A-Za-z]{2}\s[A-Za-z0-9/-]+|[A-Za-z]{2}|[A-Za-z]{0,3}\s[A-Z0-9-]+|[A-Za-z]{3}\s[A-Z0-9-]+|[A-Z]{2}\s[A-Z]\-[A-Za-z0-9/-]|[A-Za-z]{2}\s[A-Z]{2}\-[0-9]{1}|[A-Z]{2}\s[A-Z]{0,4}\-[0-9]{2}[A-Z])$'

for column in range(2, 6):
    sol1.loc[sol1[f'new_LS_{column}'].str.contains(pattern_ls), 'LS'] = sol1.loc[sol1[f'new_LS_{column}'].str.contains(pattern_ls), f'new_LS_{column}']

sol1=borrar_columnas('LS',sol1,[2,6])

sol1['payload']=sol1[5]

  return func(self, *args, **kwargs)


In [288]:
clean_5=sol1[sol1.columns.tolist()[-4:-1]]

In [289]:
launches = pd.concat([clean_8,clean_7,clean_6,clean_5], axis=0).reset_index(drop=True)

In [290]:
launches.to_csv('csv/launches.csv', index=False)

In [89]:
misiones.to_csv('csv/misiones.csv', index=False)

In [47]:
buscador_links('astra')

https://space.skyrocket.de/doc_lau_det/astra-rocket-3.htm
https://space.skyrocket.de/doc_lau_det/astra-rocket-3-3.htm


In [220]:
def sats(url) -> pd.DataFrame:

    response=req.get(url)
    html_content=response.text
    soup=bts(html_content,'html.parser')

    table=soup.find('table',{'id':'satlist'})

    keys = [e.text for e in table.find_all('th')]

    datos = [e.text for e in table.find_all('td')]

    values = [datos[e:e+7] for e in range(0,len(datos),7)]

    dictio_sats = {key : [] for key in keys}

    for value in values:
        for i in range(len(keys)):
            dictio_sats[keys[i]].append(value[i])

    df = pd.DataFrame(dictio_sats)

    return df

@asincrono
def df(lista):
    errores=[]
    df = pd.DataFrame()
    for element in lista:
        try:
            sol = sats(element)
            df = pd.concat([df,sol],axis=0)
        except Exception as e:
            error=f'{e}{element}'
            print(error)
            errores.append(error)
            continue
    return (df,errores)


In [61]:
pruebals=df(urls_sat)

In [74]:
resultado=pruebals.result()

In [75]:
sat_df=resultado[0]
sat_errors=resultado[1]

In [66]:
#prues=pruebalp.result()
#df_errors=prues[1]
#df_sat=prues[0]

In [166]:
def date_pattern_pru(str):
    
    pattern=r'\d{2}\.\d{2}\.\d{4}'
    condicion=re.search(pattern, str)
    if condicion:
        return str
    elif condicion != False:
        pattern = r'\d{4}$'
        con = re.search(pattern, str)
        if con:
            return '01.01.'+con[0]
    else:
        return ''

In [87]:
df_sat=df_sat.reset_index(drop=True)

df_sat.to_csv('csv/sats_inf.csv', index=False)

df_sat.rename(columns={'':'success'},inplace=True)

df_sat['success'] = df_sat['success'].apply(lambda x : 'OK' if x == '' else x)

df_sat['success'] = df_sat['success'].apply(lambda x: 'F' if any(value in x for value in ['F', 'p', 'P','f','t','T','*']) else x)

df_sat['Date'] = df_sat['Date'].apply(date_pattern_pru)

df_sat['Date'] = df_sat['Date'].astype(str)
df_sat['Date'] = df_sat['Date'].apply(clean_date)

df_sat['Date'] = df_sat['Date'].apply( lambda x : '11.10.1111' if len(x)<10 else x)

df_sat['Date'] = df_sat['Date'].str.replace('1111','2222')

df_sat['Date'] = df_sat['Date'].apply(date_pattern)

In [188]:
df_sat.to_csv('csv/sats.csv', index=False)

In [237]:
def sats_or(url) -> pd.DataFrame:

    response=req.get(url)
    html_content=response.text
    soup=bts(html_content,'html.parser')

    table=soup.find('table',{'id':'satdata'})

    titulo = soup.find('h1').text

    df=pd.DataFrame({conten.find('th').text :[conten.find('td').text] for conten in table.findAll('tr')})
    df['titulo']=titulo

    return df

@asincrono
def df_or(lista):
    errores=[]
    df = pd.DataFrame()
    for element in lista:
        try:
            sol = sats_or(element)
            df = pd.concat([df,sol],axis=0)
        except Exception as e:
            error=f'{e}{element}'
            print(error)
            errores.append(error)
            continue
    return (df,errores)

In [238]:
sol_sat_huevon = df_or(urls_sat)

In [250]:
sol_sat_huevon

<Future finished result=(   Nation:   ... x 13 columns], ["'NoneType' o...g/star-13.htm", "'NoneType' o...g/star-24.htm", "'NoneType' o...age/pam-d.htm", "'NoneType' o...stage/ius.htm", "'NoneType' o...age/pam-d.htm", "'NoneType' o...age/pam-d.htm", ...])>

In [251]:
huevon = sol_sat_huevon.result()

In [255]:
party = huevon[0].drop(columns='Type, Application:')


In [299]:
party

Unnamed: 0,Nation:,Type / Application:,Operator:,Contractors:,Equipment:,Configuration:,Propulsion:,Power:,Lifetime:,Mass:,Orbit:,titulo
0,USSR,Technology,,NPO Energia,2 transmitters,pressurized sphere with four antennas,,Batteries,21 days,84 kg,"228 km × 947 km, 65.0°",Sputnik 1 (PS-1 #1)
0,USSR,Biological resaerch,,,,R-7 core with added payload,none (after burnout),Batteries,6 days,508 kg (payload),"212 km × 1660 km, 65.3°",Sputnik 2 (PS-2 #1)
0,USA,Science,NASA,Naval Research Laboratory (NRL),,,,"Solar cells, batteries",,1.5 kg,"654 km × 3969 km, 34.25°",Vanguard (6.5in)
0,USA,Science,NASA,Naval Research Laboratory (NRL),,,,"Solar cells, batteries",,1.5 kg,"654 km × 3969 km, 34.25°",Vanguard (6.5in)
0,USA,Science,NASA,Naval Research Laboratory (NRL),,,,"Solar cells, batteries",,1.5 kg,"654 km × 3969 km, 34.25°",Vanguard (6.5in)
...,...,...,...,...,...,...,...,...,...,...,...,...
0,China,"Communication M2M/IoT, traffic monitoring",HEAD Aerospace,SAST,high performance AIS-receiver,,,"2 deployable solar arrays, batteries",2-3 years,45 kg,"796 km × 809 km, 98.65° (#1); 495 km × 511 km,...","HEAD 1, 2A, 2B, 2C, 2D, 2E, 2F, 3, 4, 5 (Hede ..."
0,China,Earth observation,Lingzhong Kongjian Jishu (Beijing Zero G Lab),Lingzhong Kongjian Jishu (Beijing Zero G Lab),,,,"Solar arrays, batteries",,,,"Jinzijing 1-01, ..., 1-06 (Golden Bauhinia 1-0..."
0,China,Communication M2M/IoT,Guodian Gaoke,Guodian Gaoke,,,,"Solar arrays, batteries",,~50 kg,,"Tianqi 7, 8, 9, 13, 14, 15"
0,China,"Education, amateur radio communication, techno...",CAMSAT,CAMSAT,,CubeSat (6U),,"Solar cells, batteries",,,,"CAS 5A (Fengtai Shaonian 2, FO 118, Fengtai-OS..."


In [319]:
sat_uni = list(set([e.split('/')[-1].split('.')[0] for e in urls_sat]))

In [346]:
def type_sat(str):
    global sat_uni

    for key in sat_uni:
        if ' '.join(str.lower().split()[:2]).replace(' ','-') in key:
            return key
        elif ' '.join(str.lower().split()[0]).replace(' ','-') in key[:5] :
            return key
    return ''

In [347]:
party['prueba'] = party['titulo'].apply(type_sat)

In [342]:
for e in sat_uni:
    if 'van' in e:
        print(e)

naduvaniy-gazovoy-ballon
leo-vantage-1
vanguard-20in_xr
ravan
vanguard-20in_cc
vanguard-20in_la
vanguard-20in_rb
vanguard-13in_m
vanguard-balloon
kvant-1
vanguard-6in
leo-vantage-2
kvant-2
savant
vanguard-20in_mxe


In [345]:
party

Unnamed: 0,Nation:,Type / Application:,Operator:,Contractors:,Equipment:,Configuration:,Propulsion:,Power:,Lifetime:,Mass:,Orbit:,titulo,prueba
0,USSR,Technology,,NPO Energia,2 transmitters,pressurized sphere with four antennas,,Batteries,21 days,84 kg,"228 km × 947 km, 65.0°",Sputnik 1 (PS-1 #1),sputnik-1
0,USSR,Biological resaerch,,,,R-7 core with added payload,none (after burnout),Batteries,6 days,508 kg (payload),"212 km × 1660 km, 65.3°",Sputnik 2 (PS-2 #1),sputnik-2
0,USA,Science,NASA,Naval Research Laboratory (NRL),,,,"Solar cells, batteries",,1.5 kg,"654 km × 3969 km, 34.25°",Vanguard (6.5in),
0,USA,Science,NASA,Naval Research Laboratory (NRL),,,,"Solar cells, batteries",,1.5 kg,"654 km × 3969 km, 34.25°",Vanguard (6.5in),
0,USA,Science,NASA,Naval Research Laboratory (NRL),,,,"Solar cells, batteries",,1.5 kg,"654 km × 3969 km, 34.25°",Vanguard (6.5in),
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,China,"Communication M2M/IoT, traffic monitoring",HEAD Aerospace,SAST,high performance AIS-receiver,,,"2 deployable solar arrays, batteries",2-3 years,45 kg,"796 km × 809 km, 98.65° (#1); 495 km × 511 km,...","HEAD 1, 2A, 2B, 2C, 2D, 2E, 2F, 3, 4, 5 (Hede ...",
0,China,Earth observation,Lingzhong Kongjian Jishu (Beijing Zero G Lab),Lingzhong Kongjian Jishu (Beijing Zero G Lab),,,,"Solar arrays, batteries",,,,"Jinzijing 1-01, ..., 1-06 (Golden Bauhinia 1-0...",
0,China,Communication M2M/IoT,Guodian Gaoke,Guodian Gaoke,,,,"Solar arrays, batteries",,~50 kg,,"Tianqi 7, 8, 9, 13, 14, 15",
0,China,"Education, amateur radio communication, techno...",CAMSAT,CAMSAT,,CubeSat (6U),,"Solar cells, batteries",,,,"CAS 5A (Fengtai Shaonian 2, FO 118, Fengtai-OS...",cas-5a


In [331]:
buscador_links('tianqi')
buscador_links_sat('cas')

https://space.skyrocket.de/doc_sdat/castor.htm
https://space.skyrocket.de/doc_sdat/urthecast-1.htm
https://space.skyrocket.de/doc_sdat/castor.htm
https://space.skyrocket.de/doc_sdat/cassini.htm
https://space.skyrocket.de/doc_sdat/intelsat-americas-8.htm
https://space.skyrocket.de/doc_sdat/tianqin-1-cas-6.htm
https://space.skyrocket.de/doc_sdat/cas500-1.htm
https://space.skyrocket.de/doc_sdat/lituanicasat-2.htm
https://space.skyrocket.de/doc_sdat/astrocast-0101.htm
https://space.skyrocket.de/doc_sdat/picasso.htm
https://space.skyrocket.de/doc_sdat/cassiope-1.htm
https://space.skyrocket.de/doc_sdat/lituanicasat-1.htm
https://space.skyrocket.de/doc_sdat/cas-2t.htm
https://space.skyrocket.de/doc_sdat/astrocast-0.htm
https://space.skyrocket.de/doc_sdat/astrocast-0101.htm
https://space.skyrocket.de/doc_sdat/astrocast-0101.htm
https://space.skyrocket.de/doc_sdat/orcasat.htm
https://space.skyrocket.de/doc_sdat/astrocast-0101.htm
https://space.skyrocket.de/doc_sdat/cas500-4.htm
https://space.sk

In [293]:
party.to_csv('csv/party.csv',index=False)

In [270]:
misiones['complate_site'] = misiones['complate_site'].fillna('Des')

In [272]:
misiones['country'] = misiones.complate_site.apply(lambda x : x.split(',')[-1] if x != float else x)

In [276]:
misiones.country = misiones.country.str.strip()

In [279]:
new_mis=misiones
new_mis['LS']= new_mis['Site']+' '+new_mis['Site_inf']

In [280]:
new_mis.to_csv('csv/misi.csv',index=False)

In [240]:
df_sat

Unnamed: 0,Satellite,COSPAR,Date,LS,success,Launch Vehicle,Remarks
0,Sputnik 1 (PS-1 #1),1957 α 2,1957-04-10,Ba LC-1/5,OK,Sputnik (8K74PS),
1,Sputnik 2 (PS-2 #1),1957 β 1,1957-03-11,Ba LC-1/5,OK,Sputnik (8K74PS),
2,Vanguard TV3 (Test Satellite F),1957-F01,1957-06-12,CC LC-18A,F,Vanguard,
3,Vanguard TV3BU (Test Satellite G),1958-F01,1958-05-02,CC LC-18A,F,Vanguard,
4,Vanguard 1 (Test Satellite H),1958 β 2,1958-03-17,CC LC-18A,OK,Vanguard,
...,...,...,...,...,...,...,...
509071,FossaSat 1b,2021-F08,2021-03-09,Va SLC-2W,F,Firefly-Alpha,"with Serenity, Hiapo, BSS 1, FossaSat 2, GENES..."
509072,FossaSat 1b (2),2022-122#,2022-01-10,Va SLC-2W,F,Firefly-Alpha,"with Serenity (2), TechEdSat 15, GENESIS G, GE..."
509073,Spinnaker-3 (CP 15) / Firefly Capsule 1,2021-F08,2021-03-09,Va SLC-2W,F,Firefly-Alpha,"with Serenity, Hiapo, BSS 1, FossaSat 1b, Foss..."
509074,Firefly Capsule 2,2022-122E,2022-01-10,Va SLC-2W,F,Firefly-Alpha,"with Serenity (2), TechEdSat 15, GENESIS G, GE..."


In [105]:
buscador_links('mol')

https://space.skyrocket.de/doc_lau_det/molniya.htm
https://space.skyrocket.de/doc_lau_det/molniya-m_blok-l.htm
https://space.skyrocket.de/doc_lau_det/molniya-m_blok-vl.htm
https://space.skyrocket.de/doc_lau_det/molniya-m_blok-ml.htm
https://space.skyrocket.de/doc_lau_det/molniya-m_blok-mvl.htm
https://space.skyrocket.de/doc_lau_det/molniya-m_blok-so-l.htm
https://space.skyrocket.de/doc_lau_det/molniya-m_blok-2bl.htm


In [533]:
buscador_links_sat('luna')

https://space.skyrocket.de/doc_sdat/luna_e1.htm
https://space.skyrocket.de/doc_sdat/luna_e1.htm
https://space.skyrocket.de/doc_sdat/luna_e1.htm
https://space.skyrocket.de/doc_sdat/luna_e1.htm
https://space.skyrocket.de/doc_sdat/luna_e1a.htm
https://space.skyrocket.de/doc_sdat/luna_e1a.htm
https://space.skyrocket.de/doc_sdat/luna_e2a.htm
https://space.skyrocket.de/doc_sdat/luna_e3.htm
https://space.skyrocket.de/doc_sdat/luna_e3.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/luna_e6.htm
https://space.skyrocket.de/doc_sdat/lunar-orbiter.htm
https://space.skyrocket.de/doc_