In [1]:
from pathlib import Path

import pandas as pd
import requests
import numpy as np
from slugify import slugify

def download_and_format_image_path(url):
    path = Path(url)
    ref_path = f"images/{slugify(path.stem)}{path.suffix}"
    output_path = f"static/{ref_path}"
    r = requests.get(url)
    with open(output_path, 'wb') as f:
        f.write(r.content)
    return ref_path

In [14]:
df = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vR-BAUvNUjp2AeV_daeeqHReX0M3ew3ZpEL3nfkrz96uUd816mV_hV1uWMvbsACphEBGjqHJBswGwFz/pub?gid=614465369&single=true&output=csv")

In [15]:
def get_status(curso, moodle_url, portada):
    try:
        # compare if the three parameters are nan and exit the function if they are
        if pd.isna(curso) and pd.isna(moodle_url) and pd.isna(portada):
            return
        elif moodle_url.startswith("https"):
            r = requests.get(portada)
            if r.status_code != 200:
                print(f"El curso [{curso}]({moodle_url}) no tiene portada")
        else:
            print(f"Faltan datos:\nCurso: {curso}\nMoodle: {moodle_url}\nPortada: {portada}")
    except Exception:
        print(f"Con los siguientes datos, algo ha fallado:\nCurso: {curso}\nMoodle: {moodle_url}\nPortada: {portada}")

In [17]:
# apply a funcion taking three arguments in pandas
df.apply(lambda x: get_status(x['Curso'], x['Moodle_url'], x['Portada']), axis=1)

Con los siguientes datos, algo ha fallado:
Curso: Arduino con ArduinoBlocks
Moodle: nan
Portada: nan
Con los siguientes datos, algo ha fallado:
Curso: ARDUINOBLOCKS EN EL AULA
Moodle: nan
Portada: nan
Con los siguientes datos, algo ha fallado:
Curso: nan
Moodle: nan
Portada: nan
Con los siguientes datos, algo ha fallado:
Curso: nan
Moodle: nan
Portada: nan


0      None
1      None
2      None
3      None
4      None
       ... 
165    None
166    None
167    None
168    None
169    None
Length: 170, dtype: object

In [25]:
# pandas replace nan values in a series with empty string
df['Portada'] = df['Portada'].replace(np.nan, '', regex=True)

#split the url and get the last element of the list and prepend a path
df['Images'] = df['Portada'].apply(lambda x: download_and_format_image_path(x))

In [28]:
df.to_csv("webdata.csv", index=False)

In [None]:
# function to reduce the size of the image
def reduce_image_size(image_path):
    img = Image.open(image_path)
    img.save(image_path, optimize=True, quality=85)


In [30]:
# pandas select specific columns and those whose title contains a string of one digit, a dot and another digit
pd.concat([df[["Portada", "Curso"]], df.filter(regex="\d\.\d")], axis=1)


Unnamed: 0,Portada,Curso,1.1,1.2,1.3,1.4,1.5,2.1,2.2,2.3,...,4.2,4.3,5.1,5.2,5.3,6.1,6.2,6.3,6.4,6.5
0,https://moodle.catedu.es/pluginfile.php/5878/c...,ABP APRENDIZAJE BASADO EN PROYECTOS INFANTIL P...,B1,A2,B1,B1,B1,,B1,,...,,,,B1,B1,,,,,B1
1,https://moodle.catedu.es/pluginfile.php/5822/c...,ABP APRENDIZAJE BASADO EN PROYECTOS SECUNDARIA...,B1,A2,B1,B1,B1,,B1,,...,,,,B1,B1,,,,,B1
2,https://moodle.catedu.es/pluginfile.php/85386/...,AJEDREZ EN LA ESCUELA,A2,A2,,A2,A2,,,,...,,,,,,,,,,
3,https://moodle.catedu.es/pluginfile.php/5737/c...,ALIMENTACIÓN Y NUTRICIÓN PARA UNA VIDA SALUDABLE,A2,A2,,A2,A2,,,,...,,,,,,,,,,
4,https://moodle.catedu.es/pluginfile.php/1224/c...,APP INVENTOR,A2,A2,A1,B1,A2,,B1,,...,,,,,A1,,,A1,,A1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,https://moodle.catedu.es/pluginfile.php/175090...,Diseña un Plan Digital de Centro,B1,B1,B1,B1,B1,,B1,B1,...,,,,,,,,,,
166,,ARDUINOBLOCKS EN EL AULA,,,,,,,,,...,,,,,,,,,,
167,https://moodle.catedu.es/pluginfile.php/154682...,Bases de datos relacionales y lenguaje SQL,A2,B1,,B1,A2,A2,B1,A2,...,,,A1,A1,,A1,,A1,A1,A1
168,,,,,,,,,,,...,,,,,,,,,,


In [27]:
# pandas concat a selection of columns and some columns filtered by a regex
df[["Portada", "Curso"]].concat(df.filter(regex="\d\.\d"))

AttributeError: 'DataFrame' object has no attribute 'concat'