In [1]:
import base64
import gzip
import io
import json
import logging
import os
import shutil
import smtplib
import time
from datetime import datetime, timedelta, date
from datetime import timezone
from io import BytesIO

import pandas as pd
import pyodbc
import requests

# Connection settings are read from the environment so that no credentials
# are stored in the notebook. A missing variable raises KeyError here, at
# import/config time, rather than later inside a request.
CD2_base_url = os.environ['CD2_base_url']
CD2_client_id = os.environ['CD2_client_id']
CD2_client_secret = os.environ['CD2_client_secret']



In [2]:
def les_access_token(logger):
    """Fetch an access token using the client-credentials grant.

    Posts ``grant_type=client_credentials`` to the API gateway login
    endpoint, authenticating with HTTP basic auth built from the
    module-level ``CD2_client_id`` and ``CD2_client_secret``.

    Args:
        logger: Logger used to report whether the token was obtained.

    Returns:
        The access token string from the JSON response, or ``None`` when
        the endpoint answers with any status code other than 200.

    Raises:
        requests.exceptions.RequestException: On connection problems or
            when the endpoint does not answer within the timeout.
    """
    requesturl = "https://api-gateway.instructure.com/ids/auth/login"
    payload = {'grant_type': 'client_credentials'}
    r = requests.request(
        "POST",
        requesturl,
        data=payload,
        auth=(CD2_client_id, CD2_client_secret),
        # Without a timeout a stalled connection blocks the notebook forever.
        timeout=30,
    )
    if r.status_code != 200:
        logger.error(f"Klarte ikkje å skaffe access_token, feil {r.status_code}")
        return None

    access_token = r.json()['access_token']
    # The token is a credential: report success only, never the value itself,
    # so it does not end up in log files or saved notebook output.
    logger.info("Henta access_token OK")
    return access_token

In [3]:
def lag_logger(log_namn):
    """Create (or reconfigure) the shared ``my_logger`` logger.

    The logger writes every record at DEBUG level and above both to the
    file ``log_namn`` and to the console, using the same line format.

    ``logging.getLogger`` returns the same logger object on every call, so
    handlers left over from an earlier call (for example when the cell is
    re-run) are removed and closed first. Otherwise each call would stack
    two more handlers and every record would be written multiple times.

    Args:
        log_namn: Path of the log file to write to.

    Returns:
        The configured ``logging.Logger`` with exactly one file handler and
        one console handler attached.
    """
    logger = logging.getLogger('my_logger')
    logger.setLevel(logging.DEBUG)

    # Drop handlers from previous calls to avoid duplicated log lines and
    # to release the old log file.
    for gammal_handler in list(logger.handlers):
        logger.removeHandler(gammal_handler)
        gammal_handler.close()

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # File handler: persists the log to log_namn.
    file_handler = logging.FileHandler(log_namn)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)

    # Console handler: shows the same records in the notebook output.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    return logger

In [4]:
def hent_CD2_filar(innfil, token, svar, logger):
    """Download one gzip-compressed result file and return it as text.

    The ``objects`` list from ``svar`` is posted to ``/dap/object/url`` to
    obtain download URLs; the URL registered under ``innfil`` is then
    fetched and the gzip content is decompressed.

    Args:
        innfil: Id of the object to download; used as key into the
            ``urls`` mapping of the response.
        token: Access token, sent in the ``x-instauth`` header.
        svar: Job response dict whose ``'objects'`` entry lists the
            result objects.
        logger: Logger used for request/response tracing.

    Returns:
        The decompressed file content decoded as UTF-8. Bytes that are not
        valid UTF-8 are dropped (``errors='ignore'``), as before.

    Raises:
        requests.exceptions.RequestException: If either the URL lookup or
            the file download fails (including non-2xx responses).
    """
    requesturl = f"{CD2_base_url}/dap/object/url"
    # Serialise with json.dumps instead of rewriting quotes in repr(list);
    # the latter breaks on values containing quotes, None or booleans.
    payload = json.dumps(svar['objects'])
    headers = {'x-instauth': token, 'Content-Type': 'text/plain'}
    logger.info(f"Request: {requesturl} {payload}")
    respons = requests.request("POST", requesturl, headers=headers, data=payload)
    logger.info(f"Response: {respons.status_code} {respons.reason}")
    respons.raise_for_status()

    # NOTE(review): the download URLs are presumably pre-signed and grant
    # access on their own, so they are deliberately not written to the log.
    url = respons.json()['urls'][innfil]['url']

    data = requests.request("GET", url)
    logger.info(f"Response: {data.status_code} {data.reason}")
    # Fail here on an HTTP error instead of handing an error page to gzip.
    data.raise_for_status()

    with gzip.GzipFile(fileobj=io.BytesIO(data.content), mode='rb') as utpakka_fil:
        return utpakka_fil.read().decode("utf-8", errors='ignore')

In [5]:
def les_CD2_tabell(token, tabell, logger, sist_oppdatert=None, poll_sekund=5):
    """Query one Canvas table through DAP and return the result as a DataFrame.

    Starts a CSV query job for ``tabell``, polls the job until it finishes,
    downloads every result object with ``hent_CD2_filar`` and concatenates
    the non-empty parts.

    Args:
        token: Access token, sent in the ``x-instauth`` header.
        tabell: Name of the table to query.
        logger: Logger used for progress messages and passed on to
            ``hent_CD2_filar``.
        sist_oppdatert: Optional timestamp string. When given it is sent as
            ``"since"`` in the query; when ``None`` the query body is
            ``{"format": "csv"}`` exactly as before.
        poll_sekund: Seconds to wait between two job status requests.

    Returns:
        A tuple ``(alledata, sist_oppdatert, til)`` where ``alledata`` is the
        combined DataFrame (empty if no part contained rows),
        ``sist_oppdatert`` is the argument as passed in, and ``til`` is the
        job's ``'until'`` value, falling back to ``'at'`` (``None`` if the
        response has neither).

    Raises:
        requests.exceptions.RequestException: If any HTTP request fails.
        RuntimeError: If the job reports status ``"failed"``.
    """
    headers = {'x-instauth': token, 'Content-Type': 'text/plain'}
    sporring = {"format": "csv"}
    if sist_oppdatert is not None:
        sporring["since"] = sist_oppdatert
    payload = json.dumps(sporring)

    requesturl = f"{CD2_base_url}/dap/query/canvas/table/{tabell}/data"
    logger.info(f"Sender søk til {requesturl}")
    r = requests.request("POST", requesturl, headers=headers, data=payload)
    r.raise_for_status()
    job_id = r.json()['id']

    requesturl2 = f"{CD2_base_url}/dap/job/{job_id}"
    while True:
        r2 = requests.request("GET", requesturl2, headers=headers)
        r2.raise_for_status()
        respons2 = r2.json()
        logger.info(f"Jobbstatus: {respons2}")
        status = respons2['status']
        if status == "complete":
            break
        if status == "failed":
            # Without this check a failed job would be polled forever.
            raise RuntimeError(f"DAP-jobb {job_id} feila: {respons2}")
        time.sleep(poll_sekund)

    filar = respons2['objects']
    logger.info(f"Filer: {filar}")

    dr_liste = []
    for fil in filar:
        data = io.StringIO(hent_CD2_filar(fil['id'], token, respons2, logger))
        dr_liste.append(pd.read_csv(data, sep=","))

    # pd.concat raises on an empty sequence, so handle "no rows" explicitly.
    # ignore_index avoids repeated row labels across the part files.
    ikkje_tomme = [df for df in dr_liste if not df.empty]
    if ikkje_tomme:
        alledata = pd.concat(ikkje_tomme, ignore_index=True)
    else:
        alledata = pd.DataFrame()

    # NOTE(review): incremental jobs report 'until' (seen in this notebook's
    # output); snapshot jobs are expected to report 'at' instead - confirm
    # against the DAP API reference.
    til = respons2.get('until', respons2.get('at'))
    return alledata, sist_oppdatert, til

In [6]:

def _som_utc_tekst(tidspunkt):
    """Format a date or datetime as an ISO 8601 timestamp ending in ``Z``."""
    if not isinstance(tidspunkt, datetime):
        # A plain date has no time part; use midnight so the result is a
        # full timestamp rather than "YYYY-MM-DDZ".
        tidspunkt = datetime.combine(tidspunkt, datetime.min.time())
    elif tidspunkt.tzinfo is not None:
        # Convert aware values to UTC and drop the offset so that appending
        # "Z" does not produce "+00:00Z".
        tidspunkt = tidspunkt.astimezone(timezone.utc).replace(tzinfo=None)
    return tidspunkt.isoformat() + "Z"


def akv_finn_sist_oppdatert(tabell):
    """Return the timestamp to use as starting point for an incremental query.

    Looks up ``sist_oppdatert`` for ``tabell`` in ``dbo.akv_sist_oppdatert``
    using the connection string in the ``Connection_SQL`` environment
    variable.

    * For ``"web_logs"`` the stored value is never used; the result is
      always "now minus one day" in UTC.
    * For other tables the stored value is returned when present. If there
      is no row, the value is NULL, or the database cannot be reached, the
      result is midnight (UTC) of yesterday.

    Args:
        tabell: Table name to look up.

    Returns:
        An ISO 8601 timestamp string ending in ``Z``.
        NOTE(review): a naive value read from the database is assumed to
        already be in UTC - confirm against how the table is written.

    Raises:
        KeyError: If ``Connection_SQL`` is not set in the environment.
    """
    # Use UTC explicitly: the result is labelled "Z", so local time would be
    # off by the local UTC offset.
    eit_dogn_sidan = datetime.now(timezone.utc) - timedelta(days=1)
    if tabell == "web_logs":
        standard = _som_utc_tekst(eit_dogn_sidan)
    else:
        standard = _som_utc_tekst(eit_dogn_sidan.date())

    conn_str = os.environ["Connection_SQL"]
    query = """
            SELECT [sist_oppdatert] FROM [dbo].[akv_sist_oppdatert]
            WHERE [tabell] = ?
            """
    try:
        with pyodbc.connect(conn_str) as connection:
            cursor = connection.cursor()
            cursor.execute(query, (tabell,))
            row = cursor.fetchone()
    except pyodbc.Error as exc:
        # Best effort: fall back to the default, but say why.
        print("Har ikkje henta frå Azure")
        print(f"Databasefeil: {exc}")
        return standard

    if row and row[0] is not None:
        print("Har henta frå Azure")
        if tabell == "web_logs":
            return standard
        return _som_utc_tekst(row[0])

    print("Har ikkje henta frå Azure")
    return standard

In [9]:
# Set up the run: choose the table, create a per-table log file and fetch
# an access token for the API requests in the following cells.
tabell = "calendar_events"
logger = lag_logger(f'loggfil-{tabell}.log')
token = les_access_token(logger)
# data = les_CD2_tabell(token, tabell, logger)
# Headers reused by the query and job-status requests in the next cell.
headers = {'x-instauth': token, 'Content-Type': 'text/plain'}
# NOTE(review): the value looked up here is overwritten on the next line by
# a hard-coded timestamp, so the database result is currently unused.
sist_oppdatert = akv_finn_sist_oppdatert(tabell)
sist_oppdatert = "2025-01-01T01:00:00Z"


2025-02-07 10:43:20,006 - my_logger - INFO - Henta access_token OK: [REDACTED]

Har ikkje henta frå Azure


In [10]:
# Start an incremental CSV query for `tabell` and poll the job until it is done.
# json.dumps builds the body safely instead of formatting JSON by hand.
payload = json.dumps({"format": "csv", "since": sist_oppdatert})
requesturl = f"{CD2_base_url}/dap/query/canvas/table/{tabell}/data"
print(f"Sender søk til {requesturl}")
r = requests.request("POST", requesturl, headers=headers, data=payload)
r.raise_for_status()
respons = r.json()
# NOTE(review): `id` shadows the builtin; the name is kept in case other
# cells read it.
id = respons['id']
requesturl2 = f"{CD2_base_url}/dap/job/{id}"
while True:
    r2 = requests.request("GET", requesturl2, headers=headers)
    r2.raise_for_status()
    respons2 = r2.json()
    print(respons2)
    if respons2['status'] == "complete":
        break
    if respons2['status'] == "failed":
        # Stop instead of polling a failed job forever.
        raise RuntimeError(f"DAP-jobb {id} feila: {respons2}")
    # Wait only when another status request is actually needed.
    time.sleep(5)
filar = respons2['objects']
dr_liste = []
print(filar)


Sender søk til https://api-gateway.instructure.com/dap/query/canvas/table/calendar_events/data
{'id': 'b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41', 'status': 'running', 'expires_at': '2025-02-08T09:43:22Z'}
{'id': 'b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41', 'status': 'running', 'expires_at': '2025-02-08T09:43:22Z'}
{'id': 'b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41', 'status': 'complete', 'objects': [{'id': 'b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00000-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz'}, {'id': 'b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00002-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz'}], 'expires_at': '2025-02-08T09:43:22Z', 'schema_version': 2, 'since': '2025-01-01T01:00:00Z', 'until': '2025-02-07T08:03:28Z'}
[{'id': 'b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00000-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz'}, {'id': 'b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00002-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz'}]


In [11]:
# Download every part file of the finished job and combine them.
# The list is reset here so that re-running this cell does not append the
# same parts a second time.
dr_liste = []
for fil in filar:
    logger.info(f"Henter fil {fil['id']}")
    data = io.StringIO(hent_CD2_filar(fil['id'], token, respons2, logger))
    df = pd.read_csv(data, sep=",")
    dr_liste.append(df)

# pd.concat raises when given nothing to concatenate, so handle the case
# where every part is empty. ignore_index gives one continuous row index
# instead of repeating the labels of each part.
ikkje_tomme = [del_df for del_df in dr_liste if not del_df.empty]
alledata = pd.concat(ikkje_tomme, ignore_index=True) if ikkje_tomme else pd.DataFrame()

2025-02-07 10:44:14,870 - my_logger - INFO - Henter fil b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00000-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz
2025-02-07 10:44:14,870 - my_logger - INFO - Henter fil b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00000-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz
2025-02-07 10:44:14,876 - my_logger - INFO - Request: https://api-gateway.instructure.com/dap/object/url [{"id": "b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00000-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz"}, {"id": "b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00002-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz"}]
2025-02-07 10:44:14,876 - my_logger - INFO - Request: https://api-gateway.instructure.com/dap/object/url [{"id": "b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00000-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz"}, {"id": "b73dcbb7-e6e6-44d8-a3a1-1fcfa0c06e41/part-00002-f3edb83a-9cf2-42b8-8aef-2d6a5dcb1ff3-c000.csv.gz"}]
2025-02-07 10:44:15,582 - my_logger - INFO - Respo

In [12]:
# Summarise the combined frame: row count, columns, non-null counts and dtypes.
alledata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40114 entries, 0 to 40113
Data columns (total 27 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   key.id                              40114 non-null  int64  
 1   value.deleted_at                    5269 non-null   object 
 2   value.user_id                       39288 non-null  float64
 3   value.created_at                    40114 non-null  object 
 4   value.updated_at                    40114 non-null  object 
 5   value.workflow_state                40114 non-null  object 
 6   value.context_id                    40114 non-null  int64  
 7   value.context_type                  40114 non-null  object 
 8   value.location_address              1 non-null      object 
 9   value.start_at                      40081 non-null  object 
 10  value.end_at                        40081 non-null  object 
 11  value.context_code                  40114

In [22]:
# Show all calendar events that belong to context 28283.
alledata.loc[alledata['value.context_id'] == 28283]

Unnamed: 0,key.id,value.deleted_at,value.user_id,value.created_at,value.updated_at,value.workflow_state,value.context_id,value.context_type,value.location_address,value.start_at,...,value.web_conference_id,value.all_day,value.all_day_date,value.migration_id,value.important_dates,value.location_name,value.description,value.title,meta.ts,meta.action
12604,1384433,,12477.0,2024-12-16T08:19:18.344Z,2024-12-16T08:19:18.344Z,active,28283,Course,,2025-01-24T09:15:00.000Z,...,,,,,False,F118 <Auditorium>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Forelesning MAT110,2025-01-23T23:23:19.826Z,U
12605,1384431,,12477.0,2024-12-16T08:19:18.118Z,2024-12-16T08:19:18.118Z,active,28283,Course,,2025-01-17T09:15:00.000Z,...,,,,,False,F118 <Auditorium>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Forelesning MAT110,2025-01-16T23:21:28.707Z,U
12606,1384437,,12477.0,2024-12-16T08:19:18.791Z,2024-12-16T08:19:18.791Z,active,28283,Course,,2025-02-07T09:15:00.000Z,...,,,,,False,F118 <Auditorium>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Forelesning MAT110,2025-02-06T23:41:42.856Z,U
12607,1384435,,12477.0,2024-12-16T08:19:18.561Z,2024-12-16T08:19:18.561Z,active,28283,Course,,2025-01-31T09:15:00.000Z,...,,,,,False,F118 <Auditorium>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Forelesning MAT110,2025-01-30T21:40:25.420Z,U
12608,1384439,,12477.0,2024-12-16T08:19:19.009Z,2024-12-16T08:19:19.009Z,active,28283,Course,,2025-02-14T09:15:00.000Z,...,,,,,False,F118 <Auditorium>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Forelesning MAT110,2025-02-06T23:41:42.856Z,U
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26926,1413882,,12477.0,2024-12-16T21:29:49.891Z,2024-12-16T21:29:49.891Z,active,28283,Course,,2025-03-12T07:15:00.000Z,...,,,,,False,M508 <Seminarrom>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Øving MAT110,2025-02-06T23:41:42.856Z,U
26930,1413884,,12477.0,2024-12-16T21:29:50.104Z,2024-12-16T21:29:50.104Z,active,28283,Course,,2025-03-19T07:15:00.000Z,...,,,,,False,M508 <Seminarrom>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Øving MAT110,2025-02-06T23:41:42.856Z,U
26933,1413880,,12477.0,2024-12-16T21:29:49.697Z,2024-12-16T21:29:49.697Z,active,28283,Course,,2025-03-05T07:15:00.000Z,...,,,,,False,M508 <Seminarrom>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Øving MAT110,2025-02-06T23:41:42.856Z,U
26937,1413886,,12477.0,2024-12-16T21:29:50.341Z,2024-12-16T21:29:50.341Z,active,28283,Course,,2025-03-26T07:15:00.000Z,...,,,,,False,M508 <Seminarrom>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Øving MAT110,2025-02-06T23:41:42.856Z,U


In [24]:
# Look up the single calendar event with key.id 1384433.
alledata.loc[alledata['key.id'] == 1384433]

Unnamed: 0,key.id,value.deleted_at,value.user_id,value.created_at,value.updated_at,value.workflow_state,value.context_id,value.context_type,value.location_address,value.start_at,...,value.web_conference_id,value.all_day,value.all_day_date,value.migration_id,value.important_dates,value.location_name,value.description,value.title,meta.ts,meta.action
12604,1384433,,12477.0,2024-12-16T08:19:18.344Z,2024-12-16T08:19:18.344Z,active,28283,Course,,2025-01-24T09:15:00.000Z,...,,,,,False,F118 <Auditorium>,"Bendiksen Ole, Riera Constanza Susana<br><span...",Forelesning MAT110,2025-01-23T23:23:19.826Z,U
