In [1]:
import feedparser
import re
import logging
from bs4 import BeautifulSoup
from datetime import datetime
from typing import Generator
from urllib.parse import urljoin
import requests
import json
from requests.adapters import HTTPAdapter, Retry
from dotenv import load_dotenv
import os
import asyncio
from azure.identity.aio import ClientSecretCredential
from msgraph import GraphServiceClient
from kiota_abstractions.api_error import APIError
from msgraph.generated.models.external_connectors.activity_settings import ActivitySettings
from msgraph.generated.models.external_connectors.display_template import DisplayTemplate
from msgraph.generated.models.external_connectors.external_connection import ExternalConnection
from msgraph.generated.models.external_connectors.item_id_resolver import ItemIdResolver
from msgraph.generated.models.external_connectors.search_settings import SearchSettings
from msgraph.generated.models.external_connectors.url_match_info import UrlMatchInfo
from msgraph.generated.models.external_connectors.schema import Schema
from msgraph.generated.models.external_connectors.property_ import Property_
from msgraph.generated.models.external_connectors.property_type import PropertyType
from msgraph.generated.models.external_connectors.label import Label
from msgraph.generated.models.json import Json

from msgraph.generated.models.external_connectors.access_type import AccessType
from msgraph.generated.models.external_connectors.acl import Acl
from msgraph.generated.models.external_connectors.acl_type import AclType
from msgraph.generated.models.external_connectors.external_activity import (
    ExternalActivity,
)
from msgraph.generated.models.external_connectors.external_activity_type import (
    ExternalActivityType,
)
from msgraph.generated.models.external_connectors.external_item import ExternalItem
from msgraph.generated.models.external_connectors.external_item_content import (
    ExternalItemContent,
)
from msgraph.generated.models.external_connectors.external_item_content_type import (
    ExternalItemContentType,
)
from msgraph.generated.models.external_connectors.identity import Identity
from msgraph.generated.models.external_connectors.identity_type import IdentityType
from msgraph.generated.models.external_connectors.properties import Properties


In [2]:
# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in later cells (TENANT_ID, CLIENT_ID, CLIENT_SECRET, USER_ID) can find them.
load_dotenv()

True

# Prepare the Data
## EXTRACT
### Microsoft Graph Public ChangeLog

In [3]:
# Microsoft Graph changelog RSS feed:
# https://developer.microsoft.com/en-us/graph/changelog/rss
# Pull in the RSS feed data using feedparser; the parsed result exposes the feed items as `.entries`.
# https://feedparser.readthedocs.io/en/latest/common-rss-elements.html#accessing-common-channel-elements
msgraphchangelog = feedparser.parse("https://developer.microsoft.com/en-us/graph/changelog/rss")
# Alias for the list of feed entries (the later cells shown here read msgraphchangelog.entries directly).
docs = msgraphchangelog.entries

In [4]:
## Test the data pull by inspection of a few elements
# Note: indices 1 and 2 select the second and third feed entries (index 0 is not inspected).
mclentryone = msgraphchangelog.entries[1]
mclentrytwo = msgraphchangelog.entries[2]

In [5]:
mclentryone
# Note: the ID has "beta" appended to the GUID; we will clean that up with a function later on

{'id': '1ab6bfa0-5fd2-4d51-9834-a8cfc0ba637cbeta',
 'guidislink': False,
 'tags': [{'term': 'Prod', 'scheme': None, 'label': None},
  {'term': 'beta', 'scheme': None, 'label': None}],
 'title': 'Security',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://developer.microsoft.com/en-us/graph/changelog/rss',
  'value': 'Security'},
 'summary': '<div>Added <b>microsoftDefenderForIoT</b>, <b>microsoftDefenderForServers</b>, <b>microsoftDefenderForStorage</b>, <b>microsoftDefenderForDNS</b>, <b>microsoftDefenderForDatabases</b>, <b>microsoftDefenderForContainers</b>, <b>microsoftDefenderForNetwork</b>, <b>microsoftDefenderForAppService</b>, <b>microsoftDefenderForKeyVault</b>, <b>microsoftDefenderForResourceManager</b> and <b>microsoftDefenderForApiManagement</b> members to the <b>detectionSource</b> enumeration.</div>',
 'summary_detail': {'type': 'text/html',
  'language': None,
  'base': 'https://developer.microsoft.com/en-us/graph/changelog/rss',
  'value': 

In [6]:
mclentrytwo
# Note: the ID has "v1.0" appended to the GUID; we will clean that up with a function later on

{'id': '2ffb1cd0-70b3-4e55-b5f2-b7e6c7d62dc2v1.0',
 'guidislink': False,
 'tags': [{'term': 'Prod', 'scheme': None, 'label': None},
  {'term': 'v1.0', 'scheme': None, 'label': None}],
 'title': 'Security',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://developer.microsoft.com/en-us/graph/changelog/rss',
  'value': 'Security'},
 'summary': '<div>Added <b>microsoftDefenderForIoT</b>, <b>microsoftDefenderForServers</b>, <b>microsoftDefenderForStorage</b>, <b>microsoftDefenderForDNS</b>, <b>microsoftDefenderForDatabases</b>, <b>microsoftDefenderForContainers</b>, <b>microsoftDefenderForNetwork</b>, <b>microsoftDefenderForAppService</b>, <b>microsoftDefenderForKeyVault</b>, <b>microsoftDefenderForResourceManager</b> and <b>microsoftDefenderForApiManagement</b> members to the <b>detectionSource</b> enumeration.</div>',
 'summary_detail': {'type': 'text/html',
  'language': None,
  'base': 'https://developer.microsoft.com/en-us/graph/changelog/rss',
  'value': 

In [7]:
# Count the number of items (entries) in the feed as a quick sanity check of the pull
num_items = len(msgraphchangelog.entries)
print(num_items)

1749


## Helper Functions needed in TRANSFORM stage later on for External Item

In [12]:
def remove_html_tags(text):
    """Return the plain-text content of an HTML fragment.

    The fragment is parsed with BeautifulSoup using the built-in
    "html.parser" backend, and the result of ``get_text()`` is returned.
    """
    return BeautifulSoup(text, "html.parser").get_text()

In [13]:
def remove_lastfourinid(input_string):
    """Strip the API-version suffix from a changelog entry id.

    Feed ids look like ``<GUID>beta`` or ``<GUID>v1.0``. When the string
    starts with a canonical 36-character GUID, exactly that GUID is returned,
    so an id with no suffix (or a suffix of another length) is not corrupted.
    For any other input the original behaviour is kept: the last four
    characters are dropped.

    Args:
        input_string: The raw entry id from the feed.

    Returns:
        The id without its trailing version marker.
    """
    guid_prefix = re.match(
        r"[0-9a-fA-F]{8}(?:-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}", input_string
    )
    if guid_prefix:
        return guid_prefix.group(0)
    # Not GUID-shaped: fall back to the historical "drop the last four characters" rule.
    return input_string[:-4]

In [14]:
def convert_to_iso8601(date_str):
    """Convert a feed timestamp such as 'Mon, 01 Jan 2024 12:30:45 Z' to ISO 8601.

    Args:
        date_str: Timestamp in the form '%a, %d %b %Y %H:%M:%S Z'.

    Returns:
        The timestamp as 'YYYY-MM-DDTHH:MM:SS' followed by a literal 'Z'.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    parsed = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S Z')
    # The parsed datetime is naive; the trailing "Z" is appended to mark it as UTC.
    return f"{parsed.isoformat()}Z"

In [15]:
def create_narrative(id, pd, tit, dscr):
    """Build the one-sentence narrative used as an item's content.

    Args:
        id: Raw feed entry id; cleaned with remove_lastfourinid for the deeplink.
        pd: Published timestamp from the feed; converted with convert_to_iso8601.
        tit: Entry title.
        dscr: Entry description, inserted as-is.

    Returns:
        A sentence combining the date, title, description and a changelog
        search deeplink for the entry.
    """
    entry_id = remove_lastfourinid(id)
    deeplink = f"https://developer.microsoft.com/en-us/graph/changelog/?search={entry_id}"
    published = convert_to_iso8601(pd)
    return f"On {published} {tit} entered {dscr} with deeplink {deeplink}"

## Create Connection to Microsoft Graph
### Setting up the Client

In [16]:
# App-only (client secret) credential; the three values are read from the environment.
# os.getenv returns None for any variable that is not set.
credential = ClientSecretCredential(
    tenant_id=os.getenv("TENANT_ID"),
    client_id=os.getenv("CLIENT_ID"),
    client_secret=os.getenv("CLIENT_SECRET"),
)
# ".default" requests the permissions already granted to the app registration.
scopes = ['https://graph.microsoft.com/.default']
# Graph client shared by every cell below that talks to Microsoft Graph.
client = GraphServiceClient(credentials=credential, scopes=scopes)

In [18]:
# Get the User ID from the USER_ID environment variable; _transform uses it as the
# performed_by Identity of each item's activity. os.getenv returns None if it is unset.
user_id = os.getenv("USER_ID")

## Create the Graph Custom Connector
### External Connection

In [19]:
# Resolver that maps changelog URLs onto external item ids (used for activity settings).
# NOTE(review): the item URLs built in this notebook look like
# ".../graph/changelog/?search=<id>", so "{slug}" may capture "?search=<id>" rather than
# the bare item id -- confirm against the Graph urlToItemResolver documentation.
_changelog_item_resolver = ItemIdResolver(
    odata_type="#microsoft.graph.externalConnectors.itemIdResolver",
    priority=1,
    item_id="{slug}",
    url_match_info=UrlMatchInfo(
        base_urls=["https://developer.microsoft.com/en-us/graph/changelog"],
        url_pattern="/(?<slug>[^/]+)",
    ),
)

# Search result template; its `layout` is left unset here and is filled in by
# _create_connection() from resultLayout.json.
_changelog_result_template = DisplayTemplate(id="fabsgclpython1", priority=1)

# The connection definition that is posted to Microsoft Graph.
# NOTE(review): "Injested" in the description is presumably a typo for "Ingested";
# it is left unchanged because the string is sent to Graph as-is.
external_connection = ExternalConnection(
    id="fabsgclpython1",
    name="FabsGraphChangeLog Alpha",
    description="The official Microsoft Graph ChangeLog - Injested in Graph Connector for Copilot",
    activity_settings=ActivitySettings(
        url_to_item_resolvers=[_changelog_item_resolver],
    ),
    search_settings=SearchSettings(
        search_result_templates=[_changelog_result_template],
    ),
)

In [20]:
# Properties of a changelog external item, in the order they are registered.
_schema_properties = [
    # Entry title; carries the Title semantic label.
    Property_(
        name="title",
        type=PropertyType.String,
        is_queryable=True,
        is_searchable=True,
        is_retrievable=True,
        labels=[Label.Title],
    ),
    # Plain-text excerpt of the entry.
    Property_(
        name="excerpt",
        type=PropertyType.String,
        is_queryable=True,
        is_searchable=True,
        is_retrievable=True,
    ),
    # Image shown with the result; retrievable only.
    Property_(
        name="imageUrl",
        type=PropertyType.String,
        is_retrievable=True,
    ),
    # Deeplink to the entry; carries the Url semantic label.
    Property_(
        name="url",
        type=PropertyType.String,
        is_retrievable=True,
        labels=[Label.Url],
    ),
    # Publish date; carries the LastModifiedDateTime semantic label.
    Property_(
        name="publishdate",
        type=PropertyType.DateTime,
        is_queryable=True,
        is_retrievable=True,
        is_refinable=True,
        labels=[Label.LastModifiedDateTime],
    ),
    # Tag collection, refinable in search.
    Property_(
        name="tags",
        type=PropertyType.StringCollection,
        is_queryable=True,
        is_retrievable=True,
        is_refinable=True,
    ),
]

# Schema registered on the connection by _create_schema().
schema = Schema(
    base_type="microsoft.graph.externalItem",
    properties=_schema_properties,
)

## Functions to Create Connection and Schema
#### resultLayout.json holds the Adaptive Card layout used to render results in the search verticals

In [21]:
async def _create_connection():
    """Attach the Adaptive Card layout to the connection and post it to Graph.

    Reads ``resultLayout.json`` from the working directory, stores the parsed
    JSON as the layout of the connection's first search result template, and
    then posts ``external_connection`` through the Graph client.
    """
    print("Creating connection...")
    with open("resultLayout.json", "r", encoding='utf-8') as layout_file:
        card_layout = json.load(layout_file)

    settings = external_connection.search_settings
    assert settings is not None
    templates = settings.search_result_templates
    assert templates is not None

    # The template was declared without a layout; fill it in before posting.
    templates[0].layout = Json(additional_data=card_layout)

    await client.external.connections.post(external_connection)
    print("CONNECTION CREATION DONE")

In [22]:
async def _create_schema():
    """Register ``schema`` on the external connection via a PATCH request."""
    print("Creating schema...")

    connection_id = external_connection.id
    assert connection_id is not None
    connection_builder = client.external.connections.by_external_connection_id(connection_id)
    await connection_builder.schema.patch(schema)
    print("DONE")

In [23]:
# The function below runs both steps: it creates the connection and then its schema

In [24]:
async def create_connection():
    """Create the external connection, then register its schema.

    The two steps run in order; the schema step runs only if the connection
    step did not raise. Nothing is returned.
    """
    await _create_connection()
    await _create_schema()

In [25]:
# Run the connection setup process (connection first, then schema).
# As of this run the call raises an error at the very end due to a bug in the SDK:
# per the author, the connection and schema are created successfully, but the SDK
# mishandles the response. It has been reported and a PR is underway.
# create_connection() has no return statement, so createpoc1conn is None when it completes.
createpoc1conn = await create_connection()

Creating connection...
CONNECTION CREATION DONE
Creating schema...


DeserializationError: No response content type found for deserialization

In [26]:
# Print the connection id held by the local ExternalConnection object.
# NOTE: this only echoes the local value; it does not query Microsoft Graph.
print(external_connection.id)

fabsgclpython1


In [27]:
# BREAK IN CASE OF FIRE
# Use this to delete the connection and start over
#await client.external.connections.by_external_connection_id(external_connection.id).delete()

# Create the External Item
## This is the TRANSFORM and LOAD Stage

### Functions to Transform and Load

In [28]:
def _transform(content) -> Generator[ExternalItem, None, None]:
    """Turn changelog feed entries into Microsoft Graph external items.

    Args:
        content: Iterable of feed entries. Each entry must expose ``id``,
            ``published``, ``title`` and ``description`` attributes.

    Yields:
        One ExternalItem per entry. The item is granted to everyone, carries
        a single "created" activity attributed to ``user_id``, and uses an
        HTML narrative built by create_narrative as its content.
    """
    # Iterate the entries that were passed in rather than a notebook global,
    # so the function transforms exactly what the caller hands it.
    for logentry in content:
        clean_date = convert_to_iso8601(logentry.published)
        clean_id = remove_lastfourinid(logentry.id)
        deeplink = f"https://developer.microsoft.com/en-us/graph/changelog/?search={clean_id}"
        # Full narrative (keeps the description's HTML) used as the item content.
        narrative = create_narrative(
            logentry.id, logentry.published, logentry.title, logentry.description
        )
        # Tag-free description used as the searchable excerpt.
        excerpt = remove_html_tags(logentry.description)
        yield ExternalItem(
            id=clean_id,
            properties=Properties(
                additional_data={
                    "title": logentry.title,
                    "excerpt": excerpt,
                    "imageUrl": "https://raw.githubusercontent.com/microsoftgraph/g-raph/master/g-raph.png",
                    "url": deeplink,
                    "publishdate": clean_date
                }
            ),
            content=ExternalItemContent(
                type=ExternalItemContentType.Html, value=narrative
            ),
            acl=[
                Acl(
                    type=AclType.Everyone,
                    value="everyone",
                    access_type=AccessType.Grant,
                )
            ],
            activities=[
                ExternalActivity(
                    odata_type="#microsoft.graph.externalConnectors.externalActivity",
                    type=ExternalActivityType.Created,
                    start_date_time=clean_date,
                    performed_by=Identity(type=IdentityType.User, id=user_id),
                )
            ],
        )

In [29]:
async def _load(content: Generator[ExternalItem, None, None]):
    """Upload external items to the connection, one PUT request per item.

    Args:
        content: Generator of ExternalItem objects to upload.

    Loading stops at the first failure: the error is printed and the function
    returns without processing the remaining items. Nothing is returned.
    """
    for doc in content:
        try:
            # print() does not interpolate "%s" placeholders, so format explicitly.
            print(f"Loading {doc.id}...")
            assert external_connection.id is not None
            assert doc.id is not None

            await client.external.connections.by_external_connection_id(
                external_connection.id
            ).items.by_external_item_id(doc.id).put(doc)
            print("DONE loading")
        except Exception as e:
            print(f"Failed to load {doc.id}: {e}")
            return

### Do the Transform

In [30]:
# _transform is a generator function, so this only creates the generator;
# no items are built until _load iterates over it.
transformed = _transform(msgraphchangelog.entries)

### Do the Load
#### For me this takes about half an hour for roughly 1,700 items

In [31]:
# Upload every transformed item. _load does not return a value, so loadChangeLog is None.
loadChangeLog = await _load(transformed)

Loading %s... 97796aeb-ce8a-4650-ad43-6872cee08c4d
DONE loading
Loading %s... 1ab6bfa0-5fd2-4d51-9834-a8cfc0ba637c
DONE loading
Loading %s... 2ffb1cd0-70b3-4e55-b5f2-b7e6c7d62dc2
DONE loading
Loading %s... 400279c6-8c1b-4a1d-92ff-c49987587f79
DONE loading
Loading %s... 8fefbf46-97c3-43c7-96a9-56c1f4c24052
DONE loading
Loading %s... e70ef0dd-ac40-4549-9583-958a7a9c0b66
DONE loading
Loading %s... 514fea21-1d51-46d0-847e-9c2cce1d6c58
DONE loading
Loading %s... 8b9db607-81fd-4013-af2e-d6cb709aa313
DONE loading
Loading %s... cc0c0a79-a691-485d-b47c-8b0ee543ae6c
DONE loading
Loading %s... 7a1f574d-c348-4ce9-890b-d55b58b2d3c2
DONE loading
Loading %s... 2204d5d0-d3cf-4850-88b7-d430ab574476
DONE loading
Loading %s... bbb70cce-5252-40be-a7ca-7c29f99cb089
DONE loading
Loading %s... a12d7c65-273a-4409-8c5a-a8be6bcfd005
DONE loading
Loading %s... 71ebad7b-fc20-4469-b895-ddeef0ee54a0
DONE loading
Loading %s... 14a780c1-d222-4476-acc0-3c5b6425f040
DONE loading
Loading %s... c1808556-a3ee-4258-a706-e1

In [32]:
# Shows None: _load reports progress via print() and does not return a result.
print(loadChangeLog)

None
