In [None]:
import json
import logging
import os
import re
import urllib.parse

import certifi
import pandas as pd
import requests
from hvac import Client
from msal import ConfidentialClientApplication

In [None]:
def read_secrets(mount_point, path, ca_bundle=r"D:\certs\My_CA_Bundle.crt"):
    """Read one secret from a KV v2 secrets engine and return its key/value payload.

    Parameters
    ----------
    mount_point : str
        Mount point of the KV v2 engine, passed through to ``read_secret_version``.
    path : str
        Path of the secret below the mount point.
    ca_bundle : str, optional
        CA bundle file handed to the client as ``verify``. The default is the
        same path the notebook always used; it is written as a raw string so
        the backslashes are not parsed as (invalid) escape sequences.

    Returns
    -------
    The ``['data']['data']`` member of the read response.
    """
    # NOTE(review): no URL or token is passed here, so the client presumably
    # picks them up from its environment -- confirm against the hvac setup.
    client = Client(verify=ca_bundle)
    resp = client.secrets.kv.v2.read_secret_version(mount_point=mount_point, path=path)

    return resp['data']['data']

In [None]:
# Load the app-registration settings from the secrets store, then derive the
# authority URL (from the tenant id) and the Graph scope list used for tokens.
config = read_secrets(
    mount_point="cloud",
    path="microsoft/app_registrations/flask_sharepoint_connection",
)

config.update(
    {
        "authority_url": f'https://login.microsoftonline.com/{config["tenant_id"]}',
        "scopes": ["https://graph.microsoft.com/.default"],
    }
)


In [None]:
def confidential_client():
    """Build the MSAL confidential client from the module-level ``config``.

    Reads ``client_id``, ``secret_value`` and ``authority_url`` from ``config``
    and returns a new ``ConfidentialClientApplication`` with authority
    validation enabled and the ``cp1`` client capability declared.
    """
    msal_settings = {
        "client_id": config["client_id"],
        "client_credential": config["secret_value"],
        "authority": config["authority_url"],
        "validate_authority": True,
        "client_capabilities": ["cp1"],
    }
    return ConfidentialClientApplication(**msal_settings)

In [None]:
# Module-level MSAL client; get_access_token() reads this global.
app = confidential_client()

In [None]:
def get_access_token():
    """Return an access token for the scopes in ``config``, or None on failure.

    The module-level ``app`` is asked for a cached token first; when that
    yields nothing, a new token is requested for the client itself. If the
    response carries no ``access_token``, its error fields are printed and
    None is returned.
    """
    scopes = config["scopes"]

    # Cache lookup: the token belongs to the app, not to a user, hence account=None.
    token_response = app.acquire_token_silent(scopes, account=None)
    if not token_response:
        logging.info("No suitable token exists in cache. Let's get a new one from Azure AD.")
        token_response = app.acquire_token_for_client(scopes=scopes)

    if "access_token" not in token_response:
        # The correlation id is the value to quote when reporting a bug.
        for field in ("error", "error_description", "correlation_id"):
            print(token_response.get(field))
        return None

    print(f'Token type [\"{token_response["token_type"]}\"] created')
    return token_response["access_token"]



In [None]:
# Store the token in config; the request helpers read config["access_token"].
# get_access_token() returns None when no token could be obtained.
config["access_token"] = get_access_token()

In [None]:
import requests
import pandas as pd

# Placeholder values. In the visible cells nothing reads APPLICATION_ID,
# CLIENT_SECRET or SCOPES (the token request uses config["scopes"] instead).
APPLICATION_ID = "app id"
CLIENT_SECRET = "client secret"
# Built the same way as config["authority_url"] in the configuration cell.
authority_url = f'https://login.microsoftonline.com/{config["tenant_id"]}'
# Graph v1.0 root; the request helper functions define their own local base_url.
base_url = 'https://graph.microsoft.com/v1.0/'
SCOPES = ['Mail.Read', 'Mail.ReadBasic']



In [None]:
# Module-level Graph resource path (the "sites" collection).
resource = "sites"

In [None]:
def get_graph_data(resource, timeout=30):
    """GET a Microsoft Graph v1.0 resource and return the response body as text.

    Parameters
    ----------
    resource : str
        Path relative to the v1.0 root, e.g. ``"sites"``. Leading slashes are
        stripped so the final URL never contains ``v1.0//``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    str or None
        The UTF-8 decoded body on HTTP 200. On any other status the status
        code, reason and body are printed and None is returned. None is also
        returned (after printing the error) when the request itself fails.
    """
    base_url = 'https://graph.microsoft.com/v1.0/'
    url = base_url + str(resource).lstrip("/")

    # Bearer token comes from the module-level config filled in earlier.
    headers = {
        'Authorization': f'Bearer {config["access_token"]}',
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }

    try:
        graph_data = requests.get(url=url,
                                  headers=headers,
                                  stream=False,
                                  timeout=timeout)
    except requests.RequestException as exc:
        # Only transport-level failures are handled here; programming errors
        # are no longer hidden behind a bare except.
        print(f"Request to {url} failed: {exc}")
        return None

    if graph_data.status_code == 200:
        return graph_data.content.decode('utf-8')

    print(graph_data.status_code)
    print(graph_data.reason)
    print(graph_data.text)
    return None
    
# graph_data = requests.get(url=sharepoint_url, headers=headers, verify=True)

In [None]:
def expand_data(df):
    """Spread each row's ``value`` mapping into columns and split the ``id`` column.

    The frame is modified in place and also returned.

    Pass 1: for every row whose ``value`` cell is a mapping, each key becomes a
    column (named ``str(key)``) holding ``str(value)`` for that row. Rows
    without a usable ``value`` are skipped (best effort).

    Pass 2: the row's ``id`` is split on commas; the second piece is stored in
    ``tenant_id`` and the third in ``site_id``. Ids with fewer pieces leave
    those cells unset. If the frame has no ``id`` column after pass 1, pass 2
    is skipped instead of raising ``KeyError``.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for index, row in df.iterrows():
        try:
            for key, value in row["value"].items():
                try:
                    df.loc[index, str(key)] = str(value)
                except Exception:
                    # Fallback kept from the original: if the stringified
                    # value cannot be stored, store its members one by one.
                    for k, v in value.items():
                        df.loc[index, str(k)] = str(v)
        except Exception:
            # Best effort: a missing/non-mapping "value" skips the row.
            # `Exception` (not a bare except) lets Ctrl-C through.
            continue

    if "id" not in df.columns:
        return df

    for index, row in df.iterrows():
        id_parts = str(row["id"]).split(",")
        if len(id_parts) > 1:
            df.loc[index, "tenant_id"] = id_parts[1]
        if len(id_parts) > 2:
            df.loc[index, "site_id"] = id_parts[2]

    return df


In [None]:
# Fetch the "sites" collection, parse the JSON text into a frame and let
# expand_data() spread each row's "value" mapping into columns.
df_sites = expand_data(pd.read_json(get_graph_data("sites"), orient='records', dtype_backend="pyarrow"))

In [None]:
df_sites

In [None]:
# Take the "tenant_id" value of the first row whose displayName is "Reports".
# "tenant_id" is the second comma-separated piece of the row's id, as filled in
# by expand_data(). `.iloc[0]` picks the first match by position; the earlier
# `[...][0]` was a label lookup and raised KeyError unless the matching row
# happened to have index label 0.
# NOTE(review): the value kept in `site_id` comes from the "tenant_id" column --
# confirm this is the identifier the following /sites/{site_id}/... calls need.
site_id = df_sites.loc[df_sites["displayName"] == "Reports", "tenant_id"].iloc[0]

In [None]:
type(site_id)

In [None]:
# Lists of the selected site, parsed from the raw JSON text (not expanded).
df_site_lists = pd.read_json(get_graph_data(f'sites/{site_id}/lists'), orient='records')

In [None]:
# Print every field of the first entry in the "value" column.
for k, v in df_site_lists["value"][0].items():
    print(f"{k}: {v}")

In [None]:
# Drives of the selected site, with each row's "value" mapping expanded into columns.
df_files = expand_data(pd.read_json(get_graph_data(f'sites/{site_id}/drives'), orient='records'))

In [None]:
df_files

In [None]:
# Print every field of the first entry in the "value" column.
for k, v in df_files["value"][0].items():
    print(f"{k}: {v}")

In [None]:
def upload_file(site_id, parent_id, filename):
    """Upload a local file into a folder of a site's default drive (single PUT).

    Parameters
    ----------
    site_id : str
        Site identifier used in ``/sites/{site_id}/drive``.
    parent_id : str
        Id of the destination folder item.
    filename : str
        Path of the local file to send. It is stored remotely under its base
        name (URL-quoted), inside the ``parent_id`` folder.

    Returns
    -------
    str or None
        The UTF-8 decoded response body when the server answers 200 or 201.
        Otherwise the status code, reason and body are printed and None is
        returned. None is also returned (after printing the error) when the
        file cannot be read or the request itself fails.

    Notes
    -----
    The previous version built the item URL but then sent an empty PUT to
    ``base_url/<global resource>``, so no file was ever uploaded.
    """
    base_url = 'https://graph.microsoft.com/v1.0'

    remote_name = urllib.parse.quote(os.path.basename(filename))
    url = f'{base_url}/sites/{site_id}/drive/items/{parent_id}:/{remote_name}:/content'

    # The body is the raw file content, so it is declared as a byte stream
    # rather than JSON; the response is still requested as JSON.
    headers = {
        'Authorization': f'Bearer {config["access_token"]}',
        'Content-Type': 'application/octet-stream',
        'Accept': 'application/json'
    }

    try:
        with open(filename, 'rb') as fd:
            graph_data = requests.put(url=url,
                                      headers=headers,
                                      data=fd)
    except (OSError, requests.RequestException) as exc:
        print(f"Upload of {filename} failed: {exc}")
        return None

    # NOTE(review): 201 is accepted alongside 200 on the assumption that a newly
    # created item is reported as "Created" -- confirm against the Graph docs.
    if graph_data.status_code in (200, 201):
        return graph_data.content.decode('utf-8')

    print(graph_data.status_code)
    print(graph_data.reason)
    print(graph_data.text)
    return None

In [None]:
# Graph v1.0 root. NOTE(review): the value ends with "/" and the cells that use
# it write f'{ENDPOINT}/...', so the requested URLs contain "v1.0//".
ENDPOINT = "https://graph.microsoft.com/v1.0/"
# SharePoint host and site name used to look the site up by path.
SHAREPOINT_HOST_NAME = "example.sharepoint.com"
SITE_NAME = "Reports"

In [None]:
# Authorization header reused by the requests in the following cells.
headers={'Authorization': 'Bearer ' + config['access_token']}

# get the site id
result = requests.get(f'{ENDPOINT}/sites/{SHAREPOINT_HOST_NAME}:/sites/{SITE_NAME}', headers=headers)
result.raise_for_status()  # stop here on an HTTP error response
site_info =result.json()
# Rebinds site_id, which an earlier cell derived from df_sites.
site_id = site_info['id']

In [None]:
site_id

In [None]:
# get the drive id
# Requests /sites/{site_id}/drive and keeps the "id" field of the JSON response.
result = requests.get(f'{ENDPOINT}/sites/{site_id}/drive', headers=headers)
result.raise_for_status()  # stop here on an HTTP error response
drive_info =result.json()
drive_id = drive_info['id']

In [None]:
drive_id

In [None]:
drive_info

In [None]:
import urllib.parse

In [None]:
# get the folder id
folder_path = 'General'   # replace this with the folder you want to list
# Percent-encode the path before placing it in the URL.
folder_url = urllib.parse.quote(folder_path)
# Address the folder by its path relative to the drive root.
result = requests.get(f'{ENDPOINT}/drives/{drive_id}/root:/{folder_url}', headers=headers)
result.raise_for_status()  # stop here on an HTTP error response
folder_info =result.json()
folder_id = folder_info['id']

In [None]:
folder_id

In [None]:

# list the folder contents
# Only the "value" array of this one response is read and printed.
result = requests.get(f'{ENDPOINT}/drives/{drive_id}/items/{folder_id}/children', headers=headers)
result.raise_for_status()  # stop here on an HTTP error response
children =result.json()['value']
for item in children:
    print(item['name'])

In [None]:
# upload a large file to the 'General' folder -- replace these
filename = '.txt'
folder_path = 'General'

# Resolve the destination folder id from its path relative to the drive root.
folder_url = urllib.parse.quote(folder_path)
result = requests.get(f'{ENDPOINT}/drives/{drive_id}/root:/{folder_url}', headers=headers)
result.raise_for_status()
folder_info =result.json()
folder_id = folder_info['id']

# Open an upload session for the file inside that folder.
# NOTE(review): the properties below are sent at the top level of the JSON body;
# confirm against the createUploadSession docs whether they belong under an
# "item" key.
file_url = urllib.parse.quote(filename)
result = requests.post(
    f'{ENDPOINT}/drives/{drive_id}/items/{folder_id}:/{file_url}:/createUploadSession',
    headers=headers,
    json={
        '@microsoft.graph.conflictBehavior': 'replace',
        'description': 'A large test file',
        'fileSystemInfo': {'@odata.type': 'microsoft.graph.fileSystemInfo'},
        'name': filename
    }
)
result.raise_for_status()
upload_session =result.json()
upload_url = upload_session['uploadUrl']

st = os.stat(filename)
size = st.st_size
CHUNK_SIZE = 10485760
# Ceiling division. The previous expression
#   int(size / CHUNK_SIZE) + 1 if size % CHUNK_SIZE > 0 else 0
# applied the `else 0` to the whole sum, so a file whose size is an exact
# multiple of CHUNK_SIZE produced 0 chunks and nothing was uploaded.
chunks = (size + CHUNK_SIZE - 1) // CHUNK_SIZE
with open(filename, 'rb') as fd:
    start = 0
    for chunk_num in range(chunks):
        chunk = fd.read(CHUNK_SIZE)
        bytes_read = len(chunk)
        # Content-Range is inclusive on both ends: "bytes first-last/total".
        upload_range = f'bytes {start}-{start + bytes_read - 1}/{size}'
        print(f'chunk: {chunk_num} bytes read: {bytes_read} upload range: {upload_range}')
        # The chunk PUTs go to the session's upload URL without the
        # Authorization header.
        result = requests.put(
            upload_url,
            headers={
                'Content-Length': str(bytes_read),
                'Content-Range': upload_range
            },
            data=chunk
        )
        result.raise_for_status()
        start += bytes_read
