## checking sandboxing

### datavillage.me and api.datavillage.me are accessible

In [6]:
!wget -T 3 https://datavillage.me

--2022-05-04 13:46:23--  https://datavillage.me/
Resolving datavillage.me (datavillage.me)... 172.20.139.28
Connecting to datavillage.me (datavillage.me)|172.20.139.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘index.html.1’

index.html.1            [ <=>                ]  47.79K  --.-KB/s    in 0.01s   

2022-05-04 13:46:23 (4.57 MB/s) - ‘index.html.1’ saved [48936]



### google is not --> resulting in a connection timeout

In [10]:
!wget -T 3 https://google.com 

--2022-05-04 13:47:48--  https://google.com/
Resolving google.com (google.com)... 142.250.185.174, 2a00:1450:4001:828::200e
Connecting to google.com (google.com)|142.250.185.174|:443... failed: Connection timed out.
Connecting to google.com (google.com)|2a00:1450:4001:828::200e|:443... failed: Network is unreachable.


## Kubernetes initialize the pod with a lot of environment variables ready to be used
### we can for instance find the redis service address

In [13]:
!env

BASE_URL=/780fd3ca2676/jupyter/
NGINX_OUTBOUND_DATAVILLAGE_ME_PORT_443_TCP=tcp://172.20.139.28:443
NGINX_OUTBOUND_DATAVILLAGE_ME_SERVICE_PORT_HTTPS=443
NGINX_OUTBOUND_API_DATAVILLAGE_ME_SERVICE_PORT_HTTPS=443
KUBERNETES_PORT=tcp://172.20.0.1:443
REDIS_SERVICE_PORT_6379=6379
REDIS_PORT=tcp://172.20.189.125:6379
NGINX_OUTBOUND_API_DATAVILLAGE_ME_PORT_443_TCP=tcp://172.20.58.54:443
KUBERNETES_SERVICE_PORT=443
REDIS_SERVICE_PORT=6379
NGINX_OUTBOUND_DATAVILLAGE_ME_SERVICE_HOST=172.20.139.28
NGINX_OUTBOUND_API_DATAVILLAGE_ME_SERVICE_HOST=172.20.58.54
REDIS_PORT_6379_TCP_ADDR=172.20.189.125
MPLBACKEND=module://matplotlib_inline.backend_inline
HOSTNAME=jupyter-0
DV_APP_ID=00eca6c5-d17c-4870-bdec-780fd3ca2676
PYTHON_PIP_VERSION=22.0.4
HOME=/home/user
DV_TOKEN=eyJhbGciOiJLTVMiLCJ0eXAiOiJKV1QifQ.eyJhdWQiOiJodHRwczovL2FwaS5kYXRhdmlsbGFnZS5tZS8iLCJhenAiOiJsQWVYRXNMbGM0Z0h4Y1FYcnpOWjJoYUQzRWJPU0g4ViIsImlhdCI6MTY1MTY3MTg0MSwiZXhwIjoyMjUxNjcxODQxLCJzY29wZSI6InJlYWQ6Y2xpZW50cyBjcmVhdGU6Y2xpZW50cyIsInVy

## Checking connections to redis

In [9]:
import redis, os, datetime
r = redis.Redis(host=os.environ["REDIS_SERVICE_HOST"], port=os.environ["REDIS_SERVICE_PORT"], db=0)

In [10]:
r.set('foo', 'bar')

True

In [11]:
r.get('foo')

b'bar'

### checking for message on the redis queue

In [None]:
pubsub = r.pubsub(ignore_subscribe_messages=True)
pubsub.subscribe("dv")
end = datetime.datetime.utcnow()+datetime.timedelta(minutes=2)
while datetime.datetime.utcnow()<end:
    message = pubsub.get_message()
    if(message): print(message)

{'type': 'message', 'pattern': None, 'channel': b'dv', 'data': b'{"appId": "edf4b78e-bc76-470f-b215-842474eab1c4", "clientId": "f20502df-3c27-4441-90fa-003173f9bdf7", "type": "OTHER"}'}


## Load app settings

In [1]:
import requests, os
clientId, appId, token, baseUrl = os.environ["DV_CLIENT_ID"], os.environ["DV_APP_ID"], os.environ["DV_TOKEN"], os.environ["DV_URL"]
appSettings = requests.get(f'{baseUrl}/clients/{clientId}/applications/{appId}', headers={"Authorization":f"Bearer {token}"}).json()
appSettings

JSONDecodeError: [Errno Expecting value] Unauthorized: 0

## Load users

In [3]:
userIds = requests.get(f'{baseUrl}/clients/{clientId}/applications/{appId}/activeUsers', headers={"Authorization":f"Bearer {token}"}).json()
print(len(userIds))
userId = userIds[0]    
print(userId)

0


IndexError: list index out of range

## Load user data as rdf

In [18]:
from rdflib import Graph as RDFGraph
raw_data = requests.get(f'{baseUrl}/clients/{clientId}/applications/{appId}/activeUsers/{userId}/data', headers={"Authorization":f"Bearer {token}"})
rdf_data = RDFGraph()
rdf_data.parse(data=raw_data.text, format="turtle")


<Graph identifier=N2522ed13f0164747b676749565fe32f6 (<class 'rdflib.graph.Graph'>)>

## Extract Track Listens from rdf (using Sparql) and aggregate by artist, album (using Pandas)

In [34]:
import pandas as pd
listens = rdf_data.query(
        """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX sdo: <https://schema.org/>        
        SELECT DISTINCT ?action ?object ?action_time ?object_name ?object_type ?object_provider ?album ?album_name ?artist ?artist_name        
           WHERE {
              ?action sdo:object ?object .
              ?action sdo:additionalType sdo:ListenAction .
              OPTIONAL { ?action sdo:startTime ?action_time . }
              OPTIONAL { ?object sdo:name ?object_name . }
              OPTIONAL { ?object sdo:additionalType ?object_type . }
              OPTIONAL { ?object sdo:provider ?object_provider . }
              OPTIONAL { ?object sdo:inAlbum ?album. ?album sdo:name ?album_name} .
              OPTIONAL { ?object sdo:byArtist ?artist. ?artist sdo:name ?artist_name} .              
           }"""
    )
listens = [ {k: v.toPython() for k, v in r.asdict().items()} for r in listens ]
listens = pd.DataFrame(listens)
listens[:2]

Unnamed: 0,action,object,action_time,object_name,object_provider,album,album_name,artist,artist_name
0,https://api.spotify.com/v1/me/history/0qprlw0jfsW4H9cG0FFE0Z,https://open.spotify.com/track/0qprlw0jfsW4H9cG0FFE0Z,2021-12-26 15:21:28.655000+00:00,Cold Little Heart,https://www.spotify.com,https://open.spotify.com/album/0qxsfpy2VU0i4eDR9RTaAU,Love & Hate,https://open.spotify.com/artist/0bzfPKdbXL5ezYW2z3UGQj,Michael Kiwanuka
1,https://api.spotify.com/v1/me/history/0qprlw0jfsW4H9cG0FFE0Z,https://open.spotify.com/track/0qprlw0jfsW4H9cG0FFE0Z,2021-12-25 21:19:04.180000+00:00,Cold Little Heart,https://www.spotify.com,https://open.spotify.com/album/0qxsfpy2VU0i4eDR9RTaAU,Love & Hate,https://open.spotify.com/artist/0bzfPKdbXL5ezYW2z3UGQj,Michael Kiwanuka


In [57]:
listens["count"] = 1
top_artists = listens.groupby(["artist"]).aggregate({"artist_name":"first", "count":"sum", "action_time":"max"}).reset_index().sort_values(["count","action_time"], ascending=False)
top_artists = top_artists[:10].drop("action_time", axis=1).reset_index(drop=True)
top_artists

Unnamed: 0,artist,artist_name,count
0,https://open.spotify.com/artist/0bzfPKdbXL5ezYW2z3UGQj,Michael Kiwanuka,11
1,https://open.spotify.com/artist/1Dt1UKLtrJIW1xxRBejjos,The Blaze,6
2,https://open.spotify.com/artist/1W8rzmv9lhOEkwtRMAzZtw,Andrew Britton,2
3,https://open.spotify.com/artist/2TJHmhbmT7L3gw2NKyDTHh,Manu Pilas,2
4,https://open.spotify.com/artist/7adu8fQmtoTq8O8BXQIq9L,Smith & Burrows,2
5,https://open.spotify.com/artist/6YFu93nQbZx5Nrt6Qjfj90,Henry Lindon,1
6,https://open.spotify.com/artist/5Wj6zFHgENkOCOMh4ZwGX0,Burt Mitchell,1
7,https://open.spotify.com/artist/0sn0tQQs3lIlLSgHgidapX,Hector Milo Trio,1
8,https://open.spotify.com/artist/52Sd0A8IsB2XLpEUIvs6En,Alex Decante,1
9,https://open.spotify.com/artist/4Na5KNSDTg543P1FMbaAMk,Mandevilla,1


In [58]:
print(f"Top artists for user {userId} are:")
print(top_artists.to_string())

Top artists for user 38804928-cdc0-4a6b-b3f0-f3ce411c58cb are:
                                                   artist       artist_name  count
0  https://open.spotify.com/artist/0bzfPKdbXL5ezYW2z3UGQj  Michael Kiwanuka     11
1  https://open.spotify.com/artist/1Dt1UKLtrJIW1xxRBejjos         The Blaze      6
2  https://open.spotify.com/artist/1W8rzmv9lhOEkwtRMAzZtw    Andrew Britton      2
3  https://open.spotify.com/artist/2TJHmhbmT7L3gw2NKyDTHh        Manu Pilas      2
4  https://open.spotify.com/artist/7adu8fQmtoTq8O8BXQIq9L   Smith & Burrows      2
5  https://open.spotify.com/artist/6YFu93nQbZx5Nrt6Qjfj90      Henry Lindon      1
6  https://open.spotify.com/artist/5Wj6zFHgENkOCOMh4ZwGX0     Burt Mitchell      1
7  https://open.spotify.com/artist/0sn0tQQs3lIlLSgHgidapX  Hector Milo Trio      1
8  https://open.spotify.com/artist/52Sd0A8IsB2XLpEUIvs6En      Alex Decante      1
9  https://open.spotify.com/artist/4Na5KNSDTg543P1FMbaAMk        Mandevilla      1


## demonstrate sandboxing

In [72]:
try:
    r = requests.get("https://google.com")
    print("was able to reach outside", r.status_code)
except requests.exceptions.RequestException as e:
    print("connection faillure error",e)
except Exception as e:
    print("other error", e)

was able to reach outside 200
