In [3]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules on disk are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [21]:
from pathlib import Path
import os
import sys
import datetime as dt
import pandas as pd
import numpy as np
from google.cloud import storage
import json
from dotenv import load_dotenv
load_dotenv()
import tempfile

from tensorflow import keras as tfk
import tensorflow_hub as hub

In [5]:
# Directory that contains the ``airflow_dags`` checkout. It is put on
# ``sys.path`` so the ``airflow_dags.*`` imports in the next cell resolve.
# The default is the original author's location; set AIRFLOW_DAGS_PARENT_DIR
# (for example in the .env file read by load_dotenv) to run elsewhere.
dag_path = Path(
    os.environ.get(
        'AIRFLOW_DAGS_PARENT_DIR',
        '/Users/calebcastleberry/Documents/GitHubRepos',
    )
)

# Guard the append: re-running this cell must not pile up duplicate entries.
if dag_path.as_posix() not in sys.path:
    sys.path.append(dag_path.as_posix())

In [6]:
from airflow_dags.services.reddit_analysis.dags import sub_overview_node as son
from airflow_dags.services.reddit_analysis.dags import post_detail_node as pdn

In [7]:
def load_settings(names):
    """Look up configuration values, preferring Airflow Variables over the environment.

    Args:
        names: iterable of setting names. Each name is used both as the
            Airflow Variable key and as the environment-variable name.

    Returns:
        dict mapping every name to its resolved value. When Airflow cannot be
        imported, or any Variable lookup raises, *all* values come from
        ``os.environ`` (``None`` for names that are not set).
    """
    env_values = {name: os.environ.get(name) for name in names}
    try:
        from airflow.models import Variable
        # The environment value doubles as the default for a missing Variable.
        return {
            name: Variable.get(name, default_var=fallback)
            for name, fallback in env_values.items()
        }
    except Exception:
        # Deliberately ``Exception`` rather than a bare ``except``: a missing
        # or unusable Airflow install falls back to the environment, while
        # KeyboardInterrupt and SystemExit still propagate.
        return env_values


_settings = load_settings((
    'REDDIT_CLIENT_ID',
    'REDDIT_CLIENT_SECRET',
    'REDDIT_USER_AGENT',
    'GOOGLE_STORAGE_BUCKET_NAME',
))
reddit_client_id = _settings['REDDIT_CLIENT_ID']
reddit_client_secret = _settings['REDDIT_CLIENT_SECRET']
reddit_user_agent = _settings['REDDIT_USER_AGENT']
google_storage_bucket_name = _settings['GOOGLE_STORAGE_BUCKET_NAME']

In [8]:
# Subreddit name and calendar date for this session.
# NOTE(review): neither value is referenced by the later cells visible here.
subreddit = 'destinythegame'
date = dt.date(2020, 1, 23)

In [9]:
# Object name of the stored comment dump, assembled with pathlib and rendered
# with forward slashes.
blob_path = (Path('reddit_analysis') / 'comments' / '7mys8m_comments.json').as_posix()

# Handle chain: storage client -> configured bucket -> the comments blob.
client = storage.Client()
bucket = client.bucket(google_storage_bucket_name)
json_blob = bucket.blob(blob_path)

In [10]:
# Fetch the blob's payload first, then parse it as a JSON document.
raw_comments = json_blob.download_as_string()
comments = json.loads(raw_comments)

In [11]:
# Tabulate the parsed comment records as a DataFrame.
df = pd.DataFrame.from_records(data=comments)

In [12]:
# Display the (rows, columns) size of the comments frame.
df.shape

(29, 9)

In [13]:
# Preview the first ten rows of the comments frame.
df.head(10)

Unnamed: 0,id,parent_prefix,parent_id,parent,body,score,level,post_id,created_ts
0,drxp6xb,t3,7mys8m,t3_7mys8m,It's just a box (!),65,1,7mys8m,1514599000.0
1,drxqbv0,t3,7mys8m,t3_7mys8m,!,50,1,7mys8m,1514601000.0
2,dry854l,t3,7mys8m,t3_7mys8m,"Kept you waiting, huh?",11,1,7mys8m,1514635000.0
3,dry4pj2,t3,7mys8m,t3_7mys8m,Solid post.,26,1,7mys8m,1514625000.0
4,dryb97i,t3,7mys8m,t3_7mys8m,An MTX to surpass all other MTX.,9,1,7mys8m,1514643000.0
5,dry98q5,t3,7mys8m,t3_7mys8m,New Dexterity skill gem for the next expansion...,10,1,7mys8m,1514638000.0
6,dry8aa1,t3,7mys8m,t3_7mys8m,You're that ninja..,5,1,7mys8m,1514636000.0
7,drynyd1,t3,7mys8m,t3_7mys8m,So this is what Cipher has been working on in ...,2,1,7mys8m,1514660000.0
8,drxyemf,t3,7mys8m,t3_7mys8m,Quality MGSpost.,2,1,7mys8m,1514612000.0
9,dry6n50,t3,7mys8m,t3_7mys8m,Lilly in the Box,2,1,7mys8m,1514631000.0


In [30]:
# Pull the ``body`` text from the row labelled 0 and flatten it into a
# one-dimensional array; the last line displays the resulting shape.
sample_text = df.loc[0, 'body']
sample_array = np.array(sample_text).ravel()
sample_array.shape

(1,)

In [15]:
# Location of the saved Keras model inside the ``airflow_dags`` checkout.
# It is derived from ``dag_path`` so the repository root is defined in one
# place; with the original ``dag_path`` this yields the same string as before.
model_path = (
    dag_path
    / 'airflow_dags'
    / 'services'
    / 'reddit_analysis'
    / 'models'
    / 'keras_large_bert.h5'
).as_posix()

In [16]:
# Map the layer name 'KerasLayer' to the TF-Hub class so the loader can resolve
# it when reading the saved model.
hub_layers = {'KerasLayer': hub.KerasLayer}
model = tfk.models.load_model(model_path, custom_objects=hub_layers)

In [32]:
# Score the one-element sample array with the loaded model. The trailing bare
# expression displays the returned array (one row of six values in the recorded
# output).
pred = model.predict(sample_array)
pred

array([[3.7293121e-02, 7.5271958e-04, 1.2652708e-02, 7.6698583e-05,
        1.0714167e-02, 2.1729067e-03]], dtype=float32)

In [35]:
# Names printed next to the values of the first prediction row.
# NOTE(review): the order is assumed to match the model's output columns;
# confirm against the training code.
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

sample_scores = pred[0]
# Fail loudly on a label/output mismatch instead of relying on a hard-coded count.
assert len(sample_scores) == len(label_cols), 'label list does not match model output width'

divider = '---------------------'
for label, score in zip(label_cols, sample_scores):
    print(divider)
    print(f'{label}:')
    print(f'Score: {score:.02f}')
    print(divider)

---------------------
toxic:
Score: 0.04
---------------------
---------------------
severe_toxic:
Score: 0.00
---------------------
---------------------
obscene:
Score: 0.01
---------------------
---------------------
threat:
Score: 0.00
---------------------
---------------------
insult:
Score: 0.01
---------------------
---------------------
identity_hate:
Score: 0.00
---------------------


In [36]:
# Scratch check of two-decimal rounding. The recorded result is 0.34 rather than
# 0.35, presumably because 0.345 is stored as a binary float slightly below
# that value.
round(.345, 2)

0.34