In [103]:
import ast
import json
import os
from urllib.parse import quote_plus
from urllib.request import urlopen

import numpy as np
import pandas as pd
import plotly.express as px
import psycopg2
import tensorflow as tf
from sqlalchemy import create_engine
from transformers import pipeline

In [100]:
# Allocate a pipeline for sentiment-analysis on the first GPU.
# The device is handed to the factory, which is the documented way to choose
# hardware, instead of patching `classifier.device` on an already-built
# pipeline and calling `ensure_tensor_on_device()` with no tensors to move.
# The checkpoint is pinned to the one the default pipeline resolved to, so the
# scores stay comparable if the library's default model ever changes.
classifier = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    device=0,  # GPU ordinal; -1 would select the CPU
)
classifier.model

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertModel: ['pre_classifier', 'dropout_19', 'classifier']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.
Some layers from the model checkpoint at distilbert-b

<transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertForSequenceClassification at 0x7f1f0d789b50>

# SQL

In [101]:
# SQLAlchemy engine
# Connection settings come from the environment so the password does not have
# to live in the notebook; the fallbacks reproduce the previous hardcoded
# values and should be dropped once the variables are set everywhere.
# User and password are URL-quoted because characters such as "@" or "/"
# would otherwise corrupt the connection URL.
engine = create_engine(
    "postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}".format(
        user=quote_plus(os.environ.get("TWITTER_DB_USER", "postgres")),
        password=quote_plus(os.environ.get("TWITTER_DB_PASSWORD", "password")),
        host=os.environ.get("TWITTER_DB_HOST", "10.0.0.26"),
        port=os.environ.get("TWITTER_DB_PORT", "5432"),
        dbname=os.environ.get("TWITTER_DB_NAME", "twitter"),
    )
)

In [102]:
# psycopg2 connection
# Settings are read from the environment (fallbacks equal the previous
# hardcoded values) and passed as keyword arguments, so no quoting of a
# hand-built DSN string is needed and the password can stay out of the notebook.
conn = psycopg2.connect(
    dbname=os.environ.get("TWITTER_DB_NAME", "twitter"),
    user=os.environ.get("TWITTER_DB_USER", "postgres"),
    host=os.environ.get("TWITTER_DB_HOST", "10.0.0.26"),
    port=os.environ.get("TWITTER_DB_PORT", "5432"),
    password=os.environ.get("TWITTER_DB_PASSWORD", "password"),
)

In [95]:
# Statement that copies every staged value from `scores.score` into
# `tweets.sentiment`, matching rows on tweet_id.
sql = "\n".join([
    "UPDATE tweets t",
    "    SET sentiment = scores.score",
    "    FROM scores",
    "    WHERE scores.tweet_id = t.tweet_id;",
])

# Score Tweets

In [97]:
# Mapping from classifier output to the 0-10 sentiment scale stored in the DB.
CONFIDENCE_THRESHOLD = .95  # predictions below this confidence count as neutral
NEGATIVE, NEUTRAL, POSITIVE = 0, 5, 10


def sentiment_to_score(prediction):
    """Convert one classifier prediction into a 0 / 5 / 10 sentiment score.

    Args:
        prediction: mapping with a 'label' string and a 'score' confidence,
            as returned by the sentiment pipeline for a single text.

    Returns:
        NEUTRAL (5) when the confidence is below CONFIDENCE_THRESHOLD,
        otherwise POSITIVE (10) for the 'POSITIVE' label and NEGATIVE (0)
        for any other label.
    """
    if prediction['score'] < CONFIDENCE_THRESHOLD:
        return NEUTRAL
    return POSITIVE if prediction['label'] == 'POSITIVE' else NEGATIVE


# `with conn` ends the read transaction as soon as the rows are fetched, so the
# connection is not left idle-in-transaction while the model runs; the cursor
# context manager closes the cursor.
with conn:
    with conn.cursor() as cur:
        cur.execute("""SELECT tweet_id, text from tweets_transform;""")
        # Naming the columns also keeps an empty result usable (no KeyError).
        tweets_raw = pd.DataFrame(cur.fetchall(), columns=['tweet_id', 'text'])

texts = tweets_raw['text'].tolist()
# Skip the model call entirely when there is nothing to score.
scores = classifier(texts) if texts else []

# Final frame handed to the database update: one score per tweet_id.
tweets = (
    tweets_raw[['tweet_id']]
    .assign(score=[sentiment_to_score(prediction) for prediction in scores])
    .astype({'score': 'int64'})
)

# Update Database

In [97]:
# Stage the freshly computed scores in the `scores` table (replaced on every
# run), then copy them onto `tweets` with the UPDATE ... FROM statement in `sql`.
tweets.to_sql(con=engine, name='scores', if_exists='replace', index=False)
# `with conn` commits when the block succeeds and rolls back if the UPDATE
# raises, replacing the hand-issued 'commit' statement that left a failed
# transaction open; the cursor context manager closes the cursor.
with conn:
    with conn.cursor() as cur:
        cur.execute(sql)

In [1]:

# Load the county unemployment sample, reading the `fips` column as strings,
# and preview the first rows.
unemployment_csv = "https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv"
df = pd.read_csv(unemployment_csv, dtype={"fips": str})
df.head()

Unnamed: 0,fips,unemp
0,1001,5.3
1,1003,5.4
2,1005,8.6
3,1007,6.6
4,1009,5.5


In [97]:
# Download the county GeoJSON that supplies the shapes for the choropleth.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

# Unemployment figures per county; `fips` is read as a string because it is
# used below as the key that links rows to GeoJSON features.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
                 dtype={"fips": str})

# County-level unemployment map centred on the continental US.
fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations='fips',
    color='unemp',
    color_continuous_scale="Viridis",
    range_color=(0, 12),  # colour scale is clamped to this range
    mapbox_style="carto-positron",
    zoom=3,
    center={"lat": 37.0902, "lon": -95.7129},
    opacity=0.5,
    labels={'unemp': 'unemployment rate'},
)

In [98]:
# Remove the figure margins, then export the map as an HTML document.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
# Explicit UTF-8 so the export does not depend on the platform's default
# encoding, which can fail on or garble non-ASCII characters in the page.
with open('stuff.html', 'w', encoding='utf-8') as writer:
    writer.write(fig.to_html())

In [105]:
# Fetch the street-pole sample CSV and preview its first rows.
url = (
    "https://www.dropbox.com/s/15gisj8hx218rn1/"
    "street-pole-sample.csv?dl=1"
)
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,X,Y,OBJECTID,OID,POLE_NUM,TYPE,NLUMIN,LUM_SIZE,HEIGHT,POLE_DATE,UP_DATE,OWNER,TAP_ID,BLOCK,PLATE
0,423,-75.170097,39.942766,41341,0,214423,WP,,,,1997-06-09T00:00:00.000Z,1997-06-09T00:00:00.000Z,PECO,0.0,,
1,85,-75.166112,39.941477,42523,0,215645,AAPT,2.0,250.0,25.0,1997-06-10T00:00:00.000Z,1997-06-10T00:00:00.000Z,Streets,214622.0,,
2,121,-75.163483,39.943068,42730,0,215926,WP,,,,1997-06-04T00:00:00.000Z,1997-06-04T00:00:00.000Z,PECO,0.0,,
3,478,-75.167727,39.944528,41409,0,214515,WP,1.0,100.0,25.0,1997-06-09T00:00:00.000Z,1997-06-09T00:00:00.000Z,Streets,214515.0,,
4,392,-75.171863,39.941486,41298,0,214380,WP,1.0,100.0,25.0,1997-06-09T00:00:00.000Z,1997-06-09T00:00:00.000Z,Streets,214380.0,,


In [114]:
# Attach a constant 'hi' value to every row as the TEXT column.
df = df.assign(TEXT='hi')

In [118]:
# Plot each row at its X (longitude) / Y (latitude) position, with the TEXT
# column shown on hover.
fig = px.scatter_mapbox(
    df,
    lat="Y",
    lon="X",
    hover_data=["TEXT"],
    mapbox_style="carto-positron",
    zoom=15,
)
# Explicit UTF-8 so the export does not depend on the platform's default
# encoding, which can fail on or garble non-ASCII characters in the page.
with open('stuff2.html', 'w', encoding='utf-8') as writer:
    writer.write(fig.to_html())

# Read Flowfile

In [30]:
import plotly.express as px

In [31]:
# The input is a local CSV whose file name is a bare UUID
# (presumably a flowfile export, per the section header — verify).
flowfile_path = "39eceec6-8cc9-4052-b5b3-4679264e458b"
df = pd.read_csv(flowfile_path)

In [32]:
# Preview the first rows of the frame that was just loaded.
df.head()

Unnamed: 0,coord,full_name,created_at,text
0,"[[[-73.911271,40.900789],[-73.911271,40.988346...","Yonkers, NY",Fri Mar 12 17:50:59 +0000 2021,@Aescano https://t.co/MZpIU80fWR
1,"[[[-82.21201,33.338854],[-82.21201,33.542581],...","Augusta, GA",Fri Mar 12 17:50:59 +0000 2021,"@angelicjuju I got mines done on wrightsboro ,..."
2,"[[[-85.605166,30.355644],[-85.605166,35.000771...","Georgia, USA",Fri Mar 12 17:51:00 +0000 2021,@MacBell1919 Even better lol
3,"[[[-96.014224,41.089139],[-96.014224,41.191076...","Bellevue, NE",Fri Mar 12 17:51:00 +0000 2021,@KatelynDynneson Especially that 28 year old f...
4,"[[[-96.977527,32.620678],[-96.977527,33.019039...","Dallas, TX",Fri Mar 12 17:51:00 +0000 2021,@TheNoiz_ 😭 you only lose when you sell when y...


In [33]:
# `coord` holds nested [lon, lat] lists serialised as text. They come from a
# data file, so they are parsed with ast.literal_eval (literals only) rather
# than eval, which would execute arbitrary code embedded in the file.
# Each string is parsed once; the first vertex of the first ring is kept as
# the point to plot.
first_vertices = df['coord'].apply(lambda text: ast.literal_eval(text)[0][0])
df['lat'] = first_vertices.map(lambda vertex: vertex[1])
df['lon'] = first_vertices.map(lambda vertex: vertex[0])

In [46]:
# Plot every tweet at its derived lat/lon, with the tweet text and place name
# shown on hover.
fig = px.scatter_mapbox(
    df,
    lat="lat",
    lon="lon",
    hover_data=["text", "full_name"],
    mapbox_style="carto-positron",
    zoom=3.5,
)
# The hover text contains emoji and other non-ASCII characters, so the file is
# written as UTF-8 explicitly; a platform default such as cp1252 would raise
# UnicodeEncodeError here. Note this overwrites the earlier 'stuff2.html'.
with open('stuff2.html', 'w', encoding='utf-8') as writer:
    writer.write(fig.to_html())

In [47]:
# List the column labels; the output below shows `lat` and `lon` alongside
# the fields read from the file.
df.columns

Index(['coord', 'full_name', 'created_at', 'text', 'lat', 'lon'], dtype='object')