In [1]:
#import libraries and secrets
import pandas as pd
import numpy as np
import psycopg2
import sys
from sqlalchemy import create_engine
import os
import subprocess
import datetime as dt
from bs4 import BeautifulSoup
import requests
import lxml.html as lh
import xgboost as xgb
import lightgbm as lgb
from datetime import datetime, timedelta
import pyarrow.parquet as pq
import boto3
import tweepy
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from dotenv import load_dotenv

# Load key=value pairs from the local .env file so the secrets below can be read via os.environ.
env_path = './.env'
load_dotenv(dotenv_path=env_path)

# Postgres credentials come from the environment; a missing variable raises KeyError here.
postgres_user = os.environ['PSQL_USERNAME']
postgres_pw = os.environ['PSQL_PASSWORD']
# Unlike the credentials, the Postgres host is hard-coded.
postgres_url = 'reporting-db.expapp.com'

# Redshift credentials and host, all read from the environment.
redshift_user = os.environ['REDSHIFT_USERNAME']
redshift_pw = os.environ['REDSHIFT_PASSWORD']
redshift_url = os.environ['REDSHIFT_URL']

# Twitter API credentials; they are handed to tweepy further down in this cell.
t_consumer_key = os.environ['TWT_CONSUMER_KEY']
t_consumer_secret = os.environ['TWT_CONSUMER_SECRET']
t_access_token = os.environ['TWT_ACCESS_TOKEN_KEY']
t_access_token_secret = os.environ['TWT_ACCESS_TOKEN_SECRET']

def load_to_redshift(df_name,db_table_name,pq_str=""):
    """Append a DataFrame to a Redshift table through a Parquet file staged in S3.

    Steps: write ``df_name`` to ``moveit_<pq_str>.parquet`` in the working
    directory, upload it with ``aws s3 cp`` to ``s3://exp-data-science/``,
    print the table's row count, run a Redshift ``COPY ... FORMAT AS PARQUET``
    from that S3 object, and print the row count again.

    Parameters
    ----------
    df_name : object with a ``to_parquet(path, index=False)`` method
        The data to load (a pandas DataFrame in this notebook).
    db_table_name : str
        Target table. It is interpolated directly into the SQL text, so it
        must come from trusted code, never from user input.
    pq_str : str, optional
        Suffix used to make the staged file name unique.

    Raises
    ------
    RuntimeError
        If the ``aws s3 cp`` upload exits with a non-zero status. Nothing is
        executed against the database in that case.

    Notes
    -----
    NOTE(review): this relies on a module-level cursor named ``cur`` that is
    not created in the visible cells (the Postgres cursor there is called
    ``cursor``) - confirm where it is defined. No commit is issued here.
    The local Parquet file is left on disk after the upload.
    """
    parquet_nm = 'moveit_' + pq_str + '.parquet'
    df_name.to_parquet(parquet_nm, index=False)

    # Pass argv as a list: splitting a command string on whitespace would
    # break the command as soon as pq_str contains a space.
    upload_cmd = ["aws", "s3", "cp", parquet_nm, "s3://exp-data-science/" + parquet_nm]
    upload = subprocess.run(upload_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if upload.returncode != 0:
        # Stop before COPY: otherwise Redshift would load a missing or stale object.
        raise RuntimeError(
            "aws s3 cp failed (exit {}): {}".format(
                upload.returncode,
                (upload.stderr or b"").decode(errors="replace").strip(),
            )
        )

    def _row_count():
        # Current number of rows in the target table.
        cur.execute("select count(*) from "+db_table_name)
        return cur.fetchone()[0]

    print("Current "+db_table_name+" row count: "+str(_row_count()))

    statement="""
    copy {} from 's3://exp-data-science/{}'
    iam_role 'arn:aws:iam::xxx:role/Redshift_Copy_Unload'
    FORMAT AS PARQUET;
    """.format(db_table_name,parquet_nm)
    cur.execute(statement)

    print("New "+db_table_name+" row count: "+str(_row_count()))
    
# authentication of consumer key and secret
auth = tweepy.OAuthHandler(t_consumer_key, t_consumer_secret) 
    
# authentication of access token and secret 
auth.set_access_token(t_access_token, t_access_token_secret) 
# Module-level tweepy client; tweet() posts status updates through it.
api = tweepy.API(auth) 

def tweet(txtout):
    """Post ``txtout`` as a status update, prefixed with a tag and a timestamp.

    The posted text is ``"status | <timestamp> <txtout>"``, where the
    timestamp is the current local date and time rendered with ``%c``.
    Posting goes through the module-level tweepy client ``api``.
    """
    stamp = datetime.today().strftime("%c")
    message = "status | " + stamp + " " + txtout
    api.update_status(message)



In [2]:
#this block makes the connection to postgres db (tableau/follower)
#build the DSN with make_dsn instead of hand-quoted concatenation, so a
#password or user name containing a quote, backslash or space is escaped
#correctly rather than producing a broken connection string
conn_string = psycopg2.extensions.make_dsn(
    host=postgres_url,
    dbname='db',
    user=postgres_user,
    password=postgres_pw,
)
conn = psycopg2.connect(conn_string)
cursor = conn.cursor()

In [3]:
#needed to make web requests
import requests

#store the data we get as a dataframe
import pandas as pd

#parse the response as structured json
import json

#mathematical operations on lists
import numpy as np

#parse the datetimes we get from NOAA
from datetime import datetime

from statistics import median 

#NOAA access token: read it from the environment (e.g. a NOAA_TOKEN entry in
#the .env file) like the other secrets in this notebook, so it is not
#committed with the notebook; the placeholder is only a fallback
Token = os.environ.get('NOAA_TOKEN', 'xxx')


In [4]:
#load the zip/county file; ZIP is read as text because it is later compared
#against a zip code given as a string
zips = pd.read_csv('assets/ZIP_COUNTY_032020.csv', dtype={'ZIP': 'str'})

In [5]:
#define zipcode for the venue you need weather for
zip_in='54304'

#select the row(s) of the zip/county file that match this zip
county=zips.loc[zips['ZIP']==zip_in]
if county.empty:
    #without this guard an unknown zip fails with an opaque IndexError from .iloc[0]
    raise ValueError("ZIP "+zip_in+" not found in the zip/county file")
#a zip may match several rows; only the county of the first matching row is used
select_county=county.iloc[0]['COUNTY']

In [6]:
#keep every row of the zip/county file whose county is the one picked above
select_zips=zips[zips['COUNTY'].eq(select_county)]

In [7]:
#distinct zips of the selected county
zip_list=select_zips['ZIP'].unique()
#one "locationid=ZIP:<zip>" query parameter per zip, chained with "&"
zips_str='locationid=ZIP:'+'&locationid=ZIP:'.join(zip_list)

In [8]:
#assemble api call
base='https://www.ncdc.noaa.gov/cdo-web/api/v2/data?'
#daily summaries data set (GHCND)
datasetid='datasetid=GHCND'
#datatypeid='datatypeid=TAVG'
#the api returns only 25 rows unless a limit is given; ask for the maximum
#(1000) so readings are not silently dropped when many zips/stations match
limit='limit=1000'
#stationid=zips_str
locationid=zips_str
#start date and end date of data pull
startdate='startdate=2020-05-29'
enddate='enddate=2020-05-29'

request=base+datasetid+'&'+locationid+'&'+startdate+'&'+enddate+'&'+limit

#https://www.ncdc.noaa.gov/cdo-web/api/v2/data?datasetid=GHCND&locationid=ZIP:28801&startdate=2010-05-01&enddate=2010-05-01


In [9]:
#print(request)

In [10]:
def _tenths_c_to_f(value):
    """Convert a reading given in tenths of a degree Celsius to Fahrenheit."""
    return ((float(value)/10.0)*1.8)+32


def _values_for(results, datatype, convert=None):
    """Collect the 'value' of every result of the given datatype, optionally converted."""
    values = [item['value'] for item in results if item['datatype']==datatype]
    return values if convert is None else [convert(v) for v in values]


def _median_or_none(values):
    """Median of ``values``, or None for an empty list (median() would raise)."""
    return median(values) if values else None


#make the api call to NOAA; the timeout keeps the cell from hanging forever
r = requests.get(request, headers={'token':Token}, timeout=30)
#fail here on an HTTP error (bad token, rate limit, ...) rather than with a
#confusing parse error further down
r.raise_for_status()
#load the api response as a json
d = json.loads(r.text)
#a response without data may lack the 'results' key; treat that as no readings
results = d.get('results', [])
#get all items in the response which are precip readings (left in the api's raw units)
prcp = _values_for(results, 'PRCP')
#get all max/min/average temperature readings
#convert from tenths of centigrade to F
tavg = _values_for(results, 'TAVG', _tenths_c_to_f)
tmin = _values_for(results, 'TMIN', _tenths_c_to_f)
tmax = _values_for(results, 'TMAX', _tenths_c_to_f)

#print data and median values from list (None when a list is empty)
print(prcp)
print(_median_or_none(prcp))

print(tmin)
print(_median_or_none(tmin))

print(tmax)
print(_median_or_none(tmax))

[295, 480, 439, 699, 307, 371, 318, 665, 358, 279, 284, 0]
338.0
[53.96, 53.06]
53.510000000000005
[71.06, 73.03999999999999]
72.05
