# Hayden Johnson and Quentin Bidwell

# Project 1.) First Cloud Function

### Description : Post a cloud function that takes in a string of numbers and returns a json file that contains the the sum of all of the single digit numbers

#### Example : input ="12345"
#### output = 1+2+3+4+5 = 15
#### returns({"answer":15})

In [1]:
import requests
import json

In [101]:
url = 'https://us-west2-annreturn.cloudfunctions.net/firstCloudFunction'

In [None]:
def firstCloudFunction(nums):
  #nums = nums.get_data().decode()
  lst = []
  for num in nums:
    lst.append(int(num))
  lstSum = sum(lst)
  answer = {'total' : lstSum}
  return answer

In [None]:
firstCloudFunction('8888')

{'total': 32}

In [99]:
packages = """requests            2.27.1
session_info        1.0.0"""
print(packages.replace(" ",""))

requests2.27.1
session_info1.0.0


In [108]:
r = requests.post(url, "12345")
json_data = r.json()
total = json_data["total"]

print(total)

15


## 1.b.) Query your cloud function using requests for example input "012937", "2" and "9999999999999"

In [208]:
def queryAdd(inputs):
    for input_data in inputs:
      url = 'https://us-west2-annreturn.cloudfunctions.net/firstCloudFunction'
      response = requests.post(url, data=input_data)
      json_data = response.json()
      total = json_data["total"]
      print(f"Input: {input_data}, Total: {total}")

inputs = ["012937", "2", "9999999999999"]

queryAdd(inputs)


Input: 012937, Total: 22
Input: 2, Total: 2
Input: 9999999999999, Total: 117


# Project 2.) Automated Webscraping

### Description : Find a website that is scrapable with Beautiful soup that updates with some frequency. Build a cloud function to programatically scrape the useful content

In [2]:
from bs4 import BeautifulSoup
import pandas as pd
from google.cloud import storage
import os
from io import StringIO
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'annreturn-f1d9154873aa.json'


In [8]:
# We talk about user agents later so run and ignore
user_agent_list = ["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246" 
,"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36 "
,"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9 "
,"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1" 
,"Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36"] 

headers = {
    'User-Agent': user_agent_list[2]}

In [15]:
def golfScraper():
    u = 'https://www.espn.com/golf/leaderboard'
    headers = {
        'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9'}
    page = requests.get(u, headers = headers)
    soup = BeautifulSoup(page.content, "html.parser")
    
    golfers = []
    for a in soup.findAll('a', class_='AnchorLink leaderboard_player_name'):
      golfers.append(a.get_text(strip=True))

    winnings = []
    for a in soup.findAll('td', class_='Table__TD'):
      if '$' in a.get_text():
        winnings.append(a.get_text(strip=True))
    
    max_length = max(len(golfers), len(winnings))
    winnings += ['-'] * (max_length - len(winnings))
    
    data = {'Golfers': golfers, 'Winnings': winnings}
    df = pd.DataFrame(data)
    df.index.name = 'Position'
    df.index += 1
    return(df)

In [210]:
golfScraper()

Unnamed: 0_level_0,Golfers,Winnings
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Danny Willett,-
2,Peter Malnati,-
3,Robby Shelton,-
4,Davis Thompson,-
5,Beau Hossler,-
...,...,...
116,Kazuki Higa,-
117,Thriston Lawrence,-
118,Nicolai Højgaard,-
119,Aldrich Potgieter (a),-


## 2.b.) Query your stored files

In [8]:
urlActual = 'https://us-west2-annreturn.cloudfunctions.net/webScrapeGolf'

In [9]:
client = storage.Client()

In [57]:
bucketName = 'webscrapingolf'
bucket = client.get_bucket(bucketName)

In [58]:
df = pd.DataFrame()
df.to_csv('golfScraped.csv')

In [59]:
#File path on cloud
blob = bucket.blob('webscraped/golf.csv')

In [32]:
#File path on local computer
blob.upload_from_filename('golfScraped.csv')

In [10]:
#Take the data that is already on the cloud
def download_data():
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'annreturn-f1d9154873aa.json'
    
    client = storage.Client()
    bucketName = 'webscrapingolf'
    bucket = client.get_bucket(bucketName)
    
    blob = bucket.blob('webscraped/golf.csv')
    
    csv_data = blob.download_as_text()
    
    df = pd.read_csv(StringIO(csv_data))
    
    return df

In [11]:
download_data()

Unnamed: 0,Golfers,Winnings
0,Danny Willett,-
1,Peter Malnati,-
2,Robby Shelton,-
3,Davis Thompson,-
4,Beau Hossler,-
...,...,...
115,Kazuki Higa,-
116,Thriston Lawrence,-
117,Nicolai Højgaard,-
118,Aldrich Potgieter (a),-


In [13]:
#Post a new csv on the cloud without storing locally
def post_data(request):
    
    df = golfScraper()
    
    csv_data = df.to_csv(index = False)
    
    client = storage.Client()
    bucketName = 'webscrapingolf'
    bucket = client.get_bucket(bucketName)
    
    blob = bucket.blob('webscraped/golf.csv')
    blob.upload_from_string(csv_data)
    
    return ({'status': 200,
            'length_data': len(df)})

In [16]:
post_data("")

{'status': 200, 'length_data': 120}

The length of the data does not increase. I could make it increase, but for the purposes of my data it should not. The golf leaderboard will be updated every Monday morning, so you can see the placements of all players for each tournament that weekend.

In [214]:
import session_info## 2.b.) Query your stored files
session_info.show()

## 2.c.) State how this could be useful in a business setting

You can track who won each tournament over the weekend on the PGA tour. This allows you to keep track of who the best golfers are and which ones are struggling based off their purse. You could then strategically make an advertisement offer to struggling golfers and get the advertisement relatively cheap since those golfers would be easier to swoon. The golfers winning a lot would need a stronger offer, but you know your advertisement would be seen on camera, which could be worth more than a cheap advertisement.

# Project 3.) 

### Description : Build some machine learning model using scikit learn and make it queriable using cloud functions

In [3]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from urllib.parse import parse_qs
import joblib
from io import BytesIO 

In [152]:
gender = pd.read_csv("gender_classification_v7.csv")
gender.head()

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender
0,1,11.8,6.1,1,0,1,1,Male
1,0,14.0,5.4,0,0,1,0,Female
2,0,11.8,6.3,1,1,1,1,Male
3,0,14.4,6.1,0,1,1,1,Male
4,1,13.5,5.9,0,0,0,0,Female


In [153]:
X = gender[["long_hair","forehead_width_cm","forehead_height_cm",
                        "nose_wide", "nose_long", "lips_thin", "distance_nose_to_lip_long"]]

In [154]:
gender['male'] = gender['gender'].apply(lambda x: 1 if x == 'Male' else 0)
gender.head()

Unnamed: 0,long_hair,forehead_width_cm,forehead_height_cm,nose_wide,nose_long,lips_thin,distance_nose_to_lip_long,gender,male
0,1,11.8,6.1,1,0,1,1,Male,1
1,0,14.0,5.4,0,0,1,0,Female,0
2,0,11.8,6.3,1,1,1,1,Male,1
3,0,14.4,6.1,0,1,1,1,Male,1
4,1,13.5,5.9,0,0,0,0,Female,0


In [80]:
y = gender[['male']]

In [81]:

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize MLPClassifier
clf = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000, random_state=42)


clf.fit(X_train,y_train)

#Locally Save ML Model

  y = column_or_1d(y, warn=True)


MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000, random_state=42)

In [83]:
joblib.dump(clf, "NN.sav") 

joblib.dump(scaler, "PP.sav")

['PP.sav']

In [84]:
bucket = client.get_bucket("webscrapingolf")
blob = bucket.blob("gender/nueral_net.sav")
blob.upload_from_filename("NN.sav")

bucket = client.get_bucket("webscrapingolf")
blob = bucket.blob("gender/preprocess.sav")
blob.upload_from_filename("PP.sav")

In [155]:
def load_scikit_model(file_name):
    bucket_name = "webscrapingolf"
    source_blob = "gender/" + file_name
    
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "annreturn-f1d9154873aa.json"
    client = storage.Client()
    
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(source_blob)
    
    model_data = blob.download_as_bytes()
    
    model = joblib.load(BytesIO(model_data))
    return(model)


In [None]:
model = load_scikit_model("nueral_net.sav")
preproc = load_scikit_model("preprocess.sav")

In [200]:
def gender_request(request):
    try:
        model = load_scikit_model("nueral_net.sav")
        preproc = load_scikit_model("preprocess.sav")
        #print(request.get_data().decode())
        ### Format request into these parameters

        query_string = request.get_data().decode()
        dictionary = parse_qs(query_string)
        dictionary = {k: int(v[0]) if v[0].isdigit() else float(v[0]) for k, v in dictionary.items()}
        for key, value in dictionary.items():
            globals()[key] = value


        X = preproc.transform([[long_hair,forehead_width_cm,forehead_height_cm,
                        nose_wide, nose_long, lips_thin, distance_nose_to_lip_long]])

        prediction = model.predict(X)[0]
        print(prediction)
        if prediction == 1:
            gender = 'male'
        else:
            gender = 'female'
        probability = str(round(model.predict_proba(X)[0][1]*100,2)) + "%"

        return({"status" : 200,
               "prediction" : gender,
               "prob_of_male" : probability})
    except Exception as e:
        return({"status" : 501, "error": str(e)})

In [180]:
urlSex = 'https://us-west2-annreturn.cloudfunctions.net/indetify_gender'

In [181]:
dic = {"long_hair": 1, "forehead_width_cm" : 13.5, 
       "forehead_height_cm" : 5.9, "nose_wide" : 0, 
       "nose_long" : 0, "lips_thin" : 0, 
       "distance_nose_to_lip_long" : 0}

In [216]:
r = requests.post(urlSex, dic)
print(r.text)
#r.json()
#total = json_data["total"]

#print(total)

{"Prediction":"female","Probability":"0.07%","status":200}



## 3.b.) Make a user-friendly input page that takes the inputs to your ML model and displays the output. Post to a sharable webpage. Link below

In [4]:
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets

In [5]:
def gender_request_widget(btn):
    long_hair = long_hair_input.value
    forehead_width_cm = forehead_width_cm_input.value
    forehead_height_cm = forehead_height_cm_input.value
    nose_wide = nose_wide_input.value
    nose_long = nose_long_input.value
    lips_thin = lips_thin_input.value
    distance_nose_to_lip_long = distance_nose_to_lip_long_input.value

    dic = {
        "long_hair": long_hair,
        "forehead_width_cm": forehead_width_cm,
        "forehead_height_cm": forehead_height_cm,
        "nose_wide": nose_wide,
        "nose_long": nose_long,
        "lips_thin": lips_thin,
        "distance_nose_to_lip_long": distance_nose_to_lip_long
    }

    urlSex = 'https://us-west2-annreturn.cloudfunctions.net/indetify_gender'
    r = requests.post(urlSex, dic)
    print(r.text)

In [7]:
long_hair_input = widgets.IntText(description="Long Hair:")
forehead_width_cm_input = widgets.FloatText(description="Forehead Width: ")
forehead_height_cm_input = widgets.FloatText(description="Forehead Height: ")
nose_wide_input = widgets.IntText(description="Nose Wide:")
nose_long_input = widgets.IntText(description="Nose Long:")
lips_thin_input = widgets.IntText(description="Lips Thin:")
distance_nose_to_lip_long_input = widgets.IntText(description="Distance Nose:")
submit_button = widgets.Button(description="Submit")

display(long_hair_input)
display(forehead_width_cm_input)
display(forehead_height_cm_input)
display(nose_wide_input)
display(nose_long_input)
display(lips_thin_input)
display(distance_nose_to_lip_long_input)
display(submit_button)

submit_button.on_click(gender_request_widget)

IntText(value=0, description='Long Hair:')

FloatText(value=0.0, description='Forehead Width: ')

FloatText(value=0.0, description='Forehead Height: ')

IntText(value=0, description='Nose Wide:')

IntText(value=0, description='Nose Long:')

IntText(value=0, description='Lips Thin:')

IntText(value=0, description='Distance Nose:')

Button(description='Submit', style=ButtonStyle())

{"Prediction":"male","Probability":"99.65%","status":200}

{"Prediction":"male","Probability":"99.65%","status":200}

{"Prediction":"male","Probability":"99.89%","status":200}



## 3.c.) Think of a company that would use the ML app you just built. What employees could use this app what would they use it for? Write a short paragraph.

They could use my ML app in order to identify a person who is robbing a convenient store. If they want to be sure of who they are tracking, they could have the proper sex. Or, if someone enters a store, you could target them with ads or promotions based on their sex. A computer could record the person who enters the store, tracks the person and what they check out, and then send them promotions for what they have bought and other items that may pertain to their sex.